summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJehan <jehan@girinstud.io>2021-03-17 02:07:17 +0100
committerJehan <jehan@girinstud.io>2022-12-14 00:23:13 +0100
commiteb8308d50a09821cb0b90aa0bca5a068d231d873 (patch)
tree8b77e5afbadcba9bf62d5ea637f1566f644430bb
parent5257fc1abf3171dad97a429830f56ff9971e91ff (diff)
src, script: regenerate all existing language models.
Now making sure that we have a generic language model working with UTF-8 for all 26 supported models which had single-byte encoding support until now.
-rw-r--r--script/BuildLangModelLogs/LangCroatianModel.log264
-rw-r--r--script/BuildLangModelLogs/LangCzechModel.log283
-rw-r--r--script/BuildLangModelLogs/LangEsperantoModel.log237
-rw-r--r--script/BuildLangModelLogs/LangEstonianModel.log261
-rw-r--r--script/BuildLangModelLogs/LangFinnishModel.log279
-rw-r--r--script/BuildLangModelLogs/LangGreekModel.log416
-rw-r--r--script/BuildLangModelLogs/LangHungarianModel.log234
-rw-r--r--script/BuildLangModelLogs/LangIrishModel.log252
-rw-r--r--script/BuildLangModelLogs/LangLatvianModel.log287
-rw-r--r--script/BuildLangModelLogs/LangLithuanianModel.log288
-rw-r--r--script/BuildLangModelLogs/LangMalteseModel.log240
-rw-r--r--script/BuildLangModelLogs/LangPolishModel.log281
-rw-r--r--script/BuildLangModelLogs/LangPortugueseModel.log286
-rw-r--r--script/BuildLangModelLogs/LangRomanianModel.log228
-rw-r--r--script/BuildLangModelLogs/LangSlovakModel.log252
-rw-r--r--script/BuildLangModelLogs/LangSloveneModel.log238
-rw-r--r--script/BuildLangModelLogs/LangSwedishModel.log267
-rw-r--r--script/BuildLangModelLogs/LangThaiModel.log301
-rw-r--r--script/BuildLangModelLogs/LangTurkishModel.log244
-rw-r--r--script/BuildLangModelLogs/LangVietnameseModel.log270
-rw-r--r--src/LangModels/LangCroatianModel.cpp276
-rw-r--r--src/LangModels/LangCzechModel.cpp317
-rw-r--r--src/LangModels/LangEsperantoModel.cpp125
-rw-r--r--src/LangModels/LangEstonianModel.cpp246
-rw-r--r--src/LangModels/LangFinnishModel.cpp236
-rw-r--r--src/LangModels/LangGreekModel.cpp286
-rw-r--r--src/LangModels/LangHungarianModel.cpp146
-rw-r--r--src/LangModels/LangIrishModel.cpp202
-rw-r--r--src/LangModels/LangLatvianModel.cpp185
-rw-r--r--src/LangModels/LangLithuanianModel.cpp202
-rw-r--r--src/LangModels/LangMalteseModel.cpp110
-rw-r--r--src/LangModels/LangPolishModel.cpp317
-rw-r--r--src/LangModels/LangPortugueseModel.cpp222
-rw-r--r--src/LangModels/LangRomanianModel.cpp193
-rw-r--r--src/LangModels/LangSlovakModel.cpp356
-rw-r--r--src/LangModels/LangSloveneModel.cpp220
-rw-r--r--src/LangModels/LangSwedishModel.cpp257
-rw-r--r--src/LangModels/LangThaiModel.cpp320
-rw-r--r--src/LangModels/LangTurkishModel.cpp150
-rw-r--r--src/LangModels/LangVietnameseModel.cpp304
-rw-r--r--src/nsLanguageDetector.h20
-rw-r--r--src/nsMBCSGroupProber.cpp34
-rw-r--r--src/nsMBCSGroupProber.h2
43 files changed, 5426 insertions, 4708 deletions
diff --git a/script/BuildLangModelLogs/LangCroatianModel.log b/script/BuildLangModelLogs/LangCroatianModel.log
index a79f123..542a251 100644
--- a/script/BuildLangModelLogs/LangCroatianModel.log
+++ b/script/BuildLangModelLogs/LangCroatianModel.log
@@ -1,157 +1,157 @@
= Logs of language model for Croatian (hr) =
- Generated by BuildLangModel.py
-- Started: 2016-09-25 23:41:35.999066
-- Maximum depth: 5
+- Started: 2021-03-16 19:09:36.740256
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Fizika čvrstog stanja (revision 4776646)
-Agregatno stanje (revision 4663090)
+Fizika čvrstog stanja (revision 5777686)
+Agregatno stanje (revision 5764830)
Alnico (revision 3915185)
-Aluminij (revision 4772363)
-Amorfna tvar (revision 4659679)
-Antimon (revision 4420072)
-Antoine Henri Becquerel (revision 4634966)
-Apsolutna nula (revision 4706683)
-Arsen (revision 4540773)
-Arthur Holly Compton (revision 4736068)
-Atom (revision 4778162)
-Atomska jezgra (revision 4540956)
+Aluminij (revision 5755266)
+Amorfna tvar (revision 5392804)
+Antimon (revision 5435171)
+Antoine Henri Becquerel (revision 5556977)
+Apsolutna nula (revision 5482633)
+Arsen (revision 5752189)
+Arthur Holly Compton (revision 5313150)
+Atom (revision 5730600)
+Atomska jezgra (revision 5731544)
Bell Labs (revision 4769518)
-Bor (element) (revision 4602837)
-Brian Josephson (revision 4403761)
-Cink (revision 4537854)
-Coulombov zakon (revision 4710338)
-Dijamant (revision 4625335)
-Dimenzija (revision 4669110)
-Dinastija Han (revision 4541686)
-Dislokacija (revision 4668021)
-EV (revision 4538157)
-Eksponencijalna funkcija (revision 4160157)
-Električna struja (revision 4280621)
-Električna vodljivost (revision 4460160)
-Električni izolator (revision 4649046)
-Električni luk (revision 4646980)
-Električni naboj (revision 4727496)
-Električni otpor (revision 4593314)
-Električni vodič (revision 4333008)
-Električno polje (revision 4705679)
-Elektrolit (revision 4486319)
-Elektromagnetsko zračenje (revision 4537368)
-Elektron (revision 4630705)
-Elektronika (revision 4090016)
-Elektronska konfiguracija (revision 4420620)
-Elektronski mikroskop (revision 4413214)
-Elektrotehnika (revision 4596912)
-Energetika (revision 4586277)
-Energija (revision 4719089)
+Bor (element) (revision 5549612)
+Brian Josephson (revision 5446101)
+Cink (revision 5556719)
+Comptonov učinak (revision 5313303)
+Coulombov zakon (revision 5436283)
+Dijamant (revision 5775412)
+Dimenzija (revision 5379791)
+Dinastija Han (revision 5772176)
+Dislokacija (revision 5431109)
+EV (revision 5430610)
+Eksponencijalna funkcija (revision 5523460)
+Električna struja (revision 5653050)
+Električna vodljivost (revision 5376333)
+Električni izolator (revision 5258197)
+Električni luk (revision 5437134)
+Električni naboj (revision 5774260)
+Električni otpor (revision 4904596)
+Električni vodič (revision 5334900)
+Električno polje (revision 5247154)
+Elektrolit (revision 4858367)
+Elektromagnetsko zračenje (revision 5760956)
+Elektron (revision 5774256)
+Elektronika (revision 5556766)
+Elektronska konfiguracija (revision 4949752)
+Elektronski mikroskop (revision 5439229)
+Elektrotehnika (revision 5254565)
+Energetika (revision 4908587)
+Energija (revision 5767106)
Fermi-Diracova statistika (revision 3934172)
-Feromagnetizam (revision 4760511)
-Fizika (revision 4769955)
-Fizika kondenzirane tvari (revision 4769955)
-Fizikalna veličina (revision 4621676)
-Fosfor (revision 4602427)
-Fotodioda (revision 3939069)
-Fotoelektrični učinak (revision 4704417)
-Foton (revision 4537522)
-Fotonaponski sustavi (revision 4418887)
-Francuski jezik (revision 4771366)
-Galij (revision 4537855)
-Genitiv (revision 4625199)
-Germanij (revision 4537856)
-Helij (revision 4747001)
+Feromagnetizam (revision 5392729)
+Fizika (revision 5777684)
+Fizika kondenzirane tvari (revision 5455580)
+Fizikalna veličina (revision 5497656)
+Fosfor (revision 5556869)
+Fotodioda (revision 5235215)
+Fotoelektrični učinak (revision 5632628)
+Foton (revision 5635311)
+Fotonaponski sustavi (revision 5430012)
+Francuski jezik (revision 5771033)
+Galij (revision 5437600)
+Genitiv (revision 5767472)
+Germanij (revision 5437677)
+Helij (revision 5556716)
Henri (revision 3922500)
-Indij (revision 4537867)
-Integrirani krug (revision 4447159)
-Ion (revision 4549144)
-Ioniziranje (revision 4566703)
-Izolator (revision 4649046)
-John Bardeen (revision 4403736)
-Kadmij (revision 3921860)
-Kelvin (revision 4624351)
-Keramika (revision 4599177)
-Kinetička energija (revision 4719090)
-Klasična mehanika (revision 4637127)
-Kompas (revision 4702880)
-Kondenzacija (revision 4477825)
-Kondenzirana tvar (revision 4776646)
+Indij (revision 5439698)
+Integrirani krug (revision 5500904)
+Ion (revision 5750157)
+Ioniziranje (revision 5318213)
+John Bardeen (revision 5182165)
+Kadmij (revision 5440736)
+Kelvin (revision 5240179)
+Keramika (revision 5655772)
+Kinetička energija (revision 5753997)
+Klasična mehanika (revision 5656259)
+Kompas (revision 5750313)
+Kondenzacija (revision 5492249)
+Kondenzirana tvar (revision 5455580)
Konstrukcija (revision 4680450)
-Kovalentna veza (revision 4641419)
-Kristal (revision 4720329)
-Kristalna rešetka (revision 4479184)
+Kovalentna veza (revision 5751506)
+Kristal (revision 5455704)
+Kristalna rešetka (revision 5562348)
Kristalografija (revision 4105956)
-Krutine (revision 4625162)
-Kubični kristalni sustav (revision 4344344)
-Kubični metar (revision 4616551)
-Kvantna mehanika (revision 4541215)
-Latinski jezik (revision 4760544)
-Luminiscencija (revision 4708222)
-Magnet (revision 4603344)
-Magnetizam (revision 4760040)
+Krutine (revision 5196995)
+Kubični kristalni sustav (revision 5610803)
+Kubični metar (revision 5082862)
+Kvantna mehanika (revision 5777687)
+Latinski jezik (revision 5663325)
+Luminiscencija (revision 5052601)
+Magnet (revision 5743549)
+Magnetizam (revision 5728489)
Magnetska permeabilnost (revision 4675996)
-Magnetska vodljivost (revision 4736934)
-Magnetski moment (revision 4410235)
-Magnetsko polje (revision 4678057)
-Materijal (revision 4669230)
-Mehanika (revision 4698699)
-Metal (revision 4671710)
-Metan (revision 4422418)
-Metar (revision 4655527)
-Mjerna veličina (revision 4621676)
-Molekula (revision 4539232)
-Molekule (revision 4539232)
-Napon (revision 4585417)
+Magnetska vodljivost (revision 4899860)
+Magnetski moment (revision 5489691)
+Magnetsko polje (revision 5671905)
+Materijal (revision 5748275)
+Mehanika (revision 5777691)
+Metal (revision 5505185)
+Metan (revision 5611051)
+Metar (revision 5325605)
+Mjerna veličina (revision 5497656)
+Molekula (revision 5773190)
+Molekule (revision 5773190)
+Napon (revision 5556720)
Niskotemperaturna fizika (revision 4657522)
-Njemački jezik (revision 4731246)
-Optika (revision 4768098)
+Njemački jezik (revision 5710175)
+Optika (revision 5316843)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-25 23:50:27.589690
+- Wikipedia parsing ended at: 2021-03-16 19:18:55.485669
-49 characters appeared 500582 times.
+49 characters appeared 643453 times.
First 31 characters:
-[ 0] Char a: 10.808019465342342 %
-[ 1] Char i: 10.18554402675286 %
-[ 2] Char e: 9.571259054460608 %
-[ 3] Char o: 8.468143081453189 %
-[ 4] Char n: 6.952906816465634 %
-[ 5] Char t: 5.369549843981606 %
-[ 6] Char r: 5.331993559496746 %
-[ 7] Char j: 5.102860270644969 %
-[ 8] Char s: 4.717109284792501 %
-[ 9] Char k: 4.013927788054705 %
-[10] Char l: 3.854713113935379 %
-[11] Char u: 3.786792173909569 %
-[12] Char m: 3.730058212240951 %
-[13] Char v: 3.0989927724129114 %
-[14] Char p: 2.67308852495695 %
-[15] Char d: 2.6135578186990345 %
-[16] Char z: 1.8931963194841206 %
-[17] Char g: 1.5665765049482403 %
-[18] Char č: 1.161048539500022 %
-[19] Char b: 1.1440683044935693 %
-[20] Char c: 1.007627122029957 %
-[21] Char h: 0.8006680224219008 %
-[22] Char f: 0.5159993767254915 %
-[23] Char š: 0.422907735395999 %
-[24] Char ž: 0.3611795869607777 %
-[25] Char ć: 0.34959307366225717 %
-[26] Char đ: 0.2195444502598975 %
-[27] Char y: 0.11306838839590717 %
-[28] Char w: 0.07291512679241363 %
-[29] Char x: 0.04534721584076135 %
-[30] Char q: 0.02477116636235422 %
+[ 0] Char a: 10.677081309746011 %
+[ 1] Char i: 9.900023777960474 %
+[ 2] Char e: 9.741037806957152 %
+[ 3] Char o: 8.583843730622128 %
+[ 4] Char n: 6.852404138297591 %
+[ 5] Char t: 5.517885533209108 %
+[ 6] Char r: 5.292383437484944 %
+[ 7] Char j: 5.03952891664193 %
+[ 8] Char s: 4.730104607484929 %
+[ 9] Char k: 4.032773178460587 %
+[10] Char l: 3.9395262746463224 %
+[11] Char m: 3.8557594727198414 %
+[12] Char u: 3.7656207990327184 %
+[13] Char v: 3.0636270248176634 %
+[14] Char p: 2.654583940085756 %
+[15] Char d: 2.6340696212466175 %
+[16] Char z: 1.8657151338170777 %
+[17] Char g: 1.5614194043698606 %
+[18] Char č: 1.1537750231951673 %
+[19] Char b: 1.1304632972416013 %
+[20] Char c: 1.081042438220041 %
+[21] Char h: 0.7697531909867543 %
+[22] Char f: 0.4845730768214617 %
+[23] Char š: 0.4174353060751912 %
+[24] Char ž: 0.365217039939203 %
+[25] Char ć: 0.35123000436706336 %
+[26] Char đ: 0.22596833024323454 %
+[27] Char y: 0.14857340007739495 %
+[28] Char w: 0.06558365568269944 %
+[29] Char x: 0.04988709354063157 %
+[30] Char q: 0.030149832233278887 %
-The first 31 characters have an accumulated ratio of 0.9997702674087363.
+The first 31 characters have an accumulated ratio of 0.9998103979622444.
-712 sequences found.
+725 sequences found.
-First 512 (typical positive ratio): 0.9989731099787131
-Next 512 (512-1024): 1.9976747066414694e-06
-Rest: 3.7513395167998453e-17
+First 512 (typical positive ratio): 0.9990568119867879
+Next 512 (512-1024): 0.00365217039939203
+Rest: -4.0440741033709315e-17
-- Processing end: 2016-09-25 23:50:27.987029
+- Processing end: 2021-03-16 19:18:56.030353
diff --git a/script/BuildLangModelLogs/LangCzechModel.log b/script/BuildLangModelLogs/LangCzechModel.log
index 7d9c950..7d7cbd3 100644
--- a/script/BuildLangModelLogs/LangCzechModel.log
+++ b/script/BuildLangModelLogs/LangCzechModel.log
@@ -1,161 +1,158 @@
= Logs of language model for Czech (cs) =
- Generated by BuildLangModel.py
-- Started: 2016-09-21 03:20:56.824516
-- Maximum depth: 5
+- Started: 2021-03-16 18:42:56.950279
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Sociální fobie (revision 13567590)
-Adaptace (revision 13991192)
-Agorafobie (revision 13013445)
-Alkoholismus (revision 13822064)
-Alprazolam (revision 14082425)
-Antidepresivum (revision 14113423)
-Asertivita (revision 14111958)
+Sociální fobie (revision 19562865)
+Adaptace (revision 18611473)
+Agorafobie (revision 19426793)
+Alkoholismus (revision 19586776)
+Alprazolam (revision 19373957)
+Americká psychiatrická společnost (revision 18200634)
+Antidepresivum (revision 19057482)
+Asertivita (revision 19469246)
Atenolol (revision 12051880)
-Automatické negativní myšlenky (revision 13567590)
-Benzodiazepin (revision 13947546)
-Beta-blokátory (revision 13428762)
-Blud (revision 13888988)
-Bohatství (revision 13556478)
-Bupropion (revision 13686045)
-Citaloparam (revision 13567590)
-Clonazepan (revision 13567590)
-Crohnova nemoc (revision 13745254)
-Deprese (psychologie) (revision 13695735)
-Diagnostický a statický manuál mentálních poruch (revision 13567590)
-Diagnostický a statistický manuál mentálních poruch (revision 13714660)
-Diagnóza (medicína) (revision 13052239)
-Dichotomické myšlení (revision 13567590)
-Digital object identifier (revision 14138049)
-Dopamin (revision 13714274)
-Dystymie (revision 13567267)
-Důkaz kruhem (revision 13190761)
-Elektivní mutismus (revision 9940891)
-Emoce (revision 14110033)
-Escitalopram (revision 12954987)
-Evoluce (revision 13951488)
-Expozice (psychologie) (revision 14119474)
-Extraverze a introverze (revision 13872996)
-Fluoxetin (revision 12955006)
-Fluvoxamin (revision 12955006)
-Gen (revision 13907182)
-Generalizovaná úzkostná porucha (revision 14006709)
-Halucinaci (revision 12188143)
-Hněv (revision 14057864)
-Inteligence (revision 14009781)
-International Standard Serial Number (revision 12869806)
-Interpersonální psychoterapie (revision 13567590)
-Iracionalita (revision 4765977)
-Ján Praško Pavlov (revision 14086840)
-Klinické testování (revision 13530979)
-Kognitivní omyl (revision 13107294)
-Kognitivní psychologie (revision 11629465)
-Kognitivní restrukturalizace (revision 13567360)
-Kognitivně behaviorální terapie (revision 13980494)
-Komorbidita (revision 11351714)
-Lymská borelióza (revision 14068446)
-Malé sebevědomí (revision 13567590)
-Medical Subject Headings (revision 12239331)
-Meditace (revision 13180783)
-Mentální černý filtr (revision 13567590)
-Mezinárodní klasifikace nemocí (revision 12531067)
-Michael Liebowitz (revision 13567590)
-Moclobemid (revision 13567590)
-Moritova terapie (revision 11960292)
-Musturbace (revision 13567590)
-Nervozita (revision 13847097)
-Noradrenalin (revision 14054165)
-Obsedantně kompulzivní porucha (revision 13950365)
-Panická ataka (revision 13253537)
-Panická porucha (revision 13253537)
-Paranoia (revision 14027052)
-Paroxetin (revision 12955006)
-Pohlavnost (revision 13564689)
-Porucha (revision 11039108)
-Pravděpodobnost (revision 13596041)
-Predestinace (revision 12467403)
-Profese (revision 13975485)
-Propanolol (revision 12972658)
-Psychiatr (revision 12767960)
-Psychické trauma (revision 11227535)
-Psychoaktivní droga (revision 13939232)
-Psychodynamická léčba (revision 13567590)
-Psychofarmaka (revision 9928215)
-Psycholog (revision 12358728)
-Psychoterapie (revision 13874178)
-Puberta (revision 12540014)
-RIMA (revision 10234728)
-Remise (revision 9896748)
-Richard Heimberg (revision 13567590)
-Rámování myšlenek (revision 13567590)
-Schizofrenie (revision 13977456)
-Sebevražda (revision 14053884)
-Selektivní abstrakce (revision 13567590)
-Selektivní inhibitor zpětného vychytávání serotoninu (revision 12955027)
-Serotonin (revision 13975104)
-Sertralin (revision 12955006)
-Skupinová terapie (revision 11964235)
-Sociální chování (revision 13507313)
-Sociální dovednost (revision 12226347)
+Benzodiazepiny (revision 19464603)
+Beta-blokátor (revision 19342461)
+Blud (revision 18085659)
+Bohatství (revision 16529725)
+Bupropion (revision 12028550)
+Citalopram (revision 17641873)
+Clonazepam (revision 19414205)
+Crohnova nemoc (revision 19441068)
+DSM-IV (revision 18200634)
+Deprese (psychologie) (revision 19554049)
+Diagnostický a statistický manuál mentálních poruch (revision 18200634)
+Diagnóza (medicína) (revision 18672900)
+Dichotomické myšlení (revision 19472610)
+Digital object identifier (revision 19452419)
+Dopamin (revision 19339677)
+Dystymie (revision 17683683)
+Důkaz kruhem (revision 16799597)
+Elektivní mutismus (revision 19334050)
+Emoce (revision 19268819)
+Escitalopram (revision 19342010)
+Fluoxetin (revision 19342014)
+Fluvoxamin (revision 19342014)
+Gen (revision 18766924)
+Generalizovaná úzkostná porucha (revision 19465410)
+Halucinace (revision 19181320)
+Hněv (revision 19602111)
+Inteligence (revision 19472417)
+International Standard Book Number (revision 19411852)
+International Standard Serial Number (revision 17477154)
+Interpersonální psychoterapie (revision 17446502)
+Introverze (revision 19273893)
+Iracionalita (revision 16731536)
+Jana Vyskočilová (revision 19609212)
+Ján Praško (revision 18740907)
+Ján Praško Pavlov (revision 18740907)
+Kognitivní omyl (revision 19618239)
+Kognitivní psychologie (revision 16289048)
+Kognitivní restrukturalizace (revision 19284546)
+Kognitivně behaviorální terapie (revision 19475205)
+Komorbidita (revision 17525950)
+Lymská borelióza (revision 19051205)
+Medical Subject Headings (revision 18009832)
+Meditace (revision 18651670)
+Mezinárodní klasifikace nemocí (revision 19575331)
+Michael Liebowitz (revision 17336961)
+Moclobemid (revision 19562865)
+Moritova terapie (revision 16391634)
+Musturbace (revision 19562865)
+NDRI (revision 19412768)
+Nervozita (revision 18799061)
+Noradrenalin (revision 19376674)
+Obsedantně kompulzivní porucha (revision 19461977)
+Panická ataka (revision 18158083)
+Panická porucha (revision 18158083)
+Paranoia (revision 19271797)
+Paroxetin (revision 19342014)
+Pohlavnost (revision 19553039)
+Pravděpodobnost (revision 19370061)
+Predestinace (revision 15390515)
+Profese (revision 19148432)
+Propanolol (revision 19342521)
+Psychiatr (revision 18661359)
+Psychické trauma (revision 17566056)
+Psychoaktivní droga (revision 19150920)
+Psychodynamická léčba (revision 19562865)
+Psychofarmaka (revision 19341820)
+Psycholog (revision 18812730)
+Psychoterapie (revision 18403501)
+PubMed (revision 17045891)
+RIMA (revision 13950874)
+Remise (revision 19427721)
+Richard Heimberg (revision 19562865)
+Schizofrenie (revision 19507435)
+Sebevražda (revision 19464374)
+Selektivní abstrakce (revision 17523049)
+Selektivní inhibitor zpětného vychytávání serotoninu (revision 19342041)
+Serotonin (revision 19186450)
+Sertralin (revision 19342014)
+Skupinová psychoterapie (revision 15430379)
+Skupinová terapie (revision 15430379)
+Sociální chování (revision 18867179)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-21 03:28:11.731386
+- Wikipedia parsing ended at: 2021-03-16 18:50:25.563305
-47 characters appeared 594800 times.
+48 characters appeared 495093 times.
First 41 characters:
-[ 0] Char o: 8.323806321452588 %
-[ 1] Char e: 8.040013449899126 %
-[ 2] Char n: 6.895595158036315 %
-[ 3] Char a: 6.263113651647613 %
-[ 4] Char i: 5.650470746469401 %
-[ 5] Char t: 5.40383322125084 %
-[ 6] Char s: 4.588937457969065 %
-[ 7] Char v: 3.8685272360457295 %
-[ 8] Char p: 3.6914929388029587 %
-[ 9] Char r: 3.6302958977807664 %
-[10] Char l: 3.6017148621385338 %
-[11] Char í: 3.5733019502353733 %
-[12] Char k: 3.301950235373235 %
-[13] Char u: 3.1782111634162744 %
-[14] Char c: 3.1383658372562206 %
-[15] Char d: 3.120208473436449 %
-[16] Char m: 2.758406186953598 %
-[17] Char h: 2.2747141896435776 %
-[18] Char á: 2.156186953597848 %
-[19] Char z: 2.0260591795561536 %
-[20] Char y: 1.9894082044384667 %
-[21] Char j: 1.8979488903833224 %
-[22] Char b: 1.8189307330195021 %
-[23] Char ě: 1.277236045729657 %
-[24] Char é: 1.2291526563550772 %
-[25] Char č: 0.9502353732347008 %
-[26] Char ž: 0.9214862138533961 %
-[27] Char ř: 0.8955951580363146 %
-[28] Char ý: 0.7646267652992602 %
-[29] Char š: 0.6605581708137189 %
-[30] Char f: 0.6260928043039677 %
-[31] Char ů: 0.5016812373907196 %
-[32] Char g: 0.47041022192333554 %
-[33] Char ú: 0.19502353732347008 %
-[34] Char x: 0.13685272360457296 %
-[35] Char ň: 0.05447209145931405 %
-[36] Char w: 0.04488903833221251 %
-[37] Char ó: 0.03429724277067922 %
-[38] Char ť: 0.02269670477471419 %
-[39] Char ď: 0.012104909213180902 %
-[40] Char q: 0.007229320780094149 %
+[ 0] Char o: 8.197651754316865 %
+[ 1] Char e: 8.02455296277669 %
+[ 2] Char n: 6.99807914876599 %
+[ 3] Char a: 6.436164518585397 %
+[ 4] Char i: 5.469477451711093 %
+[ 5] Char t: 5.3903004082061345 %
+[ 6] Char s: 4.630443169263149 %
+[ 7] Char v: 3.9471372045252107 %
+[ 8] Char r: 3.7742403952388743 %
+[ 9] Char p: 3.6326508352976106 %
+[10] Char l: 3.626389385428596 %
+[11] Char k: 3.4845978432334936 %
+[12] Char í: 3.306247513093499 %
+[13] Char d: 3.2319180436806825 %
+[14] Char c: 3.084269016124243 %
+[15] Char u: 3.0539716780483666 %
+[16] Char m: 2.917835638960761 %
+[17] Char h: 2.260989349475755 %
+[18] Char z: 2.074559729182194 %
+[19] Char á: 2.05597736182899 %
+[20] Char y: 2.00184611780009 %
+[21] Char j: 1.8560149305282037 %
+[22] Char b: 1.743510815139782 %
+[23] Char ě: 1.2797595603250298 %
+[24] Char é: 1.2238104760115776 %
+[25] Char č: 0.9543661493901145 %
+[26] Char ž: 0.9283104386448606 %
+[27] Char ř: 0.8905397571769345 %
+[28] Char ý: 0.7972239559032344 %
+[29] Char š: 0.6172577677325269 %
+[30] Char g: 0.5201043036358826 %
+[31] Char f: 0.5120250134823154 %
+[32] Char ů: 0.5093992441824061 %
+[33] Char ú: 0.18077411718606404 %
+[34] Char x: 0.1575461579945586 %
+[35] Char w: 0.07291559363594315 %
+[36] Char ň: 0.052313403744347016 %
+[37] Char ó: 0.050495563459794425 %
+[38] Char ť: 0.027469586522128164 %
+[39] Char q: 0.010301094945798063 %
+[40] Char ď: 0.010099112691958885 %
-The first 41 characters have an accumulated ratio of 0.9999613315400132.
+The first 41 characters have an accumulated ratio of 0.9999353656787715.
-1025 sequences found.
+1037 sequences found.
-First 512 (typical positive ratio): 0.9786035192432675
-Next 512 (512-1024): 1.6812373907195695e-06
-Rest: 2.0246480655940202e-06
+First 512 (typical positive ratio): 0.9751874547460189
+Next 512 (512-1024): 0.009283104386448606
+Rest: 3.158667139656693e-05
-- Processing end: 2016-09-21 03:28:12.235582
+- Processing end: 2021-03-16 18:50:26.412061
diff --git a/script/BuildLangModelLogs/LangEsperantoModel.log b/script/BuildLangModelLogs/LangEsperantoModel.log
index 5f020cd..05d0464 100644
--- a/script/BuildLangModelLogs/LangEsperantoModel.log
+++ b/script/BuildLangModelLogs/LangEsperantoModel.log
@@ -1,110 +1,157 @@
= Logs of language model for Esperanto (eo) =
- Generated by BuildLangModel.py
-- Started: 2015-12-04 01:22:51.466573
-- Maximum depth: 3
-- Max number of pages: 50
+- Started: 2021-03-16 18:50:26.592918
+- Maximum depth: 4
+- Max number of pages: 100
== Parsed pages ==
-Vikipedio:Ĉefpaĝo (revision 5524911)
-10-a de novembro (revision 5792999)
-12-a de novembro (revision 5793854)
-13-a de novembro (revision 5795088)
-18-a de novembro (revision 5796972)
-2-a de novembro (revision 5772615)
-20-a de novembro (revision 5799664)
-2015 (revision 5791963)
-22-a de novembro (revision 5799355)
-24-a de novembro (revision 5800563)
-4-a de decembro (revision 5806422)
-4-a de novembro (revision 5789811)
-5-a de novembro (revision 5789774)
-6-a de novembro (revision 5790336)
-7-a de novembro (revision 5791066)
-8-a de novembro (revision 5791337)
-9-a de novembro (revision 5791916)
-A Night at the Opera (Queen) (revision 5184272)
-Abdelhamid Abaaoud (revision 5800134)
-André Glucksmann (revision 5792591)
-Anglio (revision 5693468)
-Argentino (revision 5804665)
-Atencoj de novembro 2015 en Parizo (revision 5800135)
-Aung San Suu Kyi (revision 5791362)
-Austin FX4 (revision 5583207)
-Azilo (revision 5751210)
-Aŭstrio (revision 5804014)
-Bahio (revision 5773065)
-Bamako (revision 5798202)
-Bataclan (revision 5795605)
-Bejruto (revision 5774306)
-Birmo (revision 5790386)
-Blonda (revision 5441229)
-Bohemian rhapsody (revision 5654078)
-Cayetano Redondo (revision 5591025)
-Ciro la 2-a (revision 5774667)
-DJ Abdel (revision 5628860)
-Daniela Mercury (revision 5764721)
-Decembro de 2015 (revision 5626904)
-Dilatkoeficiento (revision 5806460)
-Eksproprietigo (revision 5586845)
-Elektroniko (revision 5788966)
-Elle s'appelait Sarah (filmo) (revision 5475154)
-Esperanto (revision 5804190)
-Federaciero (revision 5696168)
-Fondaĵo Vikimedio (revision 5772681)
-Francio (revision 5759775)
-François Hollande (revision 5627721)
+Vikipedio:Ĉefpaĝo (revision 7070684)
+1-a de marto (revision 7133709)
+10-a de marto (revision 7140053)
+1812 (revision 6759865)
+1836 (revision 6759900)
+1870 (revision 6759944)
+2-a de marto (revision 7134407)
+2013 (revision 7120546)
+2021 (revision 7133381)
+20a jarcento (revision 6911173)
+4-a de aprilo (revision 7095124)
+7-a de februaro (revision 7126938)
+7-a de marto (revision 7140031)
+9-a de junio (revision 7096958)
+Advokato (revision 7015897)
+Alĝerio (revision 7136438)
+Amazona arbaro (revision 7057380)
+Anglio (revision 6910536)
+Antikva Egiptio (revision 6715674)
+Batao (revision 6348833)
+Biero en Germanio (revision 5158902)
+Bjalistoko (revision 7095427)
+Charles Dickens (revision 7139853)
+David Copperfield (romano) (revision 6728487)
+Decembro de 2020 (revision 7115650)
+Demotika lingvo (revision 6581652)
+Duolingo (revision 6996800)
+Eduko (revision 7064206)
+Ekvatora Gvineo (revision 7111153)
+El Greco (revision 7130251)
+Emmanuel Macron (revision 7076767)
+Esperantisto (revision 6583368)
+Esperanto (revision 7125932)
+Esperanto kaj Libera Scio (revision 7106401)
+Eŭropa Kosma Agentejo (revision 6998003)
+Fabriko (revision 6775703)
+Februaro de 2021 (revision 7139991)
+Fluganta Spagetmonstro (revision 7072467)
+Fondaĵo Vikimedio (revision 7097854)
+Francaj Armitaj Fortoj (revision 6521662)
+Francio (revision 7035760)
+Grandduklando Flandrensis (revision 7064691)
+Hieroglifoj (revision 6475302)
+Honkongo (revision 7022513)
+Infanlaboro (revision 7043683)
+Internacia Fonetika Alfabeto (revision 6826202)
+Irlanda lingvo (revision 7108415)
+Januaro de 2021 (revision 7119168)
+Kreismo (revision 7029678)
+Landport (revision 6722661)
+Libera scio (revision 6432924)
+Listen to Wikipedia (revision 6980163)
+Listo de originalaj romanoj en Esperanto (revision 7134297)
+Marto de 2021 (revision 7140759)
+Metroo de Parizo (revision 7129616)
+Monda Komerca Organizaĵo (revision 7135765)
+Mutzig (revision 7085274)
+Namacu (revision 6342288)
+Ngozi Okonjo-Iweala (revision 7138302)
+Niĝerio (revision 7135950)
+Novelo (revision 7099911)
+Oktobrofesto (revision 6860497)
+Oseta Vikipedio (revision 7061966)
+Portsmouth (revision 6756801)
+Rolulo (revision 7078410)
+Romano (revision 7102617)
+San-Marino (revision 7075794)
+Sismo (revision 6757493)
+Slovaka Vikipedio (revision 6973132)
+Strasburgo (revision 7139993)
+Svahila Vikipedio (revision 6655220)
+Telegram (aplikaĵo) (revision 6982939)
+Teodoro Obiang Nguema Mbasogo (revision 6521358)
+Verkisto (revision 6694998)
+Vikio (revision 6761946)
+Vikipedio (revision 7075981)
+Vikipedio en Esperanto (revision 7075983)
+Ĉeĥa Vikipedio (revision 5571847)
+Ĉinio (revision 7133172)
+Ĵurnalisto (revision 7129724)
+-771 (revision 6917193)
+-86 (revision 7120146)
+1058 (revision 6758857)
+11-a de marto (revision 7140194)
+1101 (revision 6758901)
+1105 (revision 6758905)
+1131 (revision 6758935)
+1157 (revision 6758962)
+12-a de marto (revision 7141381)
+1290 (revision 6759097)
+13-a de marto (revision 7142227)
+1389 (revision 6759315)
+14-a de marto (revision 7142231)
+1420 (revision 6759383)
+1445 (revision 6759438)
+1456 (revision 6759463)
+1457 (revision 6759465)
+1459 (revision 6759469)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2015-12-04 01:27:38.176708
+- Wikipedia parsing ended at: 2021-03-16 18:54:42.162702
-56 characters appeared 342524 times.
+55 characters appeared 738091 times.
-First 35 characters:
-[ 0] Char a: 12.557952143499435 %
-[ 1] Char o: 9.84719318938235 %
-[ 2] Char e: 9.10242785906973 %
-[ 3] Char i: 8.362333734278474 %
-[ 4] Char n: 7.6359612757062285 %
-[ 5] Char r: 6.630192336887342 %
-[ 6] Char t: 5.70821314710794 %
-[ 7] Char l: 5.610409781504361 %
-[ 8] Char s: 5.004320865107262 %
-[ 9] Char k: 3.8855671427403626 %
-[10] Char d: 3.7194473963868226 %
-[11] Char j: 3.28531723324497 %
-[12] Char u: 2.8465158645817517 %
-[13] Char m: 2.787833845219605 %
-[14] Char p: 2.6582078920017285 %
-[15] Char g: 1.6825098387266293 %
-[16] Char v: 1.4048650605505015 %
-[17] Char c: 1.3823848839789328 %
-[18] Char b: 1.1406499982482978 %
-[19] Char f: 1.077296773364786 %
-[20] Char z: 0.7342551178895493 %
-[21] Char h: 0.6735294461118053 %
-[22] Char ĝ: 0.53572888323154 %
-[23] Char ŭ: 0.4268314045147202 %
-[24] Char ĉ: 0.33545094650301877 %
-[25] Char y: 0.17079095187490512 %
-[26] Char ŝ: 0.15327393116978666 %
-[27] Char w: 0.1442234704721421 %
-[28] Char ĵ: 0.1039343228503696 %
-[29] Char á: 0.0814541462788009 %
-[30] Char ó: 0.05430276418586727 %
-[31] Char é: 0.053718863495696656 %
-[32] Char q: 0.04350060141771087 %
-[33] Char x: 0.040873048311943105 %
-[34] Char ĥ: 0.03824549520617533 %
+First 32 characters:
+[ 0] Char a: 12.443858548607151 %
+[ 1] Char o: 9.828462886012701 %
+[ 2] Char e: 9.238969178597218 %
+[ 3] Char i: 8.570894374812863 %
+[ 4] Char n: 7.557604685601098 %
+[ 5] Char r: 6.426172382538196 %
+[ 6] Char t: 5.784923539238386 %
+[ 7] Char l: 5.684935868341437 %
+[ 8] Char s: 5.134326255163659 %
+[ 9] Char k: 4.062778166919797 %
+[10] Char d: 3.544278415534128 %
+[11] Char j: 3.39619369427347 %
+[12] Char u: 2.807783864049284 %
+[13] Char m: 2.731370522062998 %
+[14] Char p: 2.685847680028614 %
+[15] Char g: 1.6155189536249595 %
+[16] Char v: 1.417033942969092 %
+[17] Char c: 1.328968921176386 %
+[18] Char b: 1.1882003709569686 %
+[19] Char f: 1.1564969631115947 %
+[20] Char h: 0.6592683016050866 %
+[21] Char z: 0.6408423893530744 %
+[22] Char ĝ: 0.5576548149211953 %
+[23] Char ŭ: 0.44980903438735875 %
+[24] Char ĉ: 0.3391180762263732 %
+[25] Char w: 0.15404604581277917 %
+[26] Char y: 0.13819434189009214 %
+[27] Char ŝ: 0.12938783971082157 %
+[28] Char ĵ: 0.1166522827131072 %
+[29] Char á: 0.04579381133220701 %
+[30] Char é: 0.039155063535526106 %
+[31] Char ĥ: 0.031025984600814804 %
-The first 35 characters have an accumulated ratio of 0.9991971365510156.
+The first 32 characters have an accumulated ratio of 0.9990556719970846.
-989 sequences found.
+1066 sequences found.
-First 512 (typical positive ratio): 0.9942980632768038
-Next 512 (512-1024): 0.0015327393116978665
-Rest: -5.0306980803327406e-17
+First 512 (typical positive ratio): 0.995442680189542
+Next 512 (512-1024): 0.004498090343873587
+Rest: 6.983124116715766e-05
-- Processing end: 2015-12-04 01:27:38.307198
+- Processing end: 2021-03-16 18:54:42.252378
diff --git a/script/BuildLangModelLogs/LangEstonianModel.log b/script/BuildLangModelLogs/LangEstonianModel.log
index f1095eb..31acf96 100644
--- a/script/BuildLangModelLogs/LangEstonianModel.log
+++ b/script/BuildLangModelLogs/LangEstonianModel.log
@@ -1,159 +1,160 @@
= Logs of language model for Estonian (et) =
- Generated by BuildLangModel.py
-- Started: 2016-09-26 23:45:22.351942
-- Maximum depth: 5
+- Started: 2021-03-16 18:58:31.291439
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Harilik pohl (revision 4248853)
-A-vitamiin (revision 4330862)
-Aasta keskmine sademete hulk (revision 4266801)
-Aasta keskmine õhutemperatuur (revision 3902142)
-Ahm (revision 4343671)
-Ain Raal (revision 4464651)
-Alalehed (revision 2892741)
-Alamliik (revision 3522810)
-Alaska (revision 4216575)
-Aleksander Heintalu (revision 4445156)
-Aleuudid (revision 4335893)
-Ameerika jänes (revision 4325220)
-Ameerika valgejänes (revision 4355263)
-Anneli Sihvart (revision 4211078)
+Harilik pohl (revision 5703478)
+A-vitamiin (revision 5556956)
+Aasta keskmine sademete hulk (revision 5284375)
+Aasta keskmine õhutemperatuur (revision 5542687)
+Ahm (revision 5513665)
+Ain Raal (revision 5662146)
+Alalehed (revision 4983554)
+Alamliik (revision 5278935)
+Alaska (revision 5844590)
+Aleksander Heintalu (revision 5754094)
+Aleuudid (revision 4704649)
+Ameerika jänes (revision 5843342)
+Ameerika valgejänes (revision 5411720)
+Anneli Sihvart (revision 3546469)
Arbutiin (revision 4451788)
-Baribal (revision 4268462)
-Bensoehape (revision 3810308)
-Binaarne nomenklatuur (revision 3970950)
-C-vitamiin (revision 4444353)
-Droog (revision 4352968)
-E-vitamiin (revision 4336726)
-Eesti (revision 4474984)
-Eesti Entsüklopeediakirjastus (revision 4012421)
-Eesti köök (revision 4314947)
-Ellips (revision 4272113)
+Baribal (revision 5793838)
+Bensoehape (revision 5172889)
+Binaarne nomenklatuur (revision 5719069)
+C-vitamiin (revision 5487089)
+Droog (revision 5051359)
+E-vitamiin (revision 5553995)
+Eesti (revision 5807277)
+Eesti Entsüklopeediakirjastus (revision 5697753)
+Eesti köök (revision 5622964)
+Ellips (revision 5425749)
Emakakael (botaanika) (revision 3521516)
-Euraasia (revision 3710768)
+Euraasia (revision 5843444)
Fenoloogia (revision 3512905)
-Folaadid (revision 4266628)
-Fosfor (revision 4270122)
-Fotosüntees (revision 4380600)
-Fruktoos (revision 4285660)
-Glükoos (revision 4047315)
+Folaadid (revision 5695132)
+Fosfor (revision 5817280)
+Fotosüntees (revision 5849350)
+Fruktoos (revision 5580398)
+Glükoos (revision 5398752)
Gneiss (revision 4333338)
-Graniit (revision 4435351)
-Gröönimaa (revision 4331557)
-Halljänes (revision 4051603)
-Haned (revision 4127680)
+Graniit (revision 5788916)
+Gröönimaa (revision 5704662)
+Halljänes (revision 5844682)
+Haned (revision 5655933)
Happeline keskkond (revision 2966453)
-Heilongjiang (revision 4342364)
-Hendrik Relve (revision 4342591)
-Hiina (revision 4448121)
-Holland (revision 4307885)
-Hunt (revision 4427752)
-Hõimkond (revision 3489569)
+Heilongjiang (revision 5573413)
+Hendrik Relve (revision 5776793)
+Hiina (revision 5842572)
+Holland (revision 5563481)
+Hunt (revision 5833431)
+Hõimkond (revision 5594301)
Hüdrofiilsus (revision 4309797)
-Ida-Euroopa (revision 4337624)
-Ida-sinilind (revision 4248853)
-Ida-vöötorav (revision 3520679)
-Igihaljus (revision 3536500)
-Ilves (revision 4404632)
-Imetaja (revision 4289188)
-Indiaanlased (revision 4479868)
-Indrek Rohtmets (revision 4218674)
-Itaalia (revision 4404119)
-Jaapan (revision 4465542)
-Jilin (revision 3894473)
-Jood (revision 4025060)
+Ida-Euroopa (revision 5852084)
+Ida-sinilind (revision 3944751)
+Ida-vöötorav (revision 5772003)
+Igihaljus (revision 5718075)
+Ilves (revision 5810469)
+Imetaja (revision 5817468)
+Immuunsus (revision 5465129)
+Indiaanlased (revision 5715264)
+Indrek Rohtmets (revision 5460729)
+Itaalia (revision 5821960)
+Jaapan (revision 5848576)
+Jilin (revision 5551781)
+Jood (revision 5506157)
Juurestik (revision 3341159)
-Jääkaru (revision 4372399)
-Jõhvikas (revision 4391549)
-Kaalium (revision 4486067)
-Kaheidulehelised (revision 4031352)
+Jääkaru (revision 5798648)
+Jõhvikas (revision 5765158)
+Kaalium (revision 5506158)
+Kaheidulehelised (revision 4551109)
Kaheli õiekate (revision 3063362)
Kahesuguline õis (revision 3383221)
-Kaitsestaatus (revision 3527096)
-Kajakas (revision 4456839)
-Kalorsus (revision 3843290)
-Kaltsium (revision 4339861)
-Kanada (revision 4434682)
-Kanalised (revision 3616579)
+Kaitsestaatus (revision 5622492)
+Kajakas (revision 5799897)
+Kalorsus (revision 5843070)
+Kaltsium (revision 5506160)
+Kanada (revision 5846973)
+Kanalised (revision 4824603)
Kanarbikulaadsed (revision 4318215)
-Kanarbikulised (revision 3534760)
-Karboksüülhapped (revision 3659011)
-Karoteen (revision 4347634)
-Kasvuperiood (revision 4231717)
-Katteseemnetaimed (revision 4176294)
+Kanarbikulised (revision 5479568)
+Karboksüülhapped (revision 5328337)
+Karoteen (revision 5479578)
+Kasvuperiood (revision 5279042)
+Katteseemnetaimed (revision 5315975)
Kaukasus (revision 4476003)
-Kesk-Euroopa (revision 3580746)
-Kimalane (revision 4261145)
-Kiudained (toit) (revision 3538655)
+Kesk-Euroopa (revision 5381871)
+Kimalane (revision 5643935)
+Kiudained (toit) (revision 5762236)
Klass (bioloogia) (revision 3489567)
-Kliima (revision 4160781)
-Korea (revision 4329396)
-Kroom (revision 4030460)
+Kliima (revision 5719219)
+Korea (revision 5555270)
+Kroom (revision 5506123)
Kroonlehed (revision 3543291)
-Kuusepüü (revision 4028988)
-Kvertsetiin (revision 4448461)
-Laanemets (revision 4001157)
-Laanepüü (revision 4475093)
-Laiuskraad (revision 3990366)
-Leesikas (revision 4420533)
-Lehed (revision 4471821)
-Leheroots (revision 3595351)
-Liik (bioloogia) (revision 4320981)
-Liiv (revision 4399494)
-Liivakivi (revision 4330598)
-Linnaeus (revision 4276836)
-Linnud (revision 4479668)
+Kuusepüü (revision 5715613)
+Kvertsetiin (revision 5610539)
+Laanemets (revision 5751227)
+Laanepüü (revision 5747330)
+Laiuskraad (revision 4993978)
+Leesikas (revision 5842030)
+Lehed (revision 5725384)
+Leheroots (revision 5532086)
+Liik (bioloogia) (revision 5791564)
+Liiv (revision 5675176)
+Liivakivi (revision 5548801)
+Linnaeus (revision 5635181)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-26 23:47:54.476445
+- Wikipedia parsing ended at: 2021-03-16 19:01:52.570995
-55 characters appeared 433559 times.
+55 characters appeared 482798 times.
-First 33 characters:
-[ 0] Char a: 12.486881831538499 %
-[ 1] Char i: 10.26503889897338 %
-[ 2] Char e: 10.177622884082673 %
-[ 3] Char s: 8.710233209320991 %
-[ 4] Char t: 6.56634967789851 %
-[ 5] Char l: 6.051540851418146 %
-[ 6] Char u: 5.423944607308348 %
-[ 7] Char n: 5.131020230233947 %
-[ 8] Char k: 4.663033174262327 %
-[ 9] Char o: 4.526950195936424 %
-[10] Char d: 4.167368224393911 %
-[11] Char r: 3.6740097656835635 %
-[12] Char m: 3.552688330769284 %
-[13] Char v: 2.4700213811730354 %
-[14] Char p: 1.9229216784797456 %
-[15] Char g: 1.865259399528092 %
-[16] Char h: 1.8043680329551455 %
-[17] Char j: 1.6860450365463524 %
-[18] Char ä: 1.0247740215287884 %
-[19] Char b: 0.9255949017319443 %
-[20] Char õ: 0.9246723052687178 %
-[21] Char ü: 0.6536595941959457 %
-[22] Char f: 0.37342091849090897 %
-[23] Char c: 0.34851081398379463 %
-[24] Char ö: 0.24333481717597835 %
-[25] Char y: 0.1287022066200909 %
-[26] Char x: 0.06781084004714467 %
-[27] Char w: 0.04082489349777078 %
-[28] Char q: 0.020989069538401926 %
-[29] Char š: 0.018913227496142396 %
-[30] Char z: 0.017529332801302706 %
-[31] Char ō: 0.010379210211297655 %
-[32] Char ž: 0.009687262863877812 %
+First 34 characters:
+[ 0] Char a: 12.61500669016856 %
+[ 1] Char i: 10.380117564695794 %
+[ 2] Char e: 10.063007717513328 %
+[ 3] Char s: 8.719795856652263 %
+[ 4] Char t: 6.619538606207979 %
+[ 5] Char l: 6.04559256666349 %
+[ 6] Char u: 5.504372429049002 %
+[ 7] Char n: 5.077278696266347 %
+[ 8] Char k: 4.702380705802427 %
+[ 9] Char o: 4.470606754791859 %
+[10] Char d: 4.163438953765343 %
+[11] Char r: 3.6719290469306007 %
+[12] Char m: 3.5747869709485123 %
+[13] Char v: 2.4621063053285224 %
+[14] Char p: 1.8848462503987176 %
+[15] Char g: 1.8341003898110597 %
+[16] Char h: 1.7551853984482124 %
+[17] Char j: 1.7216309926718836 %
+[18] Char ä: 1.033972800218725 %
+[19] Char õ: 0.9384877319292955 %
+[20] Char b: 0.8972696655744226 %
+[21] Char ü: 0.6507897712915132 %
+[22] Char f: 0.34610748180398426 %
+[23] Char c: 0.30426803756436444 %
+[24] Char ö: 0.24275162697442823 %
+[25] Char y: 0.1056342404069611 %
+[26] Char x: 0.05550975770405014 %
+[27] Char w: 0.035211413468987034 %
+[28] Char z: 0.025476493274620024 %
+[29] Char q: 0.019884092311898558 %
+[30] Char š: 0.017605706734493517 %
+[31] Char é: 0.009527794232784725 %
+[32] Char ō: 0.009113542309620172 %
+[33] Char ž: 0.00869929038645562 %
-The first 33 characters have an accumulated ratio of 0.9995410082595447.
+The first 34 characters have an accumulated ratio of 0.9996603134230051.
-853 sequences found.
+869 sequences found.
-First 512 (typical positive ratio): 0.9972721312183132
-Next 512 (512-1024): 9.687262863877811e-05
-Rest: -5.204170427930421e-18
+First 512 (typical positive ratio): 0.9973685549586747
+Next 512 (512-1024): 8.69929038645562e-05
+Rest: -3.122502256758253e-17
-- Processing end: 2016-09-26 23:47:54.561846
+- Processing end: 2021-03-16 19:01:52.649852
diff --git a/script/BuildLangModelLogs/LangFinnishModel.log b/script/BuildLangModelLogs/LangFinnishModel.log
index e99e9aa..f7247f3 100644
--- a/script/BuildLangModelLogs/LangFinnishModel.log
+++ b/script/BuildLangModelLogs/LangFinnishModel.log
@@ -1,156 +1,157 @@
= Logs of language model for Finnish (fi) =
- Generated by BuildLangModel.py
-- Started: 2016-09-21 18:12:24.181917
-- Maximum depth: 5
+- Started: 2021-03-16 19:01:52.812613
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Yhdistynyt kuningaskunta (revision 15843357)
-1. toukokuuta (revision 15910178)
-1700-luku (revision 15493702)
-1707 (revision 15106709)
-1800-luku (revision 15708929)
-2014 (revision 15891601)
-409 (revision 12809782)
-5. marraskuuta (revision 15421719)
-927 (revision 12785964)
-Aasia (revision 15948161)
-Abhasia (revision 15730328)
-Adolf Hitler (revision 15951829)
-Afrikka (revision 15934209)
-Agatha Christie (revision 15760740)
-Aikavyöhyke (revision 15800313)
-Ajoneuvon kansallisuustunnus (revision 15897445)
-Akrotiri ja Dhekelia (revision 14625383)
-Alamaat (revision 15913741)
-Alan Turing (revision 15904871)
-Alankomaat (revision 15936643)
-Albania (revision 15767604)
-Alec Guinness (revision 15363805)
-Alexander Fleming (revision 15023225)
-Alfred Hitchcock (revision 15892843)
-Alfred Tennyson (revision 15856114)
-Allen Jones (revision 12871703)
-Andorra (revision 15913862)
-Andrew Lloyd Webber (revision 14978349)
-Anglit (revision 15902350)
-Anguilla (revision 15854041)
-Anne Brontë (revision 14287992)
-Anthony Eden (revision 14391831)
-Antigua ja Barbuda (revision 15196967)
-Arabian Lawrence (revision 15736417)
-Argentiina (revision 15676474)
-Armenia (revision 15634470)
-Arthur Conan Doyle (revision 15402837)
-Arts and Crafts (revision 15806930)
-Aurinko (revision 15934252)
-Australia (revision 15934255)
-Avara luonto (revision 15815943)
-Azerbaidžan (revision 15946891)
-BBC (revision 15866026)
-BKT (revision 15656549)
-Bahama (revision 15516869)
-Bangladesh (revision 15883994)
-Bank of England (revision 14481173)
-Barbados (revision 15839821)
-Barbara Hepworth (revision 15106880)
-Bath (revision 15869900)
-Beatrix Potter (revision 15057380)
-Belfast (revision 15715934)
-Belgia (revision 15932391)
-Belize (revision 15665086)
-Ben Nevis (revision 15610196)
-Bengalin kieli (revision 15551820)
-Benjamin Britten (revision 15081615)
-Bermuda (revision 15632621)
-Bertrand Russell (revision 14631969)
-Bhutan (revision 15377394)
-Big Ben (revision 14897401)
-Big Brother (revision 14641391)
-Birmingham (revision 15855259)
-Black Sabbath (revision 15839917)
-Bosnia ja Hertsegovina (revision 15934266)
-Botswana (revision 15524955)
-Bristol (revision 15891889)
-Bristolin kanaali (revision 15849713)
-Bristolin kansainvälinen lentoasema (revision 14452870)
-Britannia (provinssi) (revision 14557442)
-Britannian avoin golfturnaus (revision 14293265)
-Britannian kuninkaallinen perhe (revision 15522149)
-Britannian talous (revision 15470242)
-Britannian väestö (revision 15661241)
-Brittein saaret (revision 15805422)
-Brittiläinen Antarktiksen alue (revision 15836227)
-Brittiläinen Intia (revision 15593126)
-Brittiläinen Intian valtameren alue (revision 14272903)
-Brittiläinen imperiumi (revision 15906600)
-Brittiläinen kansainyhteisö (revision 15894379)
-Brittiläinen keittiö (revision 13393533)
-Brittiläinen kulttuuri (revision 15951407)
-Brittiläiset Neitsytsaaret (revision 15910520)
-Brittiläiset merentakaiset alueet (revision 15836213)
-Brunei (revision 15580824)
-Bruttokansantuote (revision 15656549)
-Bulgaria (revision 15944101)
-Burma (revision 15627218)
-Cambridge (revision 14641664)
-Cambridgen yliopisto (revision 15493340)
-Canterburyn tarinoita (revision 15232140)
-Cardiff (revision 15840398)
-Caymansaaret (revision 15914575)
-Channel 4 (revision 15882475)
-Charles Babbage (revision 15203616)
-Charles Chaplin (revision 15674652)
-Charles Darwin (revision 15894085)
-Charles Dickens (revision 15699592)
-Charles Dickensin joulutarina (revision 15116247)
+Yhdistynyt kuningaskunta (revision 19524940)
+2014 (revision 19539649)
+Aasia (revision 19400161)
+Abhasia (revision 19547259)
+Adolf Hitler (revision 19547632)
+Advanced Level (revision 18652085)
+Afrikka (revision 19621405)
+Agatha Christie (revision 19512386)
+Aikavyöhyke (revision 19555749)
+Ajoneuvon kansallisuustunnus (revision 18193201)
+Akrotiri ja Dhekelia (revision 18855449)
+Alamaat (revision 19549275)
+Alan Turing (revision 19549334)
+Alankomaat (revision 19640525)
+Albania (revision 19549481)
+Alec Guinness (revision 19544530)
+Alexander Fleming (revision 19522285)
+Alfred Hitchcock (revision 19402710)
+Alfred Tennyson (revision 19481118)
+Allen Jones (revision 19591974)
+Andorra (revision 19511981)
+Andrew Lloyd Webber (revision 18151455)
+Anglit (revision 19065858)
+Anguilla (revision 19591105)
+Anne Brontë (revision 19340812)
+Anthony Eden (revision 19341665)
+Antigua ja Barbuda (revision 18868418)
+Arabian Lawrence (revision 19429776)
+Argentiina (revision 19507062)
+Armenia (revision 19633290)
+Arthur Conan Doyle (revision 19393798)
+Arts and crafts (revision 19285842)
+Artsakhin tasavalta (revision 19508669)
+Atlantin valtameri (revision 19417172)
+Aurinko (revision 19558951)
+Australia (revision 19585414)
+Avara luonto (revision 19570513)
+Azerbaidžan (revision 19618379)
+BBC (revision 19151226)
+BKT (revision 19395273)
+Bahama (revision 19614968)
+Bangladesh (revision 19529050)
+Bank of England (revision 17954121)
+Barbados (revision 19193877)
+Barbara Hepworth (revision 19016157)
+Bath (revision 19316232)
+Beatrix Potter (revision 19590080)
+Belfast (revision 19638226)
+Belgia (revision 19623003)
+Belize (revision 18839172)
+Ben Nevis (revision 19287404)
+Bengalin kieli (revision 19361714)
+Benjamin Britten (revision 19284581)
+Bermuda (revision 19508737)
+Bertrand Russell (revision 19418381)
+Bhutan (revision 19609977)
+Big Ben (revision 19521754)
+Big Brother (revision 19638747)
+Birmingham (revision 19638219)
+Birminghamin kansainvälinen lentoasema (revision 19638219)
+Black Sabbath (revision 19637531)
+Bodiamin linna (revision 19288333)
+Boris Johnson (revision 18896646)
+Bosnia ja Hertsegovina (revision 19317622)
+Botswana (revision 19174485)
+Brexit (revision 19428746)
+Bristol (revision 19316368)
+Bristolin kansainvälinen lentoasema (revision 19316368)
+Britannia (provinssi) (revision 19620168)
+Britannia (täsmennyssivu) (revision 19524940)
+Britannian alahuone (revision 19561351)
+Britannian avoin golfturnaus (revision 18803777)
+Britannian kuninkaallinen perhe (revision 18877640)
+Britannian merentakaiset alueet (revision 18985200)
+Britannian talous (revision 19363886)
+Britannian väestö (revision 19334304)
+Britannian ylähuone (revision 19561348)
+Britteinsaaret (revision 19149527)
+Brittiläinen Antarktiksen alue (revision 19065469)
+Brittiläinen Intia (revision 19532682)
+Brittiläinen Intian valtameren alue (revision 19386472)
+Brittiläinen imperiumi (revision 18932562)
+Brittiläinen keittiö (revision 18379105)
+Brittiläinen kulttuuri (revision 19490255)
+Brittiläiset Neitsytsaaret (revision 19078289)
+Brittiläiset merentakaiset alueet (revision 18985200)
+Brunei (revision 19566565)
+Bruttokansantuote (revision 19395273)
+Bulgaria (revision 19361771)
+Burma (revision 19618164)
+Cambridge (revision 19030154)
+Cambridgen yliopisto (revision 18847878)
+Canterburyn tarinoita (revision 19505844)
+Cardiff (revision 18124102)
+Caymansaaret (revision 19078996)
+Ceylonin dominio (revision 18848736)
+Channel 4 (revision 19210598)
+Charles Babbage (revision 19265262)
+Charles Chaplin (revision 19446083)
+Charles Darwin (revision 19338522)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-21 18:15:05.189221
+- Wikipedia parsing ended at: 2021-03-16 19:06:31.128554
-61 characters appeared 940364 times.
+65 characters appeared 1138649 times.
First 30 characters:
-[ 0] Char a: 12.508773198463574 %
-[ 1] Char i: 10.969475649854738 %
-[ 2] Char n: 8.815841525196626 %
-[ 3] Char t: 8.80169806585535 %
-[ 4] Char e: 7.8206949649284745 %
-[ 5] Char s: 7.595782058862313 %
-[ 6] Char l: 5.963541777439374 %
-[ 7] Char o: 5.439808414613916 %
-[ 8] Char u: 5.0102938861972595 %
-[ 9] Char k: 4.589712068943515 %
-[10] Char r: 3.1231523112326713 %
-[11] Char ä: 3.041800834570443 %
-[12] Char m: 3.0392486313810396 %
-[13] Char v: 2.156292669647073 %
-[14] Char h: 1.996141919512019 %
-[15] Char j: 1.9248929138078446 %
-[16] Char p: 1.6324529650220552 %
-[17] Char y: 1.6323466232224966 %
-[18] Char d: 1.1981530556252684 %
-[19] Char b: 0.6835650875618378 %
-[20] Char g: 0.5793501239945382 %
-[21] Char c: 0.5056552569005194 %
-[22] Char ö: 0.38931732818355447 %
-[23] Char f: 0.215023118707224 %
-[24] Char w: 0.2106631049253268 %
-[25] Char z: 0.06593191572625068 %
-[26] Char x: 0.024458613898447838 %
-[27] Char š: 0.010421496356729947 %
-[28] Char ž: 0.007869293167326695 %
-[29] Char q: 0.007762951367768225 %
+[ 0] Char a: 12.546096294819561 %
+[ 1] Char i: 10.975375203420896 %
+[ 2] Char n: 8.908627680698793 %
+[ 3] Char t: 8.82282424171101 %
+[ 4] Char e: 7.780448584243256 %
+[ 5] Char s: 7.584865924442036 %
+[ 6] Char l: 5.942217487566405 %
+[ 7] Char o: 5.487731513398773 %
+[ 8] Char u: 5.063017663915746 %
+[ 9] Char k: 4.558296718303885 %
+[10] Char r: 3.1709508373519846 %
+[11] Char m: 3.0275352632813095 %
+[12] Char ä: 2.9864339230087587 %
+[13] Char v: 2.178195387691905 %
+[14] Char j: 1.9307969356667418 %
+[15] Char h: 1.9113001460502754 %
+[16] Char p: 1.6500256005142937 %
+[17] Char y: 1.593203875821258 %
+[18] Char d: 1.2042341406350858 %
+[19] Char b: 0.6837049872260899 %
+[20] Char g: 0.5634747845912129 %
+[21] Char c: 0.4858389196319498 %
+[22] Char ö: 0.38018739752109737 %
+[23] Char f: 0.21982191175682764 %
+[24] Char w: 0.19382619226820558 %
+[25] Char z: 0.0598955428758116 %
+[26] Char x: 0.02564442598201904 %
+[27] Char ž: 0.009484924678281015 %
+[28] Char š: 0.009309277924979516 %
+[29] Char q: 0.007201516885361511 %
-The first 30 characters have an accumulated ratio of 0.9996012182516557.
+The first 30 characters have an accumulated ratio of 0.9996056730388382.
-919 sequences found.
+940 sequences found.
-First 512 (typical positive ratio): 0.9985378147555799
-Next 512 (512-1024): 1.0634179955846884e-06
-Rest: 3.881443777498106e-17
+First 512 (typical positive ratio): 0.9985812031154878
+Next 512 (512-1024): 9.484924678281016e-05
+Rest: 2.7321894746634712e-17
-- Processing end: 2016-09-21 18:15:05.307164
+- Processing end: 2021-03-16 19:06:31.204594
diff --git a/script/BuildLangModelLogs/LangGreekModel.log b/script/BuildLangModelLogs/LangGreekModel.log
index a61b2ec..ee210e2 100644
--- a/script/BuildLangModelLogs/LangGreekModel.log
+++ b/script/BuildLangModelLogs/LangGreekModel.log
@@ -1,272 +1,174 @@
= Logs of language model for Greek (el) =
- Generated by BuildLangModel.py
-- Started: 2016-05-25 15:16:42.898905
-- Maximum depth: 5
-- Max number of pages: 200
+- Started: 2021-03-16 18:54:42.415198
+- Maximum depth: 4
+- Max number of pages: 100
== Parsed pages ==
-Πύλη:Κύρια (revision 5511929)
-14 Σεπτεμβρίου (revision 5808678)
-16 Σεπτεμβρίου (revision 5810117)
-1771 (revision 4940722)
-1829 (revision 5863423)
-1921 (revision 5819621)
-1948 (revision 5785943)
-1965 (revision 5846907)
-1970 (revision 5816968)
-1973 (revision 5423504)
-25 Μαΐου (revision 5865973)
-Eurovision (revision 5865484)
-Scorpions (revision 5586116)
-Wiki (revision 5859059)
-Wikimedia (revision 5771416)
-Αγία Πετρούπολη (revision 5782933)
-Αγγλική γλώσσα (revision 5851128)
-Αλεξάντρ Πούσκιν (revision 5790131)
-Βέλος ΙΙ (Αντιτορπιλικό) (revision 5178914)
-Βραζιλία (revision 5857981)
-Γαλλική γλώσσα (revision 5851119)
-Γαλλική εισβολή στην Ρωσία (revision 5858523)
-Γενικές εκλογές στη Δομινικανή Δημοκρατία 2016 (revision 5848770)
-Γηραιότερο πρόσωπο στον κόσμο (revision 5852034)
-Διαγωνισμός Τραγουδιού Eurovision 2016 (revision 5863783)
-Δικτατορία των Συνταγματαρχών (revision 5864405)
-Δομινικανή Δημοκρατία (revision 5848627)
-Εγκυκλοπαίδεια (revision 5566281)
-Ελεύθερο περιεχόμενο (revision 5824058)
-Ελλάδα (revision 5863759)
-Ελληνική γλώσσα (revision 5790854)
-Ιππικό (revision 5376587)
-Ιταλία (revision 5781867)
-Κίεβο (revision 5794613)
-Κατάληψη του Παρισιού (1814) (revision 5729368)
-Κλάους Μάιν (revision 5668218)
-Μάχη της Λειψίας (revision 5729316)
-Μάχη της Σαλτάνοφκα (revision 5865460)
-Μάχη του Μποροντίνο (revision 5670322)
-Μαξ Βερστάπεν (revision 5864745)
-Μπλουζ (revision 5846428)
-Νίκος Καχτίτσης (revision 5723615)
-Νικολάι Νικολάεβιτς Ραέφσκι (revision 5865460)
-Ντίλμα Ρούσεφ (revision 5843412)
-Ομοσπονδιακό Σοβιέτ της Ρωσικής Αυτοκρατορίας (revision 5865460)
-Ουκρανία (revision 5847651)
-Πάτρα (revision 5800331)
-Ποδόσφαιρο (revision 5864952)
-Πριμέρα Ντιβιζιόν (revision 5846965)
-Ρωσική Αυτοκρατορία (revision 5858419)
-Ρωσική γλώσσα (revision 5818960)
-Ρώσοι (revision 5376764)
-Σουζάνα Μούσατ Τζόουνς (revision 5848866)
-Στοκχόλμη (revision 5670508)
-Στρατηγός (revision 5464718)
-Τζακ Στάινμπεργκερ (revision 5820361)
-Τζαμάλα (revision 5863755)
-ΦΚ Μπαρτσελόνα (revision 5862032)
-Φόρμουλα Ένα (revision 5809160)
-10 Σεπτεμβρίου (revision 5841838)
-11 Σεπτεμβρίου (revision 5796866)
-12 Σεπτεμβρίου (revision 5795991)
-1321 (revision 5811404)
-13 Σεπτεμβρίου (revision 5830505)
-1435 (revision 5600729)
-1498 (revision 5831868)
-1523 (revision 5863396)
-1527 (revision 5579042)
-1580 (revision 5742938)
-15 Σεπτεμβρίου (revision 5817369)
-1712 (revision 5699806)
-1741 (revision 5817896)
-1752 (revision 5666171)
-1760 (revision 5490201)
-1769 (revision 5336004)
-17 Σεπτεμβρίου (revision 5843911)
-1812 (revision 5703237)
-1814 (revision 5751122)
-1851 (revision 5854460)
-1878 (revision 5863501)
-1889 (revision 5795061)
-1890 (revision 5705460)
-1898 (revision 5863504)
-18 Σεπτεμβρίου (revision 5661544)
-1901 (revision 5865687)
-1902 (revision 5779111)
-1905 (revision 5862599)
-1910 (revision 5794858)
-1916 (revision 5800363)
-1917 (revision 5865701)
-1925 (revision 5854774)
-1927 (revision 5839595)
-1928 (revision 5814308)
-1933 (revision 5854834)
-1936 (revision 5854290)
-1937 (revision 5794891)
-1943 (revision 5807315)
-1944 (revision 5865804)
-1950 (revision 5807377)
-1956 (revision 5795994)
-1960 (revision 5795065)
-1963 (revision 5863751)
-1966 (revision 5707508)
-1969 (revision 5668647)
-1980 (revision 5832053)
-1981 (revision 5817635)
-1982 (revision 5788879)
-1983 (revision 5812702)
-1984 (revision 5749754)
-1989 (revision 5846909)
-1994 (revision 5863999)
-1999 (revision 5795003)
-19 Σεπτεμβρίου (revision 5850863)
-1 Σεπτεμβρίου (revision 5630491)
-2000 (revision 5779037)
-2001 (revision 5779042)
-2005 (revision 5779066)
-2006 (revision 5808681)
-2009 (revision 5827105)
-2011 (revision 5808660)
-2016 (revision 5801621)
-20 Σεπτεμβρίου (revision 5808561)
-21 Σεπτεμβρίου (revision 5751207)
-22 Σεπτεμβρίου (revision 5807133)
-23 Σεπτεμβρίου (revision 5800012)
-24 Σεπτεμβρίου (revision 5662618)
-258 (revision 4952368)
-25 Σεπτεμβρίου (revision 5817621)
-26 Σεπτεμβρίου (revision 5817637)
-27 Σεπτεμβρίου (revision 5817648)
-28 Σεπτεμβρίου (revision 5817677)
-29 Σεπτεμβρίου (revision 5703562)
-2 Σεπτεμβρίου (revision 5701639)
-30 Σεπτεμβρίου (revision 5838312)
-326 (revision 5818811)
-3 Σεπτεμβρίου (revision 5816313)
-407 (revision 4952524)
-4 Σεπτεμβρίου (revision 5816970)
-5 Σεπτεμβρίου (revision 5817185)
-628 (revision 5398024)
-680 (revision 5365010)
-685 (revision 5819296)
-6 Σεπτεμβρίου (revision 5765157)
-775 (revision 5373211)
-786 (revision 5398031)
-7 Σεπτεμβρίου (revision 5749649)
-81 (revision 5397958)
-891 (revision 4952139)
-8 Σεπτεμβρίου (revision 5788878)
-9 Σεπτεμβρίου (revision 5817240)
-CIA (revision 5857678)
-Miyavi (revision 4944860)
-Άρμεν Κούπτσιος (revision 5766774)
-Έιμι Γουάινχαουζ (revision 5809279)
-Έρβιν Θάλμπεργκ (revision 5716376)
-Ίων Δραγούμης (revision 5818568)
-Αγία Ελένη (revision 5821916)
-Αλεξάντερ φον Χούμπολτ (revision 5773636)
-Αλμπέρτο Κόρντα (revision 5800055)
-Απρίλιος (revision 5766829)
-Αυτοκρατορία των Σασσανιδών (revision 5859880)
-Αύγουστος (revision 5461793)
-Β΄ Παγκόσμιος Πόλεμος (revision 5848530)
-Βέρμαχτ (revision 5212228)
-Βασίλης Λάσκος (revision 5695445)
-Βενεζουέλα (revision 5847962)
-Βρετανική Αυτοκρατορία (revision 5606306)
-Βόρεια Ελλάδα (revision 5670938)
-Γαλλία (revision 5776756)
-Γεώργιος Καρατζαφέρης (revision 5803114)
-Γιάννης Λάτσης (revision 5692530)
-Γιάννος Κρανιδιώτης (revision 5574536)
-Γιώργος Παπασιδέρης (μουσικός) (revision 5722203)
-Γκέοργκ Φρήντριχ Χαίντελ (revision 5807098)
-Γκρέις Κέλι (revision 5807168)
-Γρηγοριανό Hμερολόγιο (revision 5793842)
-Γρηγοριανό ημερολόγιο (revision 5793842)
-Γρηγόρης Λαμπράκης (revision 5752808)
-Δάντης Αλιγκέρι (revision 5648882)
-Δήμος Βιάννου (revision 4816422)
-Δεκέμβριος (revision 5461807)
-Δομιτιανός (revision 5735554)
-Δράμα (πόλη) (revision 5857326)
-Ενιαία Δημοκρατική Αριστερά (revision 5742309)
-Ετόρε Σότσας (revision 5785872)
-Ζιλ Αντριαμαχαζό (revision 5819706)
-Η.Π.Α. (revision 5845171)
-Ηράκλειος (revision 5778827)
-Θεσσαλονίκη (revision 5844955)
-Θεόδωρος Ρούζβελτ (revision 5815087)
-Ιανουάριος (revision 5615044)
-Ιερουσαλήμ (revision 5824734)
-Ιησούς Χριστός (revision 5859687)
-Ιούλιος (revision 5712711)
-Ιούνιος (revision 5461799)
-Ιράκ (revision 5820378)
-Ιράν (revision 5861249)
-Ισιδώρα Ντάνκαν (revision 5044778)
-Ιωάννης ο Χρυσόστομος (revision 5824898)
+Πύλη:Κύρια (revision 7950664)
+16 Μαρτίου (revision 8737120)
+1797 (revision 8019834)
+1839 (revision 8019704)
+1900 (revision 7952521)
+1901 (revision 7905277)
+1935 (revision 8290828)
+Mars 2020 (revision 8718725)
+Perseverance (ρόβερ) (revision 8718754)
+The Economist (revision 8341010)
+Wiki (revision 8595867)
+Wikimedia (revision 8518678)
+Άρθουρ Έβανς (revision 8502931)
+Άρθρουρ Γουέλσλεϋ, Δούκας του Ουέλλινγκτον (revision 8423158)
+Αγγλική γλώσσα (revision 8702613)
+Αδόλφος Χίτλερ (revision 8722090)
+Αντισφαίριση (revision 8557812)
+Αρειανό ελικόπτερο Ingenuity (revision 8718783)
+Αυστραλιανό Όπεν (revision 8078988)
+Βέρμαχτ (revision 8711795)
+Βραβεία Νόμπελ Λογοτεχνίας (revision 8519145)
+Γαλλία (revision 8680274)
+Γενικός Διευθυντής του Παγκόσμιου Οργανισμού Εμπορίου (revision 8694448)
+Γερμανία (revision 8724575)
+Εγκυκλοπαίδεια (revision 8687200)
+Ελεύθερο περιεχόμενο (revision 8707719)
+Ελληνική Βικιπαίδεια (revision 8731090)
+Κνωσός (revision 8697910)
+Κρήτη (revision 8735869)
+Λονδίνο (revision 8666776)
+Ναόμι Οσάκα (revision 8736512)
+Νγκόζι Οκόντζο-Ιουεάλα (revision 8716446)
+Νόβακ Τζόκοβιτς (revision 8735633)
+Ουίλιαμ Μπάντινγκ (revision 8298356)
+Παγκόσμιος Οργανισμός Εμπορίου (revision 8694448)
+Πατριάρχης Σερβίας Πορφύριος (revision 8716966)
+Σερβική Ορθόδοξη Εκκλησία (revision 8703081)
+Συλί Προυντόμ (revision 8736464)
+Συνθήκη των Βερσαλλιών (revision 7991516)
+10 Μαρτίου (revision 8726574)
+1185 (revision 8532989)
+1190 (revision 8729267)
+11 Μαρτίου (revision 8730381)
+1244 (revision 7906151)
+12 Μαρτίου (revision 8730152)
+13 Μαρτίου (revision 8544014)
+1405 (revision 7906083)
+1410 (revision 7906088)
+1465 (revision 7905889)
+1473 (revision 8687951)
+1478 (revision 7905905)
+14 Μαρτίου (revision 8096796)
+15 Μαρτίου (revision 8734431)
+1670 (revision 8120689)
+1751 (revision 8019900)
+1782 (revision 8019823)
+1789 (revision 8019786)
+1792 (revision 8019828)
+1794 (revision 8019829)
+17 Μαρτίου (revision 8233521)
+1802 (revision 8019791)
+1812 (revision 8019794)
+1815 (revision 8728979)
+1859 (revision 8019719)
+1872 (revision 8019620)
+1888 (revision 8678352)
+1892 (revision 8019578)
+1894 (revision 8019646)
+1898 (revision 7905275)
+18 Μαρτίου (revision 8666328)
+1906 (revision 8019564)
+1908 (revision 8110859)
+1911 (revision 8234911)
+1912 (revision 7905254)
+1919 (revision 8188234)
+1920 (revision 8689556)
+1921 (revision 8019599)
+1923 (revision 8640393)
+1924 (revision 8019604)
+1925 (revision 8424340)
+1926 (revision 8019613)
+1927 (revision 7905236)
+1930 (revision 8019616)
+1937 (revision 7905218)
+1939 (revision 8731642)
+1940 (revision 8503734)
+1944 (revision 8556801)
+1945 (revision 8699418)
+1948 (revision 8707830)
+1953 (revision 8660010)
+1955 (revision 8733996)
+1956 (revision 8637553)
+1957 (revision 8582051)
+1959 (revision 8621124)
+1964 (revision 8701289)
+1966 (revision 8596642)
+1967 (revision 8657263)
+1968 (revision 8640882)
+1969 (revision 8709383)
+1970 (revision 8645926)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-05-25 15:21:50.071087
+- Wikipedia parsing ended at: 2021-03-16 18:58:31.004638
-63 characters appeared 1875535 times.
+62 characters appeared 801479 times.
-First 46 characters:
-[ 0] Char α: 9.004097497514042 %
-[ 1] Char ο: 8.311015256980008 %
-[ 2] Char τ: 7.94493304577094 %
-[ 3] Char ι: 6.338831320129989 %
-[ 4] Char ν: 5.836627948825269 %
-[ 5] Char ε: 5.635565318695733 %
-[ 6] Char ρ: 4.717907157157825 %
-[ 7] Char σ: 4.307197679595422 %
-[ 8] Char κ: 4.061294510632965 %
-[ 9] Char ς: 3.766551943845356 %
-[10] Char η: 3.7565281373048225 %
-[11] Char π: 3.4156653968067783 %
-[12] Char υ: 3.30956233821283 %
-[13] Char μ: 3.1442761665338157 %
-[14] Char λ: 3.0899983204792236 %
-[15] Char ί: 2.429973314280992 %
-[16] Char ό: 2.076100952528212 %
-[17] Char ά: 1.922651403466211 %
-[18] Char γ: 1.8994047031913561 %
-[19] Char έ: 1.6641651582081913 %
-[20] Char δ: 1.508582884350332 %
-[21] Char ω: 1.2410325587099147 %
-[22] Char ή: 1.2077087337746297 %
-[23] Char χ: 1.0482342371643292 %
-[24] Char ύ: 0.9225101104484854 %
-[25] Char β: 0.8928652357860558 %
-[26] Char θ: 0.8681256281541001 %
-[27] Char φ: 0.806756472153279 %
-[28] Char ώ: 0.6969211451665791 %
-[29] Char ζ: 0.35515199663029484 %
-[30] Char e: 0.35488540603081253 %
-[31] Char ξ: 0.314736861748781 %
-[32] Char a: 0.2909036621550651 %
-[33] Char i: 0.2884510286398281 %
-[34] Char o: 0.24137112877125727 %
-[35] Char r: 0.23262695710823847 %
-[36] Char n: 0.2206303801315358 %
-[37] Char t: 0.21668483925919804 %
-[38] Char s: 0.2013825388489151 %
-[39] Char l: 0.14865091827131993 %
-[40] Char d: 0.1359078876160669 %
-[41] Char c: 0.12124540464454144 %
-[42] Char h: 0.1166600463334462 %
-[43] Char u: 0.10381037943840024 %
-[44] Char m: 0.09074744006376848 %
-[45] Char ψ: 0.08669526295163779 %
+First 47 characters:
+[ 0] Char α: 8.791371951105393 %
+[ 1] Char ο: 8.656870610458913 %
+[ 2] Char τ: 7.436002690026814 %
+[ 3] Char ι: 6.335661944979219 %
+[ 4] Char ν: 5.906455440504367 %
+[ 5] Char ε: 5.323907426145913 %
+[ 6] Char ρ: 5.098698780629311 %
+[ 7] Char ς: 4.129740142910793 %
+[ 8] Char κ: 4.033542987402041 %
+[ 9] Char σ: 3.9103956560309125 %
+[10] Char υ: 3.7128858023728633 %
+[11] Char η: 3.4742020689250745 %
+[12] Char λ: 3.4385180397739674 %
+[13] Char π: 3.329220104332116 %
+[14] Char μ: 3.3050148537890576 %
+[15] Char ί: 2.7370648513560556 %
+[16] Char ό: 2.185958708837038 %
+[17] Char γ: 2.095251403966916 %
+[18] Char ά: 1.8429678132552443 %
+[19] Char έ: 1.6417148796163092 %
+[20] Char δ: 1.4553094965682194 %
+[21] Char β: 1.2000314418718394 %
+[22] Char ω: 1.121801070271336 %
+[23] Char ή: 1.0494348573075527 %
+[24] Char χ: 0.9217958299593626 %
+[25] Char ύ: 0.8777522555176118 %
+[26] Char φ: 0.8600350102747546 %
+[27] Char θ: 0.7800578680165045 %
+[28] Char ώ: 0.617732966178777 %
+[29] Char ζ: 0.4195992658572464 %
+[30] Char e: 0.30456194111137036 %
+[31] Char ξ: 0.28696946520120925 %
+[32] Char i: 0.25203405204627943 %
+[33] Char a: 0.23631311612656103 %
+[34] Char n: 0.21647479222786872 %
+[35] Char r: 0.1978841616561382 %
+[36] Char o: 0.18915030836740576 %
+[37] Char s: 0.17779629909205355 %
+[38] Char t: 0.16269920983581604 %
+[39] Char l: 0.14585534992183202 %
+[40] Char d: 0.11665932607092637 %
+[41] Char c: 0.10468147013209328 %
+[42] Char h: 0.09257884486056403 %
+[43] Char u: 0.08409453023722394 %
+[44] Char m: 0.08247252891217362 %
+[45] Char ΐ: 0.07161759696760614 %
+[46] Char ψ: 0.06774974765402461 %
-The first 46 characters have an accumulated ratio of 0.993456267145108.
+The first 47 characters have an accumulated ratio of 0.9947858895866266.
-1579 sequences found.
+1390 sequences found.
-First 512 (typical positive ratio): 0.958419074626211
-Next 512 (512-1024): 0.006969211451665791
-Rest: 0.0018920066107342773
+First 512 (typical positive ratio): 0.9624941725288916
+Next 512 (512-1024): 0.00617732966178777
+Rest: 0.0016086054433421051
-- Processing end: 2016-05-25 15:21:50.812982
+- Processing end: 2021-03-16 18:58:31.125842
diff --git a/script/BuildLangModelLogs/LangHungarianModel.log b/script/BuildLangModelLogs/LangHungarianModel.log
index f04ad98..1e4ed44 100644
--- a/script/BuildLangModelLogs/LangHungarianModel.log
+++ b/script/BuildLangModelLogs/LangHungarianModel.log
@@ -1,109 +1,157 @@
= Logs of language model for Hungarian (hu) =
- Generated by BuildLangModel.py
-- Started: 2015-12-12 18:01:21.560682
-- Maximum depth: 2
-- Max number of pages: 50
+- Started: 2021-03-16 19:18:56.191449
+- Maximum depth: 4
+- Max number of pages: 100
== Parsed pages ==
-Kezdőlap (revision 12748721)
-1722 (revision 16471860)
-1780 (revision 16407861)
-1800 (revision 15028835)
-1831 (revision 16469576)
-1848–49-es forradalom és szabadságharc (revision 16955214)
-1875 (revision 16798555)
-1895 (revision 16649417)
-1900 (revision 16961019)
-1905 (revision 16601113)
-1915 (revision 16792868)
-1940 (revision 16936087)
-1950 (revision 16820817)
-1970 (revision 16093156)
-1985 (revision 16463340)
-1995 (revision 16945805)
-1998 (revision 16542908)
-2003 (revision 16943939)
-2015 (revision 16960983)
-73. Golden Globe-gála (revision 16937296)
-Akacuki (revision 16960353)
-Akasztottak erdeje (regény) (revision 16918702)
-Alan Hodgkinson (revision 16953214)
-Alfred Bernhard Nobel (revision 16654409)
-Alkotmány (revision 16784843)
-André-Marie Ampère (revision 16865419)
-Angela Merkel (revision 16960753)
-Anne Baxter (revision 15572176)
-Az irgalmasság rendkívüli szentéve (revision 16951018)
-Az év embereinek listája (revision 16961722)
-Bencések (revision 16853524)
-Boeing 747–400 (revision 16947261)
-Chantal Szent Johanna Franciska (revision 16371923)
-December 12. (revision 15637986)
-December 13. (revision 16546152)
-Dinamó (revision 15949492)
-Dionne Warwick (revision 16522754)
-Elektrodinamika (revision 14888277)
-Elektromosság (revision 16051899)
-Enciklopédia (revision 16556513)
-Eric Maskin (revision 16907781)
-Európai migrációs válság (revision 16922218)
-Eötvös Loránd (revision 16960057)
-Eötvös Loránd Tudományegyetem (revision 16684410)
-Fellner Jakab (revision 16960223)
-Feltaláló (revision 13609621)
-Ferenc pápa (revision 16928970)
-Frank Sinatra (revision 16927399)
-François Jean Dominique Arago (revision 16197941)
-Gabriella (revision 16906500)
+Kezdőlap (revision 21016160)
+1621 (revision 19051984)
+1771 (revision 21393041)
+1821 (revision 23636828)
+1831 (revision 22164941)
+1848–49-es forradalom és szabadságharc (revision 23685544)
+1858 (revision 22166952)
+1871 (revision 23533908)
+1921 (revision 23662365)
+1924 (revision 23246889)
+1941 (revision 23564803)
+1946 (revision 23682260)
+1971 (revision 23593882)
+2003 (revision 23647007)
+2021 (revision 23686129)
+A Nyugat lánya (revision 21595643)
+A magyar zászló és címer napja (revision 23134982)
+A párizsi Notre-Dame (revision 23521460)
+Abja-Paluoja (revision 23589245)
+Antoine-Jean Gros (revision 23684575)
+Arina Szjarhejevna Szabalenka (revision 23614779)
+Aun Szan Szu Kji (revision 23588064)
+Barbora Krejčíková (revision 23683559)
+Bebe Daniels (revision 23684693)
+Berlini Nemzetközi Filmfesztivál (revision 23601008)
+Bohémélet (revision 23533579)
+Borel–Lebesgue-tétel (revision 20175177)
+Brüsszel (revision 23681873)
+Claude Debussy (revision 23666304)
+Covid19-koronavírus-járvány Magyarországon (revision 23684719)
+Covid19-pandémia (revision 23672833)
+Császár Angela (revision 23405485)
+December 22. (revision 23636644)
+EastEnders (revision 23674461)
+Eigel Ernő (revision 23678820)
+Elise Mertens (revision 23668277)
+Első világháború (revision 23681284)
+Enciklopédia (revision 23257786)
+Fahd szaúdi király (revision 23684688)
+Filip Polášek (revision 20343200)
+Finnugor Kulturális Főváros (revision 23593480)
+Georg Neumark (revision 23419386)
+Giacomo Puccini (revision 23685245)
+Gianni Schicchi (revision 21500522)
+Gonda János (revision 23672147)
+Halálozások 2021-ben (revision 23686337)
+Heine-tétel (revision 15274788)
+Heller Bernát (revision 21796754)
+Henrietta (keresztnév) (revision 23599183)
+Hmelnickiji terület (revision 21540657)
+Ivan Dodig (revision 19700630)
+Jankovics József (revision 23686084)
+Jean Frydman (revision 23684355)
+Jászai Mari-díj (revision 23683756)
+Katona Gyula (matematikus) (revision 23651033)
+Kew Gardens (revision 23635430)
+Klasszikus gitár (revision 23640016)
+Kombinatorika (revision 23457078)
+Kurucz György (motorversenyző) (revision 23682502)
+Landerer Lajos (revision 20960777)
+Lucca (revision 22052809)
+Lítium (revision 23671148)
+Magyar Tudományos Akadémia (revision 23644040)
+Magyar Wikipédia (revision 23672081)
+Magyar nyelv (revision 21426463)
+Magyarország (revision 23674944)
+Magyarország címere (revision 23623029)
+Magyarország nemzetiségei (revision 23600289)
+Magyarország zászlaja (revision 23056847)
+Mars (bolygó) (revision 23667637)
+Mercury–Atlas–6 (revision 23639047)
+Mianmar (revision 23673840)
+Michael Jordan (revision 23621635)
+Michal Polák (revision 23684810)
+Mustárgáz (revision 23682684)
+Március 16. (revision 23685754)
+NASA (revision 23080317)
+Nagy Dénes (filmrendező) (revision 23656475)
+Newbery Medal (revision 23594588)
+Nobel-békedíj (revision 23517207)
+Novak Đoković (revision 23685551)
+November 29. (revision 23652299)
+Nyílt tartalom (revision 22335123)
+Olaszország (revision 23657820)
+Országos Rabbiképző – Zsidó Egyetem (revision 23624676)
+Perseverance (revision 23666738)
+Pillangókisasszony (revision 23430673)
+Pánczél Lajos (revision 23532037)
+Rajeev Ram (revision 23615665)
+Richard Strauss (revision 23674657)
+Révész László László (revision 23685649)
+Spinosaurus (revision 23680682)
+Szomszédok (revision 23682854)
+Széchenyi-díj (revision 23683451)
+Természetes fény (film) (revision 22147530)
+Tiltott Város (revision 23663664)
+Tosca (revision 23580069)
+Ukrajna (revision 23683387)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2015-12-12 18:02:46.729734
+- Wikipedia parsing ended at: 2021-03-16 19:23:30.841696
-55 characters appeared 375370 times.
+56 characters appeared 1168905 times.
First 32 characters:
-[ 0] Char e: 9.710685457015744 %
-[ 1] Char a: 8.803314063457389 %
-[ 2] Char t: 7.322375256413672 %
-[ 3] Char s: 6.666222660308496 %
-[ 4] Char l: 5.73967019207715 %
-[ 5] Char r: 5.4341050163838345 %
-[ 6] Char n: 5.39920611663159 %
-[ 7] Char i: 4.773689959240216 %
-[ 8] Char o: 4.347976663025815 %
-[ 9] Char k: 4.289634227562138 %
-[10] Char z: 4.244611982843594 %
-[11] Char á: 3.7855982097663636 %
-[12] Char m: 3.2144284306151265 %
-[13] Char g: 3.0727016010869277 %
-[14] Char é: 3.0295441830727015 %
-[15] Char b: 2.287609558568879 %
-[16] Char d: 1.9966965926952074 %
-[17] Char v: 1.8832085675466872 %
-[18] Char y: 1.8453792258305137 %
-[19] Char u: 1.5155713029810587 %
-[20] Char h: 1.2960545595012922 %
-[21] Char p: 1.288861656498921 %
-[22] Char j: 1.2363801049631031 %
-[23] Char c: 1.0951860830647095 %
-[24] Char f: 1.0256546873751233 %
-[25] Char ö: 1.020859418706876 %
-[26] Char ó: 0.9955510562911262 %
-[27] Char ő: 0.8399712283879905 %
-[28] Char í: 0.6340410794682579 %
-[29] Char ü: 0.4211844313610571 %
-[30] Char ú: 0.3295415190345526 %
-[31] Char ű: 0.2056637451048299 %
+[ 0] Char e: 9.498462236024313 %
+[ 1] Char a: 9.06651952040585 %
+[ 2] Char t: 7.768381519456244 %
+[ 3] Char s: 6.3276314157266835 %
+[ 4] Char l: 5.860613137936787 %
+[ 5] Char n: 5.5261120450336 %
+[ 6] Char r: 5.029493414777077 %
+[ 7] Char i: 4.7589838352988485 %
+[ 8] Char k: 4.502162280082642 %
+[ 9] Char o: 4.291794457205675 %
+[10] Char z: 4.136777582438265 %
+[11] Char á: 3.7318687147372973 %
+[12] Char é: 3.275287555447192 %
+[13] Char m: 3.2307159264439798 %
+[14] Char g: 3.089215975635317 %
+[15] Char b: 2.120103857884088 %
+[16] Char d: 2.0372913110988486 %
+[17] Char y: 2.0071776577223983 %
+[18] Char v: 1.8980156642327648 %
+[19] Char u: 1.421757970066002 %
+[20] Char h: 1.3363789187316335 %
+[21] Char p: 1.229868979942767 %
+[22] Char j: 1.147227533460803 %
+[23] Char c: 1.0305371266270569 %
+[24] Char ö: 1.0298527254139558 %
+[25] Char f: 0.9665456132020994 %
+[26] Char ó: 0.9550818928826551 %
+[27] Char ő: 0.8821931636873827 %
+[28] Char í: 0.6613026721589864 %
+[29] Char ü: 0.46162861823672585 %
+[30] Char ú: 0.293950321026944 %
+[31] Char ű: 0.23611841851989682 %
-The first 32 characters have an accumulated ratio of 0.9975117883688093.
+The first 32 characters have an accumulated ratio of 0.998090520615448.
-1084 sequences found.
+1122 sequences found.
-First 512 (typical positive ratio): 0.9748272224933486
-Next 512 (512-1024): 5.328076298052588e-06
-Rest: 0.0001889139024889644
+First 512 (typical positive ratio): 0.9736098834669349
+Next 512 (512-1024): 0.0023611841851989683
+Rest: 0.00010464608288375879
-- Processing end: 2015-12-12 18:02:46.902033
+- Processing end: 2021-03-16 19:23:30.943714
diff --git a/script/BuildLangModelLogs/LangIrishModel.log b/script/BuildLangModelLogs/LangIrishModel.log
index 7bee9d8..37e867d 100644
--- a/script/BuildLangModelLogs/LangIrishModel.log
+++ b/script/BuildLangModelLogs/LangIrishModel.log
@@ -1,156 +1,158 @@
= Logs of language model for Irish (ga) =
- Generated by BuildLangModel.py
-- Started: 2016-09-27 00:31:16.489602
-- Maximum depth: 5
+- Started: 2021-03-16 19:06:31.364099
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Tracy Caldwell Dyson (revision 812158)
-14 Lúnasa (revision 716575)
-1969 (revision 810361)
-California (revision 790976)
-Ceimic (revision 759983)
-Ceimic fhisiciúil (revision 656896)
-NASA (revision 806394)
-Rúisis (revision 771746)
-SAM (revision 807668)
-Spáinnis (revision 812323)
-Stáisiún Idirnáisiúnta Spáis (revision 806394)
-Tointeálaí spáis (revision 761309)
+Tracy Caldwell Dyson (revision 972597)
+14 Lúnasa (revision 945830)
+1969 (revision 950246)
+Arcadia (revision 940778)
+California (revision 977165)
+Ceimic (revision 996644)
+Ceimic fhisiciúil (revision 927461)
+Ceimiceoir (revision 927503)
+Fisiceoir (revision 880864)
+IMDb (revision 941231)
+Max Q (revision 910451)
+Medal "For Merit in Space Exploration" (revision 972605)
+NASA (revision 982342)
+Ollscoil California, Davis (revision 972597)
+Rúisis (revision 990076)
+SAM (revision 976971)
+Spáinnis (revision 976986)
+Spásaire (revision 948727)
+Stáisiún Idirnáisiúnta Spáis (revision 810459)
+Stáit Aontaithe Mheiriceá (revision 976971)
+Tointeálaí spáis (revision 884452)
10 Lúnasa (revision 649045)
-11 Lúnasa (revision 776455)
-12 Lúnasa (revision 716531)
-13 Lúnasa (revision 716546)
+11 Lúnasa (revision 855483)
+12 Lúnasa (revision 970783)
+13 Lúnasa (revision 843084)
1598 (revision 703178)
15 Lúnasa (revision 776986)
-16 Lúnasa (revision 648836)
-1740 (revision 791225)
+16 Lúnasa (revision 956751)
+1740 (revision 868712)
1771 (revision 776762)
17 Lúnasa (revision 777131)
-1823 (revision 791774)
-1832 (revision 794492)
-1898 (revision 805176)
+1823 (revision 884394)
+1832 (revision 870502)
+1898 (revision 881354)
18 Lúnasa (revision 777242)
-1911 (revision 801932)
-1956 (revision 797081)
-1962 (revision 801511)
-1966 (revision 807415)
+1911 (revision 884923)
+1956 (revision 922906)
+1962 (revision 948322)
+1966 (revision 983105)
+1983 (revision 950195)
19 Lúnasa (revision 648524)
-1 Lúnasa (revision 647726)
-2001 (revision 801012)
-2004 (revision 795759)
-2016 (revision 812091)
-20 Lúnasa (revision 777924)
-21 Lúnasa (revision 647805)
-22 Lúnasa (revision 778960)
+1 Lúnasa (revision 970005)
+2001 (revision 953347)
+2004 (revision 915512)
+20 Lúnasa (revision 863369)
+21 Lúnasa (revision 987631)
+22 Lúnasa (revision 949242)
23 Lúnasa (revision 778453)
-24 Lúnasa (revision 778495)
-25 Lúnasa (revision 778551)
+24 Lúnasa (revision 855482)
+25 Lúnasa (revision 922966)
26 Lúnasa (revision 649051)
-27 Lúnasa (revision 778763)
-28 Lúnasa (revision 778813)
-29 Lúnasa (revision 778959)
-2 Lúnasa (revision 774393)
+27 Lúnasa (revision 855881)
+28 Lúnasa (revision 855201)
+29 Lúnasa (revision 937884)
+2 Lúnasa (revision 949578)
30 Lúnasa (revision 648308)
-31 Lúnasa (revision 649053)
-3 Lúnasa (revision 647811)
-4 Lúnasa (revision 786284)
-5 Lúnasa (revision 776845)
-6 Lúnasa (revision 647834)
-7 Lúnasa (revision 775859)
+31 Lúnasa (revision 874664)
+3 Lúnasa (revision 954861)
+4 Lúnasa (revision 936315)
+5 Lúnasa (revision 946408)
+6 Lúnasa (revision 936316)
+7 Lúnasa (revision 936317)
8 Lúnasa (revision 648745)
-9 Lúnasa (revision 648522)
-AK Parti (revision 792248)
-An Phacastáin (revision 759339)
-An Tuirc (revision 811970)
-Aoine (revision 717430)
-Bertolt Brecht (revision 800584)
-Czesław Miłosz (revision 780306)
-Céadaoin (revision 717606)
-Dan Boyle (revision 797926)
+9 Lúnasa (revision 868992)
+AK Parti (revision 980611)
+An Phacastáin (revision 975474)
+An Tuirc (revision 975987)
+Aoidh Uí Néill (revision 945830)
+Aoine (revision 871416)
+Bertolt Brecht (revision 996168)
+Czesław Miłosz (revision 968559)
+Céadaoin (revision 841385)
+Dan Boyle (revision 981683)
Domhnach (revision 717663)
-Déardaoin (revision 647860)
+Déardaoin (revision 841384)
Féilire (revision 648837)
-Halle Berry (revision 759955)
-Henry Bagenal (revision 716575)
-Iúil (revision 647071)
+Halle Berry (revision 916135)
+Henry Bagenal (revision 936900)
+Iúil (revision 931127)
Luan (revision 717791)
-Lúnasa (revision 810265)
-Meán Fómhair (revision 779166)
-Pápa Pius VII (revision 758126)
-Satharn (revision 784525)
-Walter Scott (revision 759029)
-Áth Buí (revision 716575)
-11 Márta (revision 716519)
-17 Márta (revision 798614)
-1882 (revision 801198)
-1886 (revision 776624)
-1890 (revision 801200)
-1891 (revision 796677)
-1903 (revision 812849)
-1922 (revision 801227)
-1930í (revision 740221)
-1940í (revision 740219)
-1950í (revision 740217)
-1960í (revision 772724)
-1967 (revision 796983)
-1968 (revision 810926)
-1970 (revision 812852)
-1970í (revision 740213)
-1971 (revision 809746)
-1972 (revision 789490)
-1980í (revision 740211)
-1990í (revision 740208)
-19ú haois (revision 739964)
-1 Bealtaine (revision 647679)
+Lúnasa (revision 970011)
+Meán Fómhair (revision 931128)
+Mila Kunis (revision 916248)
+Pápa Pius VII (revision 972523)
+Satharn (revision 717929)
+Walter Scott (revision 973708)
+Áth Buí (revision 923034)
+10 Bealtaine (revision 974318)
+11 Feabhra (revision 885848)
+11 Meitheamh (revision 937886)
+11 Márta (revision 956107)
+11 Nollaig (revision 949777)
+13 Eanáir (revision 952269)
+14 Eanáir (revision 952327)
+15 Meitheamh (revision 770401)
+16 Nollaig (revision 922996)
+17 Meán Fómhair (revision 974321)
+17 Márta (revision 959908)
+1882 (revision 894229)
+1886 (revision 876620)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-27 00:33:40.157338
+- Wikipedia parsing ended at: 2021-03-16 19:09:36.532359
-44 characters appeared 183561 times.
+42 characters appeared 213560 times.
First 31 characters:
-[ 0] Char a: 15.192769705983297 %
-[ 1] Char i: 10.534372769814938 %
-[ 2] Char n: 8.106297089250985 %
-[ 3] Char h: 7.243368689427493 %
-[ 4] Char r: 6.442544985045844 %
-[ 5] Char e: 6.198484427520007 %
-[ 6] Char s: 5.622654049607488 %
-[ 7] Char t: 4.776068990689743 %
-[ 8] Char c: 4.543448771797931 %
-[ 9] Char l: 4.1953356105054995 %
-[10] Char o: 3.9469168287381304 %
-[11] Char d: 3.2169142682813887 %
-[12] Char g: 2.811054635788648 %
-[13] Char m: 2.6269196615838877 %
-[14] Char á: 2.2749930540801153 %
-[15] Char u: 2.1932763495513754 %
-[16] Char b: 2.0478206154902185 %
-[17] Char í: 1.6599386579938005 %
-[18] Char é: 1.2829522611012143 %
-[19] Char f: 1.1494816437042727 %
-[20] Char ú: 1.0525111543301682 %
-[21] Char p: 0.9059658642086281 %
-[22] Char ó: 0.8890777452726886 %
-[23] Char v: 0.2522322279787101 %
-[24] Char y: 0.23479933101257894 %
-[25] Char k: 0.18195586208399386 %
-[26] Char w: 0.1688811893593955 %
-[27] Char j: 0.09697048937410452 %
-[28] Char z: 0.07735848028720697 %
-[29] Char x: 0.0343210159020707 %
-[30] Char q: 0.010895560603831969 %
+[ 0] Char a: 15.363832178310547 %
+[ 1] Char i: 10.505712680277206 %
+[ 2] Char n: 8.10825997377786 %
+[ 3] Char h: 7.447087469563589 %
+[ 4] Char r: 6.299868889305113 %
+[ 5] Char e: 6.046076044203034 %
+[ 6] Char s: 5.528657051882375 %
+[ 7] Char t: 4.9690953362052825 %
+[ 8] Char c: 4.70593744146844 %
+[ 9] Char l: 4.132328151339202 %
+[10] Char o: 3.9469001685708935 %
+[11] Char d: 3.2154897920958985 %
+[12] Char g: 2.7795467315976774 %
+[13] Char m: 2.6760629331335455 %
+[14] Char á: 2.228413560591871 %
+[15] Char u: 2.17550103015546 %
+[16] Char b: 2.0130174189923205 %
+[17] Char í: 1.7522007866641691 %
+[18] Char é: 1.2207342198913653 %
+[19] Char f: 1.1186551788724481 %
+[20] Char ú: 1.0039333208466004 %
+[21] Char ó: 0.8967035025285635 %
+[22] Char p: 0.8475369919460574 %
+[23] Char y: 0.2289754635699569 %
+[24] Char v: 0.22101517138040833 %
+[25] Char k: 0.17606293313354562 %
+[26] Char w: 0.16295186364487732 %
+[27] Char j: 0.09271399138415433 %
+[28] Char z: 0.06836486233377037 %
+[29] Char x: 0.03511893613036149 %
+[30] Char q: 0.01311106948866829 %
-The first 31 characters have an accumulated ratio of 0.9997058198636966.
+The first 31 characters have an accumulated ratio of 0.9997986514328528.
-701 sequences found.
+707 sequences found.
-First 512 (typical positive ratio): 0.9974076651249096
-Next 512 (512-1024): 5.447780301915984e-06
-Rest: -2.7755575615628914e-17
+First 512 (typical positive ratio): 0.9976732191628278
+Next 512 (512-1024): 0.010039333208466004
+Rest: -3.5561831257524545e-17
-- Processing end: 2016-09-27 00:33:40.258886
+- Processing end: 2021-03-16 19:09:36.580170
diff --git a/script/BuildLangModelLogs/LangLatvianModel.log b/script/BuildLangModelLogs/LangLatvianModel.log
index 4dd7a21..3fafa6c 100644
--- a/script/BuildLangModelLogs/LangLatvianModel.log
+++ b/script/BuildLangModelLogs/LangLatvianModel.log
@@ -1,162 +1,165 @@
= Logs of language model for Latvian (lv) =
- Generated by BuildLangModel.py
-- Started: 2016-09-21 00:16:33.485953
-- Maximum depth: 5
+- Started: 2021-03-16 19:26:37.227238
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Zigfrīds Anna Meierovics (revision 2546984)
-1. Saeima (revision 2511127)
-1. Saeimas deputāti (revision 2303859)
-1. Saeimas frakcijas (revision 2429725)
-1. Saeimas vēlēšanas (revision 2464758)
-1887. gads (revision 2583253)
-1919. gada Parīzes miera konference (revision 2482078)
-1920 (revision 2401222)
-1921 (revision 2473337)
-1922 (revision 2486819)
-1923 (revision 2544643)
-1924 (revision 2539361)
-1925 (revision 2486795)
-22. augusts (revision 2583254)
-31. jūlijs (revision 2559648)
-5. februāris (revision 2581966)
-ASV (revision 2549746)
-Agrārā reforma Latvijā (revision 2473423)
-Agudas Izrael (Latvija) (revision 2311143)
-Aigars Kalvītis (revision 2545858)
-Alberts Kviesis (revision 2546934)
-Aleksandrs Bočagovs (revision 2329526)
-Aleksandrs Dauge (revision 2546805)
-Aleksandrs Jaunbērzs (revision 2462254)
-Aleksandrs Kerenskis (revision 2461214)
-Aleksandrs Millerāns (revision 2309419)
-Aleksandrs Neibergs (revision 2491897)
-Alfrēds Birznieks (revision 2567317)
-Alfrēds Jēkabs Bērziņš (revision 2564068)
-Alfrēds Riekstiņš (politiķis) (revision 2586148)
-Andrejs Bērziņš (revision 2564283)
-Andrejs Kurcijs (revision 2564338)
+Zigfrīds Anna Meierovics (revision 3325285)
+1. Saeima (revision 3366185)
+1. Saeimas deputāti (revision 3368139)
+1. Saeimas frakcijas (revision 3366184)
+1. Saeimas vēlēšanas (revision 3330484)
+1887. gads (revision 2773799)
+1919. gada Parīzes miera konference (revision 3359347)
+1920 (revision 3362733)
+1921 (revision 3340387)
+1922 (revision 3337740)
+1923 (revision 3347028)
+1924 (revision 3347028)
+1925 (revision 3347028)
+22. augusts (revision 3327223)
+31. jūlijs (revision 3347080)
+5. februāris (revision 3364814)
+Agrārā reforma Latvijā (revision 3328548)
+Agudas Izrael (Latvija) (revision 3285729)
+Aigars Kalvītis (revision 3169702)
+Alberts Kviesis (revision 3379738)
+Aleksandrs Bočagovs (revision 3004343)
+Aleksandrs Dauge (revision 3062538)
+Aleksandrs Jaunbērzs (revision 3373734)
+Aleksandrs Kerenskis (revision 2758772)
+Aleksandrs Millerāns (revision 3108576)
+Aleksandrs Neibergs (deputāts) (revision 3349399)
+Alfrēds Birznieks (revision 3300916)
+Alfrēds Jēkabs Bērziņš (revision 3351998)
+Alfrēds Riekstiņš (politiķis) (revision 3034089)
+Amerikas Savienotās Valstis (revision 3355214)
+Andrejs Bērziņš (politiķis) (revision 3089135)
+Andrejs Kurcijs (revision 3223696)
Andrejs Petrevics (revision 2460269)
-Andrejs Sīmanis (revision 2547079)
-Andrejs Veckalns (revision 2564224)
-Andrievs Niedra (revision 2546988)
-Andris Bērziņš (politiķis, 1951) (revision 2218488)
-Andris Šķēle (revision 2457423)
-Angļu valoda (revision 2447598)
-Ansis Buševics (revision 2578312)
-Ansis Rudevics (revision 2414854)
-Antante (revision 2581862)
+Andrejs Sīmanis (revision 3210302)
+Andrejs Veckalns (revision 3237365)
+Andrievs Niedra (revision 3374557)
+Andris Bērziņš (politiķis, 1951) (revision 3231604)
+Andris Šķēle (revision 3379347)
+Angļu valoda (revision 3303218)
+Ansis Buševics (revision 2927384)
+Ansis Rudevics (revision 2700953)
+Antante (revision 3373256)
Antons Dzenis (revision 2564295)
-Antons Laizāns (revision 2467408)
-Antons Rubins (1885) (revision 2465396)
-Antons Velkme (revision 2564425)
-Ants Pīps (revision 2564383)
-Apollo (portāls) (revision 2371202)
-Apolonija Laurinoviča (revision 2466232)
-Aprīļa pučs (revision 2150686)
-Apvienotā Karaliste (revision 2566258)
-Aristīds Briāns (revision 2536819)
-Arons Nuroks (revision 2337085)
-Arturs Alberings (revision 2442531)
-Arturs Ozols (inženieris) (revision 2491399)
-Artūrs Balfūrs (revision 2309461)
-Artūrs Vīgants (revision 2461471)
-Artūrs Žers (revision 2564230)
-Arveds Bergs (revision 2564118)
-Arveds Švābe (revision 2586288)
-Arvīds Kalniņš (revision 2545254)
-Aspazija (revision 2574081)
-Augusts Briedis (revision 2546879)
-Augusts Kalniņš (revision 2436647)
-Augusts Kirhenšteins (revision 2547109)
-Austroungārija (revision 2524307)
+Antons Laizāns (revision 3360427)
+Antons Rubins (1885) (revision 3351508)
+Antons Velkme (revision 3279136)
+Ants Pīps (revision 3375003)
+Apollo (portāls) (revision 3232284)
+Apolonija Laurinoviča (revision 3209013)
+Aprīļa pučs (revision 3010427)
+Apvienotā Karaliste (revision 3382180)
+Aristīds Briāns (revision 2767296)
+Arons Nuroks (revision 3062127)
+Arturs Alberings (revision 3325257)
+Arturs Krišjānis Kariņš (revision 3381504)
+Arturs Ozols (inženieris) (revision 3352707)
+Artūrs Balfūrs (revision 3177309)
+Artūrs Reisners (revision 3300906)
+Artūrs Vīgants (revision 3296217)
+Artūrs Žers (revision 3296461)
+Arveds Bergs (revision 3238379)
+Arveds Švābe (revision 3340584)
+Arvīds Kalniņš (ķīmiķis) (revision 3382254)
+Aspazija (revision 3382469)
+Augusts Briedis (revision 3163311)
+Augusts Kalniņš (revision 3310251)
+Augusts Kirhenšteins (revision 3302758)
+Austroungārija (revision 3376635)
Autoritatīvā vadība (revision 2385793)
-Balfūra nota (revision 2538973)
-Baltijas Antante (revision 2541901)
-Baltijas pārkrievošana (revision 2570657)
-Bermontiāde (revision 2499160)
+Balfūra nota (revision 3224093)
+Baltijas Antante (revision 3236261)
+Baltijas pārkrievošana (revision 3311586)
+Bermontiāde (revision 3156269)
Bernards Kublinskis (revision 2441386)
-Bezpartijiskais nacionālais centrs (revision 2438819)
-Beļģija (revision 2579008)
-Brestļitovskas miera līgums (revision 2569020)
-Brizules muiža (revision 2584564)
-Bruno Kalniņš (revision 2566572)
-Brīvības piemineklis (revision 2578595)
-Bulduru konference (revision 2193449)
-Ceire-Cion (revision 2311779)
-Celmiņa 1. Ministru kabinets (revision 2112830)
-Delfi (portāls) (revision 2544918)
-Demokrātiskais Centrs (revision 2113060)
-Demokrātu savienība (revision 2179593)
-Diena (laikraksts) (revision 2548854)
-Donats Bicāns (revision 2479349)
-Dubulti (Jūrmala) (revision 2456811)
-Durbe (revision 2381790)
-Dāvids Komisārs (revision 2574685)
-Džovanni Džoliti (revision 2538055)
-Ebreju bloks (revision 2311643)
-Ebreju nacionāldemokrātu partija (revision 2312288)
-Eduards Grantskalns (revision 2565167)
-Eduards Jaunzems (revision 2452579)
-Eduards Laimiņš (revision 2449521)
-Eduards Radziņš (revision 2564393)
+Berta Vesmane (revision 3299697)
+Bezpartijiskais nacionālais centrs (revision 3286113)
+Beļģija (revision 3308106)
+Brestļitovskas miera līgums (revision 3348377)
+Brizules muiža (revision 3103947)
+Bruno Kalniņš (revision 3297011)
+Brīvības piemineklis (revision 3343774)
+Bulduru konference (revision 3122422)
+Bunds (revision 3368404)
+Ceire-Cion (revision 3285715)
+Celmiņa 1. Ministru kabinets (revision 2925529)
+Delfi (portāls) (revision 3363824)
+Demokrātiskais Centrs (revision 3286115)
+Demokrātu savienība (revision 3339759)
+Diena (laikraksts) (revision 3343800)
+Donats Bicāns (revision 3311441)
+Dubulti (Jūrmala) (revision 3349180)
+Durbe (revision 3380441)
+Dāvids Komisārs (revision 3082713)
+Džovanni Džoliti (revision 3165202)
+Ebreji (revision 3340750)
+Ebreju bloks (revision 3285659)
+Ebreju nacionāldemokrātu partija (revision 3368172)
+Eduards Grantskalns (revision 2932497)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-21 00:19:18.361533
+- Wikipedia parsing ended at: 2021-03-16 19:30:28.292124
-55 characters appeared 354745 times.
+55 characters appeared 437791 times.
-First 39 characters:
-[ 0] Char a: 11.905171320244119 %
-[ 1] Char i: 9.3977364022044 %
-[ 2] Char s: 8.224217395594017 %
-[ 3] Char e: 6.367108768270166 %
-[ 4] Char r: 5.854064186951191 %
-[ 5] Char t: 5.831230884156225 %
-[ 6] Char u: 4.939604504644181 %
-[ 7] Char n: 4.463769750102186 %
-[ 8] Char ā: 3.9498794909019157 %
-[ 9] Char l: 3.8030134321836813 %
-[10] Char o: 3.6296494665182033 %
-[11] Char k: 3.524785409237621 %
-[12] Char m: 3.2739009711201 %
-[13] Char d: 3.177775585279567 %
-[14] Char v: 3.0046935122411873 %
-[15] Char p: 2.827101157169234 %
-[16] Char j: 2.8166711299665956 %
-[17] Char b: 2.0279355593454453 %
-[18] Char ī: 1.8855797826607845 %
-[19] Char g: 1.6146809680192813 %
-[20] Char z: 1.5343415692962552 %
-[21] Char ē: 1.4593581304880971 %
-[22] Char c: 1.2231321089796898 %
-[23] Char š: 0.8876798827326671 %
-[24] Char ņ: 0.46596851259355315 %
-[25] Char f: 0.4203019070036223 %
-[26] Char ļ: 0.34700982395805435 %
-[27] Char ū: 0.30162511099522193 %
-[28] Char h: 0.20070755049401684 %
-[29] Char ž: 0.18774048964749326 %
-[30] Char ķ: 0.14207388405756247 %
-[31] Char ģ: 0.1268516821942522 %
-[32] Char č: 0.08287643236691145 %
-[33] Char w: 0.0324176521163089 %
-[34] Char y: 0.02734358482853881 %
-[35] Char x: 0.015785987117506943 %
-[36] Char ö: 0.005074067287770088 %
-[37] Char é: 0.003946496779376736 %
-[38] Char q: 0.0031008188980817205 %
+First 40 characters:
+[ 0] Char a: 11.993622527644469 %
+[ 1] Char i: 9.41179695334075 %
+[ 2] Char s: 8.204599911830075 %
+[ 3] Char e: 6.371761868106014 %
+[ 4] Char t: 5.8011699646635035 %
+[ 5] Char r: 5.772845947038655 %
+[ 6] Char u: 4.945053690002764 %
+[ 7] Char n: 4.437505567725239 %
+[ 8] Char ā: 4.014015820334361 %
+[ 9] Char l: 3.6974263975275874 %
+[10] Char o: 3.597150238355745 %
+[11] Char k: 3.5347917156816835 %
+[12] Char m: 3.307971155185922 %
+[13] Char d: 3.2337348186691823 %
+[14] Char v: 2.977904982057648 %
+[15] Char j: 2.8618678775945603 %
+[16] Char p: 2.8296607285211435 %
+[17] Char b: 2.040242946976982 %
+[18] Char ī: 1.874638811670409 %
+[19] Char g: 1.6240626234892905 %
+[20] Char z: 1.5235580448204737 %
+[21] Char ē: 1.5109949724868716 %
+[22] Char c: 1.216105401892684 %
+[23] Char š: 0.9225863482803439 %
+[24] Char ņ: 0.45478321847639624 %
+[25] Char f: 0.42691603984549703 %
+[26] Char ļ: 0.3277819781585277 %
+[27] Char ū: 0.29420431210326387 %
+[28] Char h: 0.18616189003428577 %
+[29] Char ž: 0.1815935000947941 %
+[30] Char ķ: 0.126772820820894 %
+[31] Char ģ: 0.11649394345703772 %
+[32] Char č: 0.08382995538967224 %
+[33] Char y: 0.029466115109721306 %
+[34] Char w: 0.029466115109721306 %
+[35] Char x: 0.012334652836627522 %
+[36] Char é: 0.0050252289334408425 %
+[37] Char ö: 0.0034262924546187568 %
+[38] Char ü: 0.0027410339636950052 %
+[39] Char q: 0.0025126144667204212 %
-The first 39 characters have an accumulated ratio of 0.9998590536864506.
+The first 40 characters have an accumulated ratio of 0.9998857902515126.
-970 sequences found.
+982 sequences found.
-First 512 (typical positive ratio): 0.9904102202220861
-Next 512 (512-1024): 0.0018774048964749328
-Rest: -1.734723475976807e-17
+First 512 (typical positive ratio): 0.9904642991017133
+Next 512 (512-1024): 0.001815935000947941
+Rest: -5.377642775528102e-17
-- Processing end: 2016-09-21 00:19:18.484318
+- Processing end: 2021-03-16 19:30:28.395006
diff --git a/script/BuildLangModelLogs/LangLithuanianModel.log b/script/BuildLangModelLogs/LangLithuanianModel.log
index 9ea0467..5db032a 100644
--- a/script/BuildLangModelLogs/LangLithuanianModel.log
+++ b/script/BuildLangModelLogs/LangLithuanianModel.log
@@ -1,162 +1,166 @@
= Logs of language model for Lithuanian (lt) =
- Generated by BuildLangModel.py
-- Started: 2016-09-21 00:23:03.857157
-- Maximum depth: 5
+- Started: 2021-03-16 19:23:31.104161
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Karūna (laivas) (revision 5080379)
-1650 (revision 4990868)
-1654 (revision 4991037)
-1664 (revision 4991048)
-1665 (revision 4991050)
-1668 (revision 4991052)
-1669 (revision 4991053)
-1672 (revision 4991056)
-1676 (revision 4991060)
-1718 (revision 4990914)
-1909 (revision 4990667)
-1928 (revision 4990262)
-1932 (revision 4990613)
-1956 (revision 4990635)
-1980 (revision 4990655)
-Baltijos jūra (revision 5052833)
-Burinis laivas (revision 4657401)
-Flagmanas (laivas) (revision 5005271)
-Grimzlė (revision 4487052)
-Kalmaras (Švedija) (revision 4978519)
-Karo laivas (revision 4726931)
-Karolis XI (revision 4944621)
-Karolis XII (revision 4915230)
-Kilis (revision 4325533)
-Koordinačių sistema (revision 5033980)
-Laivo vėliava (revision 4986001)
-Liepos 1 d. (revision 4910200)
-Nyderlandai (revision 5080140)
-Rugpjūčio 10 (revision 4910281)
-Varytuvas (revision 4620792)
-Vaza (laivas) (revision 5079282)
-XVIII a. (revision 4896219)
-XVII a. (revision 4768242)
-Švedija (revision 5057665)
-Švedų kalba (revision 4687559)
-1590 (revision 4990983)
-1596 (revision 4990989)
-1608 (revision 4991000)
-1610 (revision 4991002)
-1623 m. (revision 4991015)
-1634 m. (revision 4991026)
-1643 m. (revision 4990870)
-1644 m. (revision 4990872)
-1645 m. (revision 4990873)
-1646 m. (revision 4990874)
-1647 m. (revision 4913295)
-1648 m. (revision 4990875)
-1649 m. (revision 4990876)
-1651 m. (revision 4991035)
-1652 m. (revision 4991072)
-1653 m. (revision 4991036)
-1654 m. (revision 4991037)
-1655 m. (revision 4991038)
-1662 m. (revision 4991046)
-1668 m. (revision 4991052)
-1677 m. (revision 4991061)
-1702 (revision 4990595)
-1704 (revision 4990863)
-1722 (revision 4990918)
-1723 (revision 4990919)
-1737 (revision 4990931)
-2 tūkstantmetis (revision 4296407)
-ATR (revision 5078529)
-Abiejų Tautų Respublika (revision 5078529)
-Adomas Freitagas (revision 4362991)
-Anglų kalba (revision 4911240)
-Armėnų kalendorius (revision 4817534)
-Bahajų kalendorius (revision 4706296)
-Bajorai (revision 5006456)
+Karūna (laivas) (revision 5105933)
+1650 (revision 5301814)
+1654 (revision 5301823)
+1664 (revision 5301833)
+1665 (revision 5301834)
+1668 (revision 5301872)
+1669 (revision 5301873)
+1672 (revision 5301876)
+1676 (revision 5801857)
+1718 (revision 5301969)
+1909 (revision 6129929)
+1928 (revision 6176161)
+1932 (revision 6195207)
+1956 (revision 6150066)
+1980 (revision 6190258)
+Baltijos jūra (revision 6193053)
+Burinis laivas (revision 6040752)
+Flagmanas (laivas) (revision 5987584)
+Grimzlė (revision 5989647)
+Kalmaras (Švedija) (revision 5604914)
+Karo laivas (revision 5994228)
+Karolis XI (revision 5480144)
+Karolis XII (revision 5880104)
+Kilis (revision 5995782)
+Koordinačių sistema (revision 6044079)
+Laivo vėliava (revision 6208955)
+Liepos 1 d. (revision 5779083)
+Nyderlandai (revision 6196943)
+Olando mūšis (revision 6020430)
+Rugpjūčio 10 (revision 5793253)
+Varytuvas (revision 6020287)
+Vaza (laivas) (revision 6203069)
+XVIII a. (revision 6031323)
+XVII a. (revision 6025004)
+Švedija (revision 6205204)
+Švedų kalba (revision 5560532)
+1590 (revision 5801846)
+1596 (revision 5552466)
+1608 (revision 5637570)
+1610 (revision 5301721)
+1647 m. (revision 5301819)
+1648 m. (revision 5301818)
+1649 m. (revision 5301820)
+1651 m. (revision 5301821)
+1652 m. (revision 5301836)
+1653 m. (revision 5301822)
+1702 (revision 5301912)
+1704 (revision 5301925)
+1722 (revision 5301973)
+1723 (revision 5301974)
+1737 (revision 5302020)
+2 tūkstantmetis (revision 5976362)
+ATR (revision 6212255)
+Abiejų Tautų Respublika (revision 6212255)
+Adomas Freitagas (revision 6152308)
+Armėnų kalendorius (revision 5965695)
+Bahajų kalendorius (revision 6168286)
+Bajorai (revision 6040220)
Berberų kalendorius (revision 4926904)
-Birželio 21 (revision 4910142)
-Bizantijos kalendorius (revision 4927623)
-Budistų kalendorius (revision 4705734)
-Dešimtmetis (revision 4296419)
-Dominikonai (revision 4921895)
-Dominikonų ordinas (revision 4921895)
-Džohoro sultonatas (revision 4934526)
-Džu Ihai (revision 4991072)
-Džu Joulang (revision 4991072)
-Emanuelis Vladislovas Tiškevičius Logoiskis (revision 4939239)
-Filosofas (revision 5078172)
-Gegužės 26 (revision 4910130)
-Grafas (titulas) (revision 5008057)
-Grigaliaus kalendorius (revision 5000317)
-Hebrajų kalendorius (revision 4728592)
-Imperatorius Go-Komijas (revision 4907057)
-Inocentas X (revision 4905150)
+Birželio 21 (revision 6172033)
+Bizantijos kalendorius (revision 5300569)
+Budistų kalendorius (revision 5979182)
+Dešimtmetis (revision 5982040)
+Dominikonai (revision 6068818)
+Dominikonų ordinas (revision 6068818)
+Emanuelis Vladislovas Tiškevičius Logoiskis (revision 5761120)
+Filosofas (revision 5836448)
+Gegužės 26 (revision 6075204)
+Grafas (titulas) (revision 5832187)
+Grigaliaus kalendorius (revision 5989624)
+Hebrajų kalendorius (revision 5990271)
Iraniečių kalendorius (revision 4964854)
-Isaac Titsingh (revision 4990745)
-Japonija (revision 5035249)
-Japonijos imperatorius (revision 4720428)
-Japonų kalendorius (revision 4956765)
-John Churchill (revision 4903704)
-Jonas Kazimieras Vaza (revision 5037754)
+Japonų kalendorius (revision 6082601)
+John Churchill (revision 5350480)
Jurgis Kasakauskis (revision 5047829)
Jurgis Kazimieras Ancuta (revision 5059404)
-Jurgis Mikalojus Tiškevičius (revision 4939554)
+Jurgis Mikalojus Tiškevičius (revision 5481136)
+Kalijugos kalendorius (revision 5741238)
+Kazimieras Tiškevičius Logoiskis (revision 5481143)
+Kinų kalendorius (revision 5995873)
+Koptų kalendorius (revision 5996919)
+Korėjiečių kalendorius (revision 5996955)
+LDK (revision 6130316)
+Lapkričio 14 (revision 5943612)
+Lelija (herbas) (revision 5999126)
+Lietuvių kalba (revision 6201110)
+Lietuvos Didžioji Kunigaikštystė (revision 6130316)
+Metai (revision 5765072)
+Mianmaro kalendorius (revision 5979182)
+Mokslų daktaras (revision 6172930)
+Motiejus Juozapas Ancuta (revision 4951716)
+Musulmonų kalendorius (revision 4705912)
+Nekeliamieji metai, prasidedantys šeštadienį (revision 6004293)
+Profesorius (revision 6009297)
+René Descartes (revision 6201538)
+Saka kalendorius (revision 6109866)
+Senovės indų kalendoriai (revision 6012785)
+Spauda (revision 5345510)
+Stanislovas Kristupas Naruševičius (revision 5481106)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-21 00:25:34.773941
+- Wikipedia parsing ended at: 2021-03-16 19:26:36.949228
-60 characters appeared 353051 times.
+68 characters appeared 398895 times.
-First 38 characters:
-[ 0] Char i: 13.032394753165974 %
-[ 1] Char a: 11.167225131779828 %
-[ 2] Char s: 8.586578143101137 %
-[ 3] Char o: 7.018815978428046 %
-[ 4] Char e: 5.525830545728521 %
-[ 5] Char r: 5.469181506354606 %
-[ 6] Char n: 5.142599794363987 %
-[ 7] Char t: 5.105777918770942 %
-[ 8] Char u: 4.270487833202568 %
-[ 9] Char k: 3.9617505686147325 %
-[10] Char l: 3.9051015292408184 %
-[11] Char m: 3.359854525266888 %
-[12] Char d: 3.0372382460324427 %
-[13] Char v: 2.7270847554602593 %
-[14] Char j: 2.4472385009531203 %
-[15] Char p: 2.329125253858508 %
-[16] Char g: 1.9427788053284087 %
-[17] Char ė: 1.5657794482950054 %
-[18] Char b: 1.5074309377398734 %
-[19] Char y: 1.2236192504765602 %
-[20] Char ų: 1.181698961339863 %
-[21] Char š: 0.9630336693565519 %
-[22] Char ž: 0.8171623929687212 %
-[23] Char c: 0.5959478942135839 %
-[24] Char č: 0.48010060869392807 %
-[25] Char f: 0.428266737666796 %
-[26] Char h: 0.42515104050123065 %
-[27] Char z: 0.4010751987673169 %
-[28] Char ū: 0.3685020011273159 %
-[29] Char ą: 0.3526402701026197 %
-[30] Char į: 0.29004308159444386 %
-[31] Char ę: 0.14813723796278724 %
-[32] Char x: 0.08752276583269838 %
-[33] Char w: 0.059198246145740985 %
-[34] Char ō: 0.01812769259965274 %
-[35] Char ö: 0.008780601102956797 %
-[36] Char é: 0.0076476203154785 %
-[37] Char q: 0.007364375118608926 %
+First 40 characters:
+[ 0] Char i: 13.296732222765389 %
+[ 1] Char a: 11.103673899146392 %
+[ 2] Char s: 8.654407801551786 %
+[ 3] Char o: 6.708030935459205 %
+[ 4] Char e: 5.518244149462891 %
+[ 5] Char r: 5.427493450657441 %
+[ 6] Char t: 5.170533599067424 %
+[ 7] Char n: 5.082039133105203 %
+[ 8] Char u: 4.293109715589315 %
+[ 9] Char k: 4.091302222389351 %
+[10] Char l: 3.876208024668146 %
+[11] Char m: 3.384349264844132 %
+[12] Char d: 3.0411511801351234 %
+[13] Char v: 2.8220459018037327 %
+[14] Char j: 2.286817332882087 %
+[15] Char p: 2.243196831246318 %
+[16] Char g: 1.902756364456812 %
+[17] Char ė: 1.5700873663495405 %
+[18] Char b: 1.55980897228594 %
+[19] Char y: 1.2637410847466124 %
+[20] Char ų: 1.1800097770089872 %
+[21] Char š: 0.9924917584828087 %
+[22] Char ž: 0.8423269281389839 %
+[23] Char c: 0.557289512277667 %
+[24] Char č: 0.49461637774351647 %
+[25] Char f: 0.40336429386179323 %
+[26] Char ū: 0.3863172012685043 %
+[27] Char ą: 0.36901941613707867 %
+[28] Char z: 0.362501410145527 %
+[29] Char h: 0.3604958698404342 %
+[30] Char į: 0.3070983592173379 %
+[31] Char ę: 0.15618145125910327 %
+[32] Char x: 0.09777008987327492 %
+[33] Char w: 0.05715789869514534 %
+[34] Char ó: 0.027325486656889657 %
+[35] Char á: 0.014289474673786336 %
+[36] Char é: 0.011531856754283708 %
+[37] Char ã: 0.011030471678010504 %
+[38] Char ö: 0.008523546296644481 %
+[39] Char q: 0.007270083605961468 %
-The first 38 characters have an accumulated ratio of 0.9996629382157253.
+The first 40 characters have an accumulated ratio of 0.9994234071622861.
-1016 sequences found.
+1138 sequences found.
-First 512 (typical positive ratio): 0.9928710196247589
-Next 512 (512-1024): 0.008171623929687212
-Rest: -4.85722573273506e-17
+First 512 (typical positive ratio): 0.9919219576954762
+Next 512 (512-1024): 0.008423269281389839
+Rest: 0.00033781981757727893
-- Processing end: 2016-09-21 00:25:34.935858
+- Processing end: 2021-03-16 19:26:37.062994
diff --git a/script/BuildLangModelLogs/LangMalteseModel.log b/script/BuildLangModelLogs/LangMalteseModel.log
index ad867b3..76b703b 100644
--- a/script/BuildLangModelLogs/LangMalteseModel.log
+++ b/script/BuildLangModelLogs/LangMalteseModel.log
@@ -1,147 +1,147 @@
= Logs of language model for Maltese (mt) =
- Generated by BuildLangModel.py
-- Started: 2016-09-21 02:05:23.411546
-- Maximum depth: 5
+- Started: 2021-03-16 19:30:28.553074
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Unjoni Ewropea (revision 246298)
+Unjoni Ewropea (revision 255663)
1951 (revision 229183)
1952 (revision 229184)
1957 (revision 229188)
1958 (revision 229189)
-1973 (revision 223536)
-1979 (revision 243876)
-1981 (revision 205545)
-1985 (revision 216368)
-1986 (revision 231433)
-1990 (revision 237666)
-1992 (revision 244087)
-1995 (revision 214650)
-1 ta' Mejju (revision 245374)
-2007 (revision 214851)
-2013 (revision 245606)
-Albanija (revision 243079)
-Awstrija (revision 243627)
-Awtonomija (revision 245824)
-Ażores (revision 246298)
-Bank Ċentrali Ewropew (revision 246298)
-Belt kapitali (revision 237400)
-Belġju (revision 244363)
+1973 (revision 252982)
+1979 (revision 252967)
+1981 (revision 253774)
+1985 (revision 252978)
+1986 (revision 252978)
+1990 (revision 257440)
+1992 (revision 249582)
+1995 (revision 252258)
+1 ta' Mejju (revision 258193)
+2007 (revision 258027)
+2013 (revision 248708)
+Albanija (revision 261944)
+Awstrija (revision 261959)
+Awtonomija (revision 262074)
+Ażores (revision 255663)
+Bank Ċentrali Ewropew (revision 255748)
+Belt kapitali (revision 255506)
+Belġju (revision 255745)
Brussell (revision 243311)
-Bulgarija (revision 243622)
-Danimarka (revision 244419)
+Bulgarija (revision 261950)
+Danimarka (revision 256058)
+Dazji doganali (revision 255663)
De facto (revision 215102)
-Estonja (revision 243826)
-European Free Trade Association (revision 246298)
-Ewropa (revision 244177)
-Ex Repubblika Jugoslava tal-Maċedonja (revision 246298)
-Federazzjoni (revision 246226)
-Finlandja (revision 245824)
-Frankfurt (revision 243576)
-Franza (revision 244461)
-Greċja (revision 244423)
-Groenlandja (revision 243829)
-Indja (revision 244873)
-Islanda (revision 243771)
-Isle of Man (revision 246298)
-Istitut tal-Unjoni Ewropea għall-Istudji dwar is-Sigurtà (revision 244412)
-Italja (revision 246323)
-Kilometru kwadru (revision 244871)
-Komunitajiet Ewropej (revision 246298)
-Komunità Ekonomika Ewropea (revision 246298)
-Kroazja (revision 245711)
-Kummissjoni Ewropea (revision 243311)
-Kunsill Ewropew (revision 246298)
-Kunsill tal-Ewropa (revision 243334)
-Kunsill tal-Unjoni Ewropea (revision 243311)
-Latvja (revision 245746)
-Lista ta' pajjiżi skont id-daqs (revision 244419)
-Lista ta' pajjiżi skont il-popolazzjoni (revision 246128)
-Litwanja (revision 243114)
-Liġijiet tal-Unjoni Ewropea (revision 246298)
-Lussemburgu (revision 244239)
+Dħul nazzjonali gross (revision 255663)
+Estonja (revision 255711)
+European Free Trade Association (revision 255663)
+Ewropa (revision 259973)
+Ex Repubblika Jugoslava tal-Maċedonja (revision 255663)
+Federazzjoni (revision 228364)
+Finlandja (revision 258210)
+Frankfurt (revision 261246)
+Franza (revision 259635)
+Greċja (revision 259971)
+Groenlandja (revision 250685)
+Indja (revision 254565)
+Islanda (revision 255630)
+Isle of Man (revision 259978)
+Istati Membri (revision 255663)
+Istitut tal-Unjoni Ewropea għall-Istudji dwar is-Sigurtà (revision 256700)
+Italja (revision 254814)
+Kilometru kwadru (revision 247665)
+Komunitajiet Ewropej (revision 256698)
+Komunità Ekonomika Ewropea (revision 255663)
+Kroazja (revision 249144)
+Kummissjoni Ewropea (revision 258115)
+Kunsill Ewropew (revision 255754)
+Kunsill tal-Ewropa (revision 255754)
+Kunsill tal-Unjoni Ewropea (revision 255754)
+Latvja (revision 255712)
+Lista ta' pajjiżi skont id-daqs (revision 254529)
+Lista ta' pajjiżi skont il-popolazzjoni (revision 260622)
+Litwanja (revision 259637)
+Liġijiet tal-Unjoni Ewropea (revision 255663)
+Lussemburgu (revision 253431)
Lussemburgu (belt) (revision 243587)
Madejra (revision 243625)
-Malta (revision 247210)
-Montenegro (revision 243930)
-Norveġja (revision 243829)
-Olanda (revision 243989)
-Organizzazzjoni Internazzjonali (revision 246724)
-Pajjiżi l-Baxxi (revision 243989)
-Pajjiżi membri tal-Unjoni Ewropea (revision 243625)
-Pajjiżi ġirien li jdawru l-Unjoni Ewropea (revision 246298)
-Parlament Ewropew (revision 243907)
-Patt ta' Stabilità u Tkabbir (revision 246298)
-Politika agrikola komuni (revision 244363)
-Politika reġjonali tal-Unjoni Ewropea (revision 246298)
-Polonja (revision 244530)
+Malta (revision 261973)
+Montenegro (revision 255647)
+Norveġja (revision 261168)
+Olanda (revision 261407)
+Organizzazzjoni Internazzjonali (revision 258039)
+Organizzazzjonijiet mhux governattivi (revision 233500)
+Pajjiżi l-Baxxi (revision 261407)
+Pajjiżi membri tal-Unjoni Ewropea (revision 255663)
+Pajjiżi ġirien li jdawru l-Unjoni Ewropea (revision 255663)
+Parlament Ewropew (revision 255748)
+Politika agrikola komuni (revision 255745)
+Politika reġjonali tal-Unjoni Ewropea (revision 255663)
+Polonja (revision 261762)
Portugall (revision 243625)
-Relazzjonijiet ta' terzi pajjiżi ma l-UE (revision 246298)
-Renju Unit (revision 247318)
-Repubblika Federali tal-Ġermanja (revision 244859)
-Repubblika tal-Irlanda (revision 243686)
-Repubblika Ċeka (revision 246832)
-Rumanija (revision 243623)
-Segretarjat tal-Parlament Ewropew (revision 246298)
-Serbja (revision 243728)
-Slovakkja (revision 243831)
-Slovenja (revision 244588)
-Spanja (revision 246856)
-Stati Uniti tal-Amerika (revision 243926)
-Stati membri tal-Unjoni Ewropea (revision 243114)
+Qorti tal-Ġustizzja tal-Unjoni Ewropea (revision 255663)
+Relazzjonijiet ta' terzi pajjiżi ma l-UE (revision 255663)
+Renju Unit (revision 254529)
+Repubblika Federali tal-Ġermanja (revision 258687)
+Repubblika tal-Irlanda (revision 250619)
+Repubblika Ċeka (revision 255669)
+Rumanija (revision 261954)
+Segretarjat tal-Parlament Ewropew (revision 255663)
+Serbja (revision 259975)
+Slovakkja (revision 255727)
+Slovenja (revision 261963)
+Spanja (revision 258290)
+Stati membri tal-Unjoni Ewropea (revision 255663)
Strasburgu (revision 243503)
-Sui generis (revision 247150)
-Suq komuni (revision 246298)
-Svezja (revision 244871)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-21 02:07:45.508113
+- Wikipedia parsing ended at: 2021-03-16 19:33:28.445834
-48 characters appeared 474337 times.
+49 characters appeared 643393 times.
First 31 characters:
-[ 0] Char a: 12.326257492036252 %
-[ 1] Char i: 12.069899670487438 %
-[ 2] Char t: 8.064941170518008 %
-[ 3] Char l: 7.795301652622502 %
-[ 4] Char e: 6.615971345267184 %
-[ 5] Char n: 6.128132530247482 %
-[ 6] Char r: 5.579577389071483 %
-[ 7] Char u: 4.376424356522894 %
-[ 8] Char o: 3.8337721915009797 %
-[ 9] Char j: 3.7378488289971057 %
-[10] Char m: 3.6084049947611088 %
-[11] Char s: 3.3533120966738834 %
-[12] Char k: 2.588033402412209 %
-[13] Char d: 2.3173397816320462 %
-[14] Char p: 2.0555006250830106 %
-[15] Char b: 2.017131280081461 %
-[16] Char f: 2.004692866042497 %
-[17] Char ħ: 1.6372326004507345 %
-[18] Char w: 1.4801712706366992 %
-[19] Char g: 1.4763765002519307 %
-[20] Char z: 1.3150987588992635 %
-[21] Char ż: 0.9910675321554084 %
-[22] Char h: 0.9750451683086075 %
-[23] Char ġ: 0.7640137708000851 %
-[24] Char ċ: 0.6723068198348432 %
-[25] Char x: 0.5892435125237964 %
-[26] Char v: 0.5668965313690478 %
-[27] Char q: 0.5647883255997318 %
-[28] Char c: 0.2759641352034524 %
-[29] Char à: 0.10730767365817974 %
-[30] Char y: 0.059029761540845424 %
+[ 0] Char i: 12.115145797358691 %
+[ 1] Char a: 12.109705887381429 %
+[ 2] Char t: 8.033037350421903 %
+[ 3] Char l: 7.963095650714261 %
+[ 4] Char e: 6.5463876666361 %
+[ 5] Char n: 5.990118014961307 %
+[ 6] Char r: 5.530834186881113 %
+[ 7] Char u: 4.447514971409388 %
+[ 8] Char o: 3.9081867536637795 %
+[ 9] Char j: 3.7945703481386963 %
+[10] Char m: 3.619405246870886 %
+[11] Char s: 3.4255890256810377 %
+[12] Char k: 2.5824029792055554 %
+[13] Char d: 2.3040350143691337 %
+[14] Char p: 2.1852895508654897 %
+[15] Char b: 2.0524003214209667 %
+[16] Char f: 1.9347428399127748 %
+[17] Char ħ: 1.6223365812186332 %
+[18] Char g: 1.4863388317871036 %
+[19] Char w: 1.4324060100125429 %
+[20] Char z: 1.3761417982477273 %
+[21] Char ż: 0.9421924080616357 %
+[22] Char h: 0.9235412881395973 %
+[23] Char ġ: 0.7990450626599915 %
+[24] Char ċ: 0.6618039052336597 %
+[25] Char v: 0.6143989754318122 %
+[26] Char x: 0.610357899448704 %
+[27] Char q: 0.5511405936962324 %
+[28] Char c: 0.24153200299039623 %
+[29] Char à: 0.08936994962643362 %
+[30] Char y: 0.061082417744675495 %
-The first 31 characters have an accumulated ratio of 0.9994708403519017.
+The first 31 characters have an accumulated ratio of 0.9995414933019164.
-870 sequences found.
+888 sequences found.
-First 512 (typical positive ratio): 0.9959115850692665
-Next 512 (512-1024): 2.108205769315908e-06
-Rest: -4.423544863740858e-17
+First 512 (typical positive ratio): 0.9960434044151966
+Next 512 (512-1024): 0.009421924080616357
+Rest: 1.5612511283791264e-17
-- Processing end: 2016-09-21 02:07:45.646198
+- Processing end: 2021-03-16 19:33:28.518739
diff --git a/script/BuildLangModelLogs/LangPolishModel.log b/script/BuildLangModelLogs/LangPolishModel.log
index f90f2de..f92700b 100644
--- a/script/BuildLangModelLogs/LangPolishModel.log
+++ b/script/BuildLangModelLogs/LangPolishModel.log
@@ -1,154 +1,163 @@
= Logs of language model for Polish (pl) =
- Generated by BuildLangModel.py
-- Started: 2016-09-21 17:06:43.735784
-- Maximum depth: 5
+- Started: 2021-03-16 19:33:28.678083
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Krasnyj Krym (revision 46884814)
-1913 (revision 46708474)
-1915 (revision 46743905)
-1917 (revision 46559521)
-1925 (revision 46809935)
-1928 (revision 46875978)
-1929 (revision 46760445)
-1935 (revision 46487358)
-1936 (revision 46874348)
-1939 (revision 46789269)
-1941 (revision 46856112)
-1942 (revision 46851808)
-1943 (revision 46768330)
-1944 (revision 46866229)
-1949 (revision 46882598)
-1953 (revision 46437607)
-1957 (revision 46591716)
-1959 (revision 46255886)
-Admirał Butakow (revision 45993412)
-Admirał Spiridow (revision 45993412)
-Aparat torpedowy (revision 46633263)
-Askold (revision 45787848)
-Avro 504 (revision 44668646)
-Ałmaz (1903) (revision 46472283)
-Batumi (revision 46594611)
-Bomba głębinowa (revision 46011227)
-Brest (revision 45771242)
+Krasnyj Krym (revision 62415649)
+11 grudnia (revision 62631194)
+1913 (revision 62510480)
+1915 (revision 62045210)
+1925 (revision 62586144)
+1929 (revision 62587250)
+1935 (revision 62643677)
+1936 (revision 62578718)
+1939 (revision 62647877)
+1941 (revision 62626183)
+1942 (revision 62634977)
+1943 (revision 62605793)
+1944 (revision 62629763)
+1949 (revision 62629889)
+1953 (revision 62544578)
+1957 (revision 62605043)
+1959 (revision 62544562)
+45 mm armata morska 21-K (revision 61708436)
+76 mm armata przeciwlotnicza wz. 1914/15 (revision 62529098)
+7 grudnia (revision 62636554)
+AG Vulcan Stettin (revision 56402035)
+Admirał Butakow (revision 61255818)
+Aurora (1900) (revision 60525374)
+Avro 504 (revision 62119913)
+Bomba głębinowa (revision 62280686)
+Brest (revision 59991108)
Burta (revision 45569092)
-Cagliari (revision 46235605)
-Cesariewicz (revision 40031486)
-Czerwona Ukraina (revision 45993524)
-Daty nowego i starego porządku (revision 45622575)
-Drednot (revision 45789788)
-Działo przeciwlotnicze (revision 45160162)
-Flota Bałtycka Marynarki Wojennej Rosji (revision 45700667)
-Gromoboj (revision 44328986)
-Hulk (okręt) (revision 46020688)
-II wojna światowa (revision 46871591)
-I wojna światowa (revision 46869119)
-Imperator Nikołaj I (okręt lotniczy) (revision 45520638)
-Imperium Rosyjskie (revision 46604959)
-Impierator Nikołaj I (1916) (revision 46534166)
-Język rosyjski (revision 46433952)
-Kanonierka (revision 41091952)
-Kanonierki typu Ardagan (revision 46534166)
-Kanonierki typu Bobr (revision 45788694)
-Kanonierki typu Chiwiniec (revision 46534166)
-Kanonierki typu Groziaszczij (revision 46534166)
-Kanonierki typu Mandżur (revision 46534166)
-Karabin maszynowy DSzK (revision 45587452)
-Karabin maszynowy Vickers 12,7 mm (revision 44572918)
-Kocioł parowy (revision 46716473)
-Konstrukcyjna linia wodna (revision 37082620)
-Kontrtorpedowce typu Biesstrasznyj (revision 46534166)
-Kontrtorpedowce typu Brawyj (revision 46534166)
-Kontrtorpedowce typu Grozowoj (revision 46534166)
-Kontrtorpedowce typu Prytkij (revision 46534166)
-Koń mechaniczny (revision 44722357)
-Krab (1915) (revision 42791389)
-Kronsztad (revision 46425497)
-Krążownik lekki (revision 40661490)
-Krążownik liniowy (revision 40601776)
-Krążownik pancernopokładowy (revision 40055901)
-Krążownik pancerny (revision 40324458)
-Krążowniki lekkie typu Swietłana (revision 45993412)
-Krążowniki liniowe typu Borodino (revision 45990866)
-Krążowniki typu Admirał Nachimow (revision 45993521)
-Krążowniki typu Bajan (revision 45991279)
-Krążowniki typu Diana (revision 45991349)
-Krążowniki typu Izumrud (revision 45991349)
-Lend-Lease Act (revision 46877263)
-Marynarka Wojenna Związku Socjalistycznych Republik Radzieckich (revision 45795993)
+Cagliari (revision 57357802)
+Czerwona Ukraina (revision 62415654)
+Daty nowego i starego porządku (revision 60118095)
+Działo przeciwlotnicze (revision 57354362)
+Długość całkowita (statek) (revision 57603162)
+Flota Bałtycka (revision 62436950)
+Flota Czarnomorska (revision 62138173)
+Gwardia (wojsko) (revision 53610648)
+Hulk (okręt) (revision 61976707)
+II wojna światowa (revision 62628019)
+I wojna światowa (revision 61897062)
+Imperium Rosyjskie (revision 62512980)
+Język rosyjski (revision 62485083)
+Karabin maszynowy DSzK (revision 62495075)
+Karabin maszynowy Vickers 12,7 mm (revision 51917495)
+Kocioł parowy (revision 62570204)
+Konstrukcyjna linia wodna (revision 59497856)
+Koń mechaniczny (revision 57660802)
+Kronsztad (revision 58913101)
+Krążownik lekki (revision 58075663)
+Krążowniki lekkie typu Swietłana (revision 61255818)
+Krążowniki typu Admirał Nachimow (revision 56872613)
+Lend-Lease Act (revision 61097607)
+Marynarka Wojenna Związku Socjalistycznych Republik Radzieckich (revision 62606797)
Maszyna sterowa (revision 28497888)
-Mecidiye (1903) (revision 43956539)
-Mila morska (revision 45754209)
-Mina morska (revision 45781427)
-Morze Czarne (revision 46729213)
-Nadbudówka (revision 45292731)
-Neapol (revision 46823083)
-Niszczyciel (revision 45799132)
-Niszczyciele rakietowe projektu 61 (revision 46498775)
-Niszczyciele typu Finn (revision 46620140)
-Niszczyciele typu Lejtienant Szestakow (revision 46620140)
-Niszczyciele typu Ochotnik (revision 46620140)
-Niszczyciele typu Ukraina (revision 46620140)
-Noworosyjsk (revision 44721836)
-Odessa (revision 45629804)
-Oerlikon 20 mm (revision 45493862)
-Okres międzywojenny (revision 46668249)
-Okręt-baza wodnosamolotów (revision 45115462)
+Mila morska (revision 61023950)
+Mina morska (revision 61000099)
+Morze Czarne (revision 61790806)
+Nadbudówka (revision 57496460)
+Neapol (revision 61681555)
+Niszczyciele rakietowe projektu 61 (revision 61591760)
+Noworosyjsk (revision 62635030)
+Obrona Odessy (revision 61668078)
+Odessa (revision 62609713)
+Oerlikon 20 mm (revision 60068925)
+Operacja desantowa kerczeńsko-teodozyjska (revision 60265054)
+Parawan (trał) (revision 54434173)
+Petersburg (revision 62601352)
+Poti (revision 62387800)
+Radar (revision 61897200)
+Rangi okrętów (revision 59334819)
+Rewolucja październikowa (revision 62498820)
+Rosyjska Federacyjna Socjalistyczna Republika Radziecka (revision 62401382)
+Rosyjska marynarka wojenna (revision 62145039)
+Salwa burtowa (revision 45535265)
+Sewastopol (revision 61699516)
+Siewastopol (1911) (revision 61344180)
+Stal Kruppa (revision 44611245)
+Sudak (revision 56397428)
+Szalupa (revision 50176935)
+Szerokość całkowita (revision 59927053)
+Tallinn (revision 62370993)
+Tarmo (revision 60930043)
+Teodozja (miasto) (revision 61289639)
+Tuapse (revision 54506404)
+Turbina parowa (revision 58882974)
+Typ okrętu (revision 58157719)
+Wielka wojna ojczyźniana (revision 62540748)
+Wielkokalibrowy karabin maszynowy (revision 60069207)
+Wodnosamolot (revision 61361212)
+Wojna domowa w Rosji (revision 61724197)
+Wyporność (revision 61495676)
+Wyrzutnia torpedowa (revision 59771268)
+Węzeł (jednostka prędkości) (revision 62033661)
+Zatoka Biskajska (revision 59124431)
+Związek Socjalistycznych Republik Radzieckich (revision 62525734)
+Śruba okrętowa (revision 62489877)
+Śródokręcie (revision 45285929)
+Świnoujście (revision 62151792)
+(691) Lehigh (revision 60266839)
+1066 (revision 62500082)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-21 17:21:04.404471
+- Wikipedia parsing ended at: 2021-03-16 19:54:55.177499
-78 characters appeared 1159291 times.
+86 characters appeared 1860467 times.
-First 37 characters:
-[ 0] Char a: 9.685575062689178 %
-[ 1] Char i: 8.815819324052374 %
-[ 2] Char o: 7.920185699707839 %
-[ 3] Char e: 6.871613770830621 %
-[ 4] Char r: 5.8672067668945935 %
-[ 5] Char n: 5.763608964444647 %
-[ 6] Char s: 4.736688199942896 %
-[ 7] Char k: 4.722196583946568 %
-[ 8] Char z: 4.519227700378939 %
-[ 9] Char w: 4.279512219106333 %
-[10] Char t: 4.0191806888865695 %
-[11] Char c: 3.6891513864939864 %
-[12] Char y: 3.565282573572986 %
-[13] Char p: 3.0190004062828053 %
-[14] Char d: 2.851052928039638 %
-[15] Char l: 2.7930002044352973 %
-[16] Char m: 2.7530620008263673 %
-[17] Char u: 2.348504387595522 %
-[18] Char j: 1.881236031332944 %
-[19] Char ł: 1.6885320424293815 %
-[20] Char b: 1.394559260789569 %
-[21] Char g: 1.3928340684090534 %
-[22] Char h: 1.163901039514669 %
-[23] Char ę: 0.8066136975099435 %
-[24] Char ó: 0.5971753425153823 %
-[25] Char ą: 0.563275312238256 %
-[26] Char f: 0.5245447432956868 %
-[27] Char ż: 0.4545019326467643 %
-[28] Char ś: 0.39567287247119143 %
-[29] Char ń: 0.3857530162832283 %
-[30] Char ć: 0.1397405828217419 %
-[31] Char v: 0.12455888987320698 %
-[32] Char ź: 0.10204512930748191 %
-[33] Char x: 0.05468859846233603 %
-[34] Char é: 0.020961087423261287 %
-[35] Char á: 0.01707940456710179 %
-[36] Char q: 0.011386269711401192 %
+First 38 characters:
+[ 0] Char a: 9.71455016401796 %
+[ 1] Char i: 8.783547356658302 %
+[ 2] Char o: 7.7947633578021005 %
+[ 3] Char e: 6.889130524755344 %
+[ 4] Char r: 6.010641414225568 %
+[ 5] Char n: 5.536996893790645 %
+[ 6] Char k: 5.05394613287954 %
+[ 7] Char s: 5.034864902199287 %
+[ 8] Char z: 4.529185414199769 %
+[ 9] Char w: 4.033180916404322 %
+[10] Char t: 4.019743430009777 %
+[11] Char c: 3.6763887776563626 %
+[12] Char y: 3.5020777041463247 %
+[13] Char p: 3.0798181316841413 %
+[14] Char l: 2.971941990908734 %
+[15] Char d: 2.804779660160594 %
+[16] Char m: 2.7137810022967352 %
+[17] Char u: 2.3359726348277072 %
+[18] Char j: 1.8645856121070676 %
+[19] Char ł: 1.5818608983658402 %
+[20] Char g: 1.402981079481657 %
+[21] Char b: 1.3551436279170768 %
+[22] Char h: 1.1977100373185872 %
+[23] Char ę: 0.6938042975231488 %
+[24] Char ą: 0.5616331813464038 %
+[25] Char ó: 0.5564194366253205 %
+[26] Char f: 0.5355107077954083 %
+[27] Char ń: 0.43010706451659714 %
+[28] Char ż: 0.42290457180912105 %
+[29] Char ś: 0.3628658825982939 %
+[30] Char v: 0.1491023490338716 %
+[31] Char ć: 0.12942986895225767 %
+[32] Char ź: 0.08433366461216459 %
+[33] Char x: 0.0421399573332932 %
+[34] Char é: 0.02617622349657371 %
+[35] Char á: 0.02246747725167928 %
+[36] Char í: 0.014136235687061367 %
+[37] Char q: 0.013114986721075944 %
-The first 37 characters have an accumulated ratio of 0.9993892818972973.
+The first 38 characters have an accumulated ratio of 0.9993173756911571.
-1321 sequences found.
+1547 sequences found.
-First 512 (typical positive ratio): 0.9894531815946438
-Next 512 (512-1024): 1.7251923805153322e-06
-Rest: 0.0003530230403650733
+First 512 (typical positive ratio): 0.9881622113600178
+Next 512 (512-1024): 0.0042290457180912105
+Rest: 0.0005488849902139173
-- Processing end: 2016-09-21 17:21:04.878014
+- Processing end: 2021-03-16 19:54:55.605846
diff --git a/script/BuildLangModelLogs/LangPortugueseModel.log b/script/BuildLangModelLogs/LangPortugueseModel.log
index dce6f36..e1f91e2 100644
--- a/script/BuildLangModelLogs/LangPortugueseModel.log
+++ b/script/BuildLangModelLogs/LangPortugueseModel.log
@@ -1,166 +1,166 @@
= Logs of language model for Portuguese (pt) =
- Generated by BuildLangModel.py
-- Started: 2016-09-20 23:44:39.722451
-- Maximum depth: 5
+- Started: 2021-03-16 19:54:55.771448
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Papagaio-das-mascarenhas (revision 46763149)
-Albinismo (revision 46498446)
-Alfred Newton (revision 43617011)
-Alphonse Milne-Edwards (revision 39740747)
-Animalia (revision 46727732)
-Asa (revision 46338820)
-August von Pelzeln (revision 34726241)
-Aves (revision 46728980)
-Bico (revision 45311553)
-Carl Wilhelm Hahn (revision 45025566)
-Carlos Lineu (revision 46625396)
-Carolus Linnaeus (revision 46625396)
-Cauda (revision 43275401)
-Charles Lucien Bonaparte (revision 45529712)
-Chordata (revision 46640101)
-Cladograma (revision 46700307)
-Classe (biologia) (revision 46701409)
-Classificação científica (revision 46306288)
-Coleção Leverian (revision 45026647)
-Comores (revision 46181501)
+Papagaio-das-mascarenhas (revision 58875640)
+Albinismo (revision 60544601)
+Alfred Newton (revision 55613591)
+Alphonse Milne-Edwards (revision 55360216)
+Animalia (revision 59086849)
+Asa (revision 59016280)
+August von Pelzeln (revision 55658828)
+Aves (revision 59780941)
+Bico (revision 59270926)
+BirdLife International (revision 60296296)
+Carl Wilhelm Hahn (revision 58280895)
+Carlos Lineu (revision 60424490)
+Carolus Linnaeus (revision 60424490)
+Cauda (revision 56806253)
+Charles Lucien Bonaparte (revision 52587707)
+Chordata (revision 60632448)
+Cladograma (revision 55578666)
+Classe (biologia) (revision 56051821)
+Classificação científica (revision 59003514)
+Coleção Leverian (revision 49939876)
+Comores (revision 60033304)
Coracopsinae (revision 36946101)
-Coracopsis nigra (revision 44338845)
-Coracopsis vasa (revision 42905822)
-Cylindraspis indica (revision 42905410)
-Cúlmen (revision 45311553)
-Digital object identifier (revision 42172651)
-Eclectus roratus (revision 44380798)
-Edward Newton (revision 39261469)
-Endemismo (revision 45260961)
-Epíteto específico (revision 35101647)
-Espécie (revision 45685675)
-Esquilo-vermelho (revision 43489595)
-Estado de conservação (revision 46662839)
-Extinção (revision 46526607)
-Família (biologia) (revision 46636004)
-Filo (revision 46704246)
-França (revision 46740839)
+Coracopsis nigra (revision 49364496)
+Coracopsis vasa (revision 55904306)
+Cylindraspis indica (revision 55039606)
+Cúlmen (revision 59270926)
+Digital object identifier (revision 59704276)
+EBird (revision 54789725)
+Eclectus roratus (revision 60346158)
+Edward Newton (revision 52355291)
+Enciclopédia da Vida (revision 53360339)
+Endemismo (revision 59148596)
+Epíteto específico (revision 58254455)
+Espécie (revision 60480387)
+Esquilo-vermelho (revision 59084882)
+Estado de conservação (revision 60507425)
+Extinção (revision 60618960)
+Família (biologia) (revision 58605859)
+Filo (revision 58307920)
+Fossilworks Paleobiology Database (revision 60618977)
+França (revision 60657760)
François-Nicolas Martinet (revision 43679514)
-François Levaillant (revision 40142351)
-Fredrik Hasselqvist (revision 44381122)
-Fregilupus varius (revision 46555765)
-Fumigação (revision 42458244)
-George Robert Gray (revision 39047844)
-Georges-Louis Leclerc, conde de Buffon (revision 45622418)
-Género (biologia) (revision 45296588)
-Hermann Schlegel (revision 43137605)
-Herpetologista (revision 46207704)
-Histoire Naturelle (revision 44293456)
-Holótipo (revision 44029660)
-Ilha da Reunião (revision 45458206)
-Ilha vulcânica (revision 37924535)
-Ilhas Mascarenhas (revision 45858660)
-Ilhas Molucas (revision 45476933)
-International Standard Book Number (revision 46326494)
+François Levaillant (revision 49358726)
+Fredrik Hasselqvist (revision 52281786)
+Fregilupus varius (revision 54591191)
+Fumigação (revision 50600995)
+George Robert Gray (revision 60662109)
+Georges-Louis Leclerc, conde de Buffon (revision 53113664)
+Global Biodiversity Information Facility (revision 59909217)
+Género (biologia) (revision 60485207)
+Hermann Schlegel (revision 58280671)
+Herpetologista (revision 57406279)
+Histoire Naturelle (revision 50957493)
+Holótipo (revision 55228464)
+INaturalist (revision 54028036)
+ITIS (revision 59095296)
+IUCN (revision 58907792)
+Ilha da Reunião (revision 60519224)
+Ilha vulcânica (revision 59932533)
+Ilhas Mascarenhas (revision 60149877)
+Ilhas Molucas (revision 58541748)
+International Standard Book Number (revision 59096583)
Jacques Barraband (revision 45007769)
Jean Feuilley (revision 43140791)
-Johann Georg Wagler (revision 34585234)
-John Gerrard Keulemans (revision 39664498)
+Johann Georg Wagler (revision 58641840)
+John Gerrard Keulemans (revision 49649801)
Julian Hume (revision 41876605)
-Leiolopisma (revision 43997173)
-Lionel Walter Rothschild (revision 46022922)
-Lista Vermelha da IUCN (revision 46569884)
-Lista Vermelha da União Internacional para a Conservação da Natureza e dos Recursos Naturais (revision 46569884)
-Lista Vermelha de Espécies Ameaçadas da IUCN (revision 46569884)
-Lista de aves extintas (revision 45507420)
-Londres (revision 46310311)
-Língua inglesa (revision 46609785)
-Madagascar (revision 46617630)
+Leiolopisma (revision 49675967)
+Lionel Walter Rothschild (revision 60408276)
+Lista Vermelha da IUCN (revision 59379270)
+Lista Vermelha da União Internacional para a Conservação da Natureza e dos Recursos Naturais (revision 58907792)
+Lista Vermelha de Espécies Ameaçadas da IUCN (revision 59379270)
+Lista de aves extintas (revision 56678269)
+Londres (revision 60339639)
+Língua inglesa (revision 60421609)
+Madagascar (revision 60519261)
Mascarenotus grucheti (revision 43145662)
-Mathurin Jacques Brisson (revision 36018826)
-Maurício (revision 46723599)
-Maximiliano I José da Baviera (revision 46372080)
-Melanina (revision 46762903)
-Museu Nacional de História Natural (França) (revision 43731807)
-Naturhistorisches Museum (revision 46694247)
-Nesoenas duboisi (revision 43995805)
-Nome científico (revision 46671641)
-Nomenclatura binomial (revision 46671641)
-Nycticorax duboisi (revision 43816214)
-Nível do mar (revision 46414695)
-Ordem (biologia) (revision 46360024)
-Otto Finsch (revision 42362273)
-Papagaio (revision 46738207)
-Papagaio-cinzento (revision 46673943)
-Papagaio-cinzento-de-maurício (revision 46664408)
-Pedro Mascarenhas (c. 1484-1555) (revision 45541977)
-Periquito-de-maurício (revision 43010883)
-Periquito-de-reunião (revision 43048764)
-Peter Mundy (revision 43563846)
-Piton des Neiges (revision 45632497)
-Pleistoceno (revision 45916874)
-Plumagem (revision 34951058)
-Ponto quente (revision 45375495)
-Porphyrio coerulescens (revision 43672493)
-Praslin (revision 40728143)
-Psitacídeos (revision 46598835)
-Psittaciformes (revision 46598835)
-Psittacula (revision 42856453)
-Psittaculinae (revision 46760737)
-Psittaculini (revision 43015966)
-Psittrichasiidae (revision 44385977)
+Mathurin Jacques Brisson (revision 51922685)
+Maurício (revision 60625767)
+Maximiliano I José da Baviera (revision 58499194)
+Melanina (revision 59475698)
+Museu Nacional de História Natural (França) (revision 59928766)
+National Center for Biotechnology Information (revision 59213569)
+Naturhistorisches Museum (revision 51807264)
+Nesoenas duboisi (revision 57384381)
+Nome científico (revision 60480452)
+Nomenclatura binomial (revision 60480452)
+Nycticorax duboisi (revision 57384378)
+Nível do mar (revision 59494064)
+Ordem (biologia) (revision 56361837)
+Otto Finsch (revision 52466524)
+Papagaio (revision 60655174)
+Papagaio-cinzento (revision 59484957)
+Papagaio-cinzento-de-maurício (revision 58875653)
+Pedro Mascarenhas (c. 1484-1555) (revision 49518171)
+Periquito-de-maurício (revision 54615644)
+Periquito-de-reunião (revision 54615645)
+Peter Mundy (revision 58162914)
+Piton des Neiges (revision 57212555)
+Pleistoceno (revision 59637437)
+Plumagem (revision 56296594)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-20 23:47:27.346826
+- Wikipedia parsing ended at: 2021-03-16 19:59:19.802576
-51 characters appeared 558324 times.
+51 characters appeared 713201 times.
First 38 characters:
-[ 0] Char a: 11.864795351802895 %
-[ 1] Char e: 11.44604208309154 %
-[ 2] Char o: 9.868284365350585 %
-[ 3] Char s: 8.346587286235232 %
-[ 4] Char i: 7.118089138206489 %
-[ 5] Char r: 6.394136737808154 %
-[ 6] Char n: 5.568272186042513 %
-[ 7] Char d: 5.243192125002687 %
-[ 8] Char t: 4.80061756256224 %
-[ 9] Char m: 4.498105042949971 %
-[10] Char c: 3.9747530107965985 %
-[11] Char u: 3.7229279056605127 %
-[12] Char l: 3.207814817202914 %
-[13] Char p: 2.77562848811801 %
-[14] Char g: 1.3850380782484721 %
-[15] Char v: 1.3210967108703908 %
-[16] Char f: 1.122466524813549 %
-[17] Char b: 0.9702251739133549 %
-[18] Char h: 0.9130898904578704 %
-[19] Char é: 0.7026386112723079 %
-[20] Char ã: 0.7022803963290133 %
-[21] Char q: 0.5903382265494588 %
-[22] Char ç: 0.5856814322866293 %
-[23] Char í: 0.41391736697688086 %
-[24] Char x: 0.3913498255493226 %
-[25] Char á: 0.34567742027926435 %
-[26] Char z: 0.3170202248156984 %
-[27] Char ó: 0.22925756370852768 %
-[28] Char j: 0.20454073262120204 %
-[29] Char ê: 0.20239144296143458 %
-[30] Char õ: 0.16155493942585308 %
-[31] Char y: 0.15080849112701586 %
-[32] Char w: 0.09241945537000021 %
-[33] Char ú: 0.08794176857881804 %
-[34] Char k: 0.08364318925928313 %
-[35] Char â: 0.07898639499645367 %
-[36] Char à: 0.06859816164091102 %
-[37] Char ô: 0.031164700066627977 %
+[ 0] Char a: 11.984419539512704 %
+[ 1] Char e: 11.434925077222271 %
+[ 2] Char o: 9.885712442915812 %
+[ 3] Char s: 8.280835276450818 %
+[ 4] Char i: 7.116787553578866 %
+[ 5] Char r: 6.403664605069258 %
+[ 6] Char n: 5.615948379208667 %
+[ 7] Char d: 5.256442433479482 %
+[ 8] Char t: 4.736673111787561 %
+[ 9] Char m: 4.516118177063689 %
+[10] Char c: 3.973213722358774 %
+[11] Char u: 3.7191478979978996 %
+[12] Char l: 3.1644655573954608 %
+[13] Char p: 2.783647246708852 %
+[14] Char g: 1.3397345208433526 %
+[15] Char v: 1.3255730151808536 %
+[16] Char f: 1.1414734415683656 %
+[17] Char b: 0.9920064610116923 %
+[18] Char h: 0.868759297869745 %
+[19] Char ã: 0.7190118914583687 %
+[20] Char é: 0.6653103402827534 %
+[21] Char ç: 0.6455403175261952 %
+[22] Char q: 0.5922594051326344 %
+[23] Char í: 0.41138472884923044 %
+[24] Char x: 0.3736674513916834 %
+[25] Char á: 0.3452042271393338 %
+[26] Char z: 0.3241722880366124 %
+[27] Char ó: 0.2204147217965202 %
+[28] Char ê: 0.204150022223749 %
+[29] Char j: 0.2023272541681798 %
+[30] Char õ: 0.17863126944578034 %
+[31] Char y: 0.13222079049244184 %
+[32] Char ú: 0.08819393130407838 %
+[33] Char â: 0.08300605299207375 %
+[34] Char w: 0.08174413664591049 %
+[35] Char k: 0.07445306442363374 %
+[36] Char à: 0.06688156634665403 %
+[37] Char ô: 0.034492380128463083 %
-The first 38 characters have an accumulated ratio of 0.9998137282294869.
+The first 38 characters have an accumulated ratio of 0.9998261359700841.
-891 sequences found.
+929 sequences found.
-First 512 (typical positive ratio): 0.9953179582313172
-Next 512 (512-1024): 1.7910747164728723e-06
-Rest: 2.42861286636753e-17
+First 512 (typical positive ratio): 0.9952990712503466
+Next 512 (512-1024): 0.0008819393130407837
+Rest: -7.806255641895632e-18
-- Processing end: 2016-09-20 23:47:27.489355
+- Processing end: 2021-03-16 19:59:19.891534
diff --git a/script/BuildLangModelLogs/LangRomanianModel.log b/script/BuildLangModelLogs/LangRomanianModel.log
index 5d30cbc..c66f99f 100644
--- a/script/BuildLangModelLogs/LangRomanianModel.log
+++ b/script/BuildLangModelLogs/LangRomanianModel.log
@@ -1,153 +1,155 @@
= Logs of language model for Romanian (ro) =
- Generated by BuildLangModel.py
-- Started: 2016-09-28 18:53:56.086095
-- Maximum depth: 5
+- Started: 2021-03-16 19:59:20.080997
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-The Loving Kind (revision 10166481)
-12 ianuarie (revision 10711676)
-13 decembrie (revision 9938353)
-2007 (revision 10716321)
-2008 (revision 10752084)
-2009 (revision 10654003)
-21 noiembrie (revision 10447643)
-25 ianuarie (revision 10228199)
-31 ianuarie (revision 10718063)
-4 Music (revision 9701591)
-Billboard (revision 10505294)
+The Loving Kind (revision 12020391)
+12 ianuarie (revision 13977250)
+13 decembrie (revision 13958824)
+2007 (revision 13956975)
+2008 (revision 13894929)
+2009 (revision 13949957)
+21 noiembrie (revision 13705857)
+25 ianuarie (revision 13882659)
+31 ianuarie (revision 13887860)
+4 Music (revision 13955370)
+Billboard (revision 13092896)
Biology (revision 10112430)
-Bulgaria (revision 10481051)
-CD (revision 10477531)
-Call The Shots (revision 10101027)
-Call the Shots (revision 10101027)
-Can't Speak French (revision 9721506)
+Bulgaria (revision 13779617)
+CD (revision 13258410)
+Call The Shots (revision 13085752)
+Call the Shots (revision 13085752)
+Can't Speak French (revision 12018260)
Casă de discuri (revision 10611348)
-Channel 4 (revision 7953101)
-Chemistry (revision 10112479)
-Cheryl Cole (revision 10475016)
-Chitară (revision 10468266)
-Croația (revision 10737746)
-Dance (revision 10231736)
-Descărcare digitală (revision 10100743)
-Digital Spy (revision 9044016)
-Discografia Girls Aloud (revision 10172788)
-Estonia (revision 10749810)
-Europa (revision 10752724)
-Fascination Records (revision 9655292)
-Fiona Phillips (revision 5384082)
-Gen muzical (revision 10534645)
+Channel 4 (revision 13980413)
+Chemistry (revision 13003795)
+Cheryl Cole (revision 13707613)
+Chitară (revision 13704508)
+Croația (revision 13662573)
+Dance (revision 12713318)
+Descărcare digitală (revision 10785925)
+Digital Spy (revision 12038314)
+Discografia formației Girls Aloud (revision 13332557)
+Estonia (revision 13885094)
+Europa (revision 13985083)
+Fascination Records (revision 9653126)
+Gen muzical (revision 13743085)
Girls A Live (revision 10112444)
-Girls Aloud (revision 10112446)
-Good Morning Television (revision 10166481)
-Heat World (revision 10166481)
+Girls Aloud (revision 12017377)
+Good Morning Television (revision 13079309)
+Heat World (revision 12994549)
I'll Stand By You (cântec de Girls Aloud) (revision 10112432)
-ITunes (revision 10744174)
+ITunes (revision 13985408)
I Think We're Alone Now (revision 10112427)
-Irlanda (revision 10573806)
+Irlanda (revision 13830248)
+Jewels & Stone (revision 8842892)
Jump (cântec de Girls Aloud) (revision 10112438)
-Lady GaGa (revision 10753010)
+Lady GaGa (revision 13982113)
Life Got Cold (revision 10112437)
-Limba engleză (revision 10756676)
+Limba engleză (revision 13983069)
Long Hot Summer (revision 10112429)
Love Machine (revision 10112433)
-MSN Search (revision 10653298)
-MTV (revision 10170766)
+MSN Search (revision 13651565)
+MTV (revision 12996766)
Mixed Up (revision 10112443)
-Muzică electronică (revision 10608432)
-Muzică pop (revision 10740529)
+Muzică electronică (revision 13450013)
+Muzică pop (revision 13648051)
Nadine Coyle (revision 10316187)
-Neil Tennant (revision 10499980)
+Neil Tennant (revision 13355922)
No Good Advice (revision 10112436)
Out Of Control (revision 10112484)
Out of Control (revision 10112484)
-Pet Shop Boys (revision 10612741)
-Poker Face (revision 10496402)
-PopJustice (revision 10625677)
-Regatul Unit (revision 10752338)
-Regatul Unit al Marii Britanii și Irlandei de Nord (revision 10752338)
-Regatul Unit al Marii Britanii și al Irlandei de Nord (revision 10752338)
-Republica Irlanda (revision 10573806)
-Romanian Top 100 (revision 10736281)
-România (revision 10732435)
-Sarah Harding (revision 10633651)
-Sarah Hearding (revision 10112425)
+Pet Shop Boys (revision 13165657)
+Poker Face (revision 13083515)
+PopJustice (revision 12061987)
+Regatul Unit (revision 13957992)
+Regatul Unit al Marii Britanii și Irlandei de Nord (revision 13957992)
+Regatul Unit al Marii Britanii și al Irlandei de Nord (revision 13957992)
+Republica Irlanda (revision 13830248)
+Romanian Top 100 (revision 13882522)
+România (revision 13906545)
+Sarah Harding (revision 10139259)
+Sarah Hearding (revision 12017812)
See the Day (revision 10112431)
-Sexy! No No No... (revision 10112425)
-Slant Magazine (revision 7697473)
-Slovenia (revision 10521499)
+Sexy! No No No... (revision 12017812)
+Slant Magazine (revision 12008416)
+Slovenia (revision 13726273)
Something Kinda Ooooh (revision 10112426)
Sound of the Underground (album) (revision 10112476)
Sound of the Underground (cântec) (revision 10112434)
-Tangled Up (revision 10112482)
-The Guardian (revision 9752334)
-The Paul O'Grady Show (revision 10101027)
-The Promise (revision 10166482)
+Tangled Up (revision 13010794)
+The Guardian (revision 12369330)
+The Paul O'Grady Show (revision 12720320)
+The Promise (revision 12178852)
The Show (revision 10112441)
The Sound of Girls Aloud (revision 10112480)
-Tonalitate (revision 9966362)
-Turneul Out of Control (revision 10112446)
-UK Mix (revision 9721468)
+Times Online (revision 12014967)
+Tonalitate (revision 12509051)
+Turneul Out of Control (revision 10112484)
+UK Mix (revision 13757304)
UK Singles Chart (revision 10226705)
-Ungaria (revision 10737745)
-Uniunea Europeană (revision 10751590)
-Untouchable (revision 10112410)
+Ungaria (revision 13960307)
+Uniunea Europeană (revision 13689726)
+Untouchable (revision 12020867)
+Utah Saints (revision 12270967)
Wake Me Up (revision 10112439)
What Will The Neighbours Say? (revision 10112478)
-Whole Lotta History (revision 10475020)
-Wideboys (revision 10166481)
-Wikimedia Commons (revision 9703907)
-Xenomania (revision 10112484)
+Whole Lotta History (revision 12369785)
+Wideboys (revision 12030035)
+Wikimedia Commons (revision 13278756)
+Xenomania (revision 12020867)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-28 18:58:13.756622
+- Wikipedia parsing ended at: 2021-03-16 20:04:01.198792
-60 characters appeared 883554 times.
+63 characters appeared 1198090 times.
First 33 characters:
-[ 0] Char e: 11.67014127036944 %
-[ 1] Char i: 10.97567324690964 %
-[ 2] Char a: 10.080198833348046 %
-[ 3] Char r: 7.490657050955572 %
-[ 4] Char n: 7.18246988865423 %
-[ 5] Char t: 6.516296683620921 %
-[ 6] Char l: 5.595130574928075 %
-[ 7] Char u: 5.551217016730161 %
-[ 8] Char o: 4.922732509840938 %
-[ 9] Char c: 4.495707110148333 %
-[10] Char s: 3.8308920563994957 %
-[11] Char d: 3.590499279048027 %
-[12] Char m: 2.971408651876399 %
-[13] Char p: 2.902369294915761 %
-[14] Char ă: 2.1349006399156134 %
-[15] Char g: 1.2248261000459508 %
-[16] Char f: 1.1199089133205216 %
-[17] Char b: 1.0781457613230203 %
-[18] Char ț: 1.0323081554721047 %
-[19] Char ș: 0.9732285745975912 %
-[20] Char î: 0.97017273420753 %
-[21] Char v: 0.9693804792915882 %
-[22] Char z: 0.7369102510995367 %
-[23] Char h: 0.533413916976212 %
-[24] Char â: 0.4986678799484808 %
-[25] Char x: 0.22081276300033725 %
-[26] Char j: 0.20055367300696958 %
-[27] Char k: 0.1901411798260208 %
-[28] Char y: 0.15471606715605385 %
-[29] Char w: 0.11827234102273318 %
-[30] Char á: 0.016297815413658927 %
-[31] Char é: 0.013355154297303842 %
-[32] Char q: 0.00520624659047438 %
+[ 0] Char e: 11.456985702242736 %
+[ 1] Char i: 11.0956605931107 %
+[ 2] Char a: 10.273852548639919 %
+[ 3] Char r: 7.454949127361049 %
+[ 4] Char n: 7.243779682661569 %
+[ 5] Char t: 6.464122060947007 %
+[ 6] Char l: 5.642480948843576 %
+[ 7] Char u: 5.4753816491248575 %
+[ 8] Char o: 4.928594679865453 %
+[ 9] Char c: 4.4603493894448665 %
+[10] Char s: 3.768080862038745 %
+[11] Char d: 3.7479655117729047 %
+[12] Char m: 2.9085461025465533 %
+[13] Char p: 2.8108906676460035 %
+[14] Char ă: 2.1405737465465866 %
+[15] Char g: 1.262509494278393 %
+[16] Char f: 1.0879817042125384 %
+[17] Char b: 1.0721231293141584 %
+[18] Char ț: 1.016534650986153 %
+[19] Char ș: 1.0140306654758826 %
+[20] Char v: 0.9768882137402032 %
+[21] Char î: 0.9654533465766345 %
+[22] Char z: 0.7075428390187716 %
+[23] Char h: 0.5414451335041608 %
+[24] Char â: 0.45664349088966605 %
+[25] Char x: 0.22627682394477877 %
+[26] Char j: 0.22452403408758942 %
+[27] Char k: 0.20132043502574934 %
+[28] Char y: 0.16918595431061106 %
+[29] Char w: 0.12970644943201262 %
+[30] Char á: 0.012937258469730987 %
+[31] Char é: 0.012019130449298466 %
+[32] Char q: 0.007428490347135858 %
-The first 33 characters have an accumulated ratio of 0.9996661211425673.
+The first 33 characters have an accumulated ratio of 0.9995676451685602.
-981 sequences found.
+1066 sequences found.
-First 512 (typical positive ratio): 0.997762564143313
-Next 512 (512-1024): 1.1317927370596478e-06
-Rest: 3.0357660829594124e-18
+First 512 (typical positive ratio): 0.9975318123681904
+Next 512 (512-1024): 0.01016534650986153
+Rest: 4.3355868061878584e-05
-- Processing end: 2016-09-28 18:58:13.862425
+- Processing end: 2021-03-16 20:04:01.293047
diff --git a/script/BuildLangModelLogs/LangSlovakModel.log b/script/BuildLangModelLogs/LangSlovakModel.log
index 2c4902e..4dc3fe5 100644
--- a/script/BuildLangModelLogs/LangSlovakModel.log
+++ b/script/BuildLangModelLogs/LangSlovakModel.log
@@ -1,158 +1,156 @@
= Logs of language model for Slovak (sk) =
- Generated by BuildLangModel.py
-- Started: 2016-09-21 13:26:28.712674
-- Maximum depth: 5
+- Started: 2021-03-16 20:04:01.478267
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Dôkaz (matematika) (revision 6358810)
-1825 (revision 6122752)
-1839 (revision 6165808)
-1847 (revision 5941780)
-1852 (revision 5941777)
-1878 (revision 6221358)
-1955 (revision 6226609)
-1976 (revision 6310709)
-1983 (revision 6356952)
-1993 (revision 6348358)
-1995 (revision 6277350)
-2012 (revision 6291145)
-Adrien-Marie Legendre (revision 6060342)
-Algebra (revision 6319238)
+Dôkaz (matematika) (revision 7170221)
+1825 (revision 6937105)
+1839 (revision 6804159)
+1847 (revision 7167629)
+1852 (revision 6923466)
+1878 (revision 7159904)
+1955 (revision 7061181)
+1976 (revision 7100059)
+1983 (revision 7174204)
+1993 (revision 7122277)
+1995 (revision 7133683)
+2012 (revision 7135523)
+Adrien-Marie Legendre (revision 6556308)
Algebraická geometria (revision 5964212)
-Algebraická rovnica (revision 5288111)
-Algebrické číslo (revision 6106622)
-Algoritmus (revision 6286937)
-Andrew Wiles (revision 5791970)
-Arabi (revision 6044956)
-Arabčina (revision 6322514)
-Aristoteles (revision 6359959)
+Algebraická rovnica (revision 6586551)
+Algebrické číslo (revision 6382942)
+Algoritmus (revision 7100698)
+Andrew Wiles (revision 6813255)
+Arabi (revision 7124298)
+Arabčina (revision 7148041)
+Aristoteles (revision 7150270)
Arthur Cayley (revision 6332355)
-Axióma (revision 6338092)
-Babylonia (revision 6168813)
-Bernard Bolzano (revision 6261374)
-Boh (revision 6282272)
-Bolzanova veta (revision 6345299)
-Bytie (revision 5274918)
-Byzantská ríša (revision 6359782)
-Caroline Blundenová (revision 6358810)
+Axióma (revision 7073489)
+Babylonia (revision 6432954)
+Bernard Bolzano (revision 6903631)
+Boh (revision 7166677)
+Bolzanova veta (revision 6852875)
+Bytie (revision 6569833)
+Byzantská ríša (revision 7168566)
+Caroline Blundenová (revision 7170221)
Cauchyho postupnosť (revision 6215169)
-Celé číslo (revision 6302805)
-Charles Hermite (revision 5751036)
-Daniel Marcus (revision 5657431)
+Celé číslo (revision 7047567)
+Charles Hermite (revision 6412828)
+Daniel Marcus (revision 5291472)
David Hilbert (revision 5968866)
Dedukcia (revision 6338099)
-Definícia (revision 6106684)
-Derivácia (funkcia) (revision 5970574)
-Desiatková číselná sústava (revision 5924486)
-Diofantická rovnica (revision 6327292)
-Dynastia Chan (revision 6342042)
+Definícia (revision 6965423)
+Derivácia (funkcia) (revision 7014993)
+Desiatková číselná sústava (revision 7047888)
+Diofantická rovnica (revision 6060359)
+Dynastia Chan (revision 7025657)
Dôkaz (logika) (revision 5495754)
-Dôkaz sporom (revision 5940134)
-Dôkaz výpočtom (revision 6358810)
-Energia (revision 6277761)
+Dôkaz sporom (revision 7051518)
+Energia (revision 6975312)
Eric Weisstein (revision 6054413)
Ernst Kummer (revision 6001344)
-Európa (revision 6295124)
+Európa (revision 7164742)
Experiment (revision 6354302)
-Fenomén (filozofia) (revision 5420897)
-Filozofia (revision 6296369)
+Fenomén (filozofia) (revision 6558128)
+Filozofia (revision 6942330)
Formula (logika) (revision 3916562)
-Formálny dôkaz (revision 6358810)
-Formálny jazyk (revision 5623029)
-Gabriel Cramer (revision 5923903)
-Galoisova teória (revision 6353573)
-Gentzenovský kalkul (revision 6358810)
-Geometria (revision 5970028)
-Geometrický dôkaz (revision 6358810)
-Georg Ferdinand Cantor (revision 6186696)
-Giordano Bruno (revision 6312876)
-Gottlob Frege (revision 5968855)
-Gödelova veta o neúplnosti (revision 5323549)
-Hardvér (revision 6214401)
-Henri Poincaré (revision 6315506)
-Hilbertovský kalkul (revision 6358810)
-Hmotnosť (revision 5979540)
-Hypotéza (revision 5983410)
-Idea (revision 5960449)
-India (revision 6362189)
+Formálny dôkaz (revision 7170221)
+Formálny jazyk (revision 6505890)
+Gabriel Cramer (revision 7068001)
+Galoisova teória (revision 6749172)
+Gentzenovský kalkul (revision 7170221)
+Geometria (revision 7010499)
+Geometrický dôkaz (revision 7170221)
+Georg Ferdinand Cantor (revision 6697670)
+Giordano Bruno (revision 7072808)
+Gottlob Frege (revision 6580699)
+Gödelova veta o neúplnosti (revision 6968373)
+Hardvér (revision 6946820)
+Henri Poincaré (revision 6830074)
+Hilbertovský kalkul (revision 7170221)
+Hmotnosť (revision 7021343)
+Hypotéza (revision 6850461)
+Idea (revision 6113421)
+India (revision 6976622)
Intuícia (revision 5837951)
-Jazyk (lingvistika) (revision 6073293)
-John Taylor (revision 6355518)
-Kardinálne číslo (revision 6090126)
+Jazyk (lingvistika) (revision 6462864)
+John Taylor (revision 6741201)
+Kardinálne číslo (revision 7154031)
Kenneth Appel (revision 5968422)
Klasická mechanika (revision 6295646)
-Konečná množina (revision 5276494)
-Konfucianizmus (revision 5968816)
-Kresťanstvo (revision 6289571)
-Langlandsov program (revision 6088475)
-Latinčina (revision 6121105)
-Leonhard Euler (revision 6339382)
-Lineárna algebra (revision 5473535)
+Konečná množina (revision 6850487)
+Konfucianizmus (revision 6948500)
+Kresťanstvo (revision 7150939)
+Latinčina (revision 7110742)
+Leonhard Euler (revision 7016638)
+Lineárna algebra (revision 6564030)
Logická axióma (revision 5495754)
Logický kalkul (revision 1608550)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-21 13:33:10.330458
+- Wikipedia parsing ended at: 2021-03-16 20:13:09.022092
-62 characters appeared 550293 times.
+64 characters appeared 535286 times.
-First 45 characters:
-[ 0] Char o: 8.867094438780795 %
-[ 1] Char a: 8.59705647718579 %
-[ 2] Char e: 8.562347694773512 %
-[ 3] Char n: 6.0867574183207855 %
-[ 4] Char i: 5.828531346028389 %
-[ 5] Char t: 5.366595613609477 %
-[ 6] Char r: 4.977711873492848 %
-[ 7] Char k: 4.264273759615332 %
-[ 8] Char s: 4.257731790155426 %
-[ 9] Char v: 4.117079446767449 %
-[10] Char l: 3.5979014815743615 %
-[11] Char d: 3.416361829061972 %
-[12] Char m: 3.2513588215732345 %
-[13] Char p: 2.878466562358598 %
-[14] Char u: 2.5987973679476206 %
-[15] Char c: 2.419438371921867 %
-[16] Char z: 2.127412124086623 %
-[17] Char h: 2.0687161203213558 %
-[18] Char j: 2.0312815173007834 %
-[19] Char y: 1.6700194260148686 %
-[20] Char b: 1.6574806512167153 %
-[21] Char á: 1.6422160558102683 %
-[22] Char ý: 1.2564215790497062 %
-[23] Char í: 1.1326693234331529 %
-[24] Char č: 0.9473135220691523 %
-[25] Char é: 0.8913433389121795 %
-[26] Char ž: 0.7668641978000811 %
-[27] Char ú: 0.6949025337411161 %
-[28] Char š: 0.6785476100913513 %
-[29] Char f: 0.6514711253822963 %
-[30] Char g: 0.6096752093884531 %
-[31] Char ť: 0.46375294615777407 %
-[32] Char ô: 0.4172322744428877 %
-[33] Char ľ: 0.36053520579036985 %
-[34] Char x: 0.23114958758334195 %
-[35] Char ó: 0.2251527822450949 %
-[36] Char ň: 0.09304134342977287 %
-[37] Char w: 0.09013380144759246 %
-[38] Char ä: 0.0694175648245571 %
-[39] Char ď: 0.06560141597294532 %
-[40] Char q: 0.01726353051919614 %
-[41] Char ě: 0.009994675563745132 %
-[42] Char ĺ: 0.009267790068200032 %
-[43] Char ö: 0.008904347320427481 %
-[44] Char ŕ: 0.00599680533824708 %
+First 46 characters:
+[ 0] Char o: 8.787265125559047 %
+[ 1] Char a: 8.624174740232323 %
+[ 2] Char e: 8.577470735270492 %
+[ 3] Char n: 6.100103496074995 %
+[ 4] Char i: 5.884891441210867 %
+[ 5] Char t: 5.302772723366575 %
+[ 6] Char r: 5.02273550961542 %
+[ 7] Char s: 4.340670221152805 %
+[ 8] Char k: 4.253240323864252 %
+[ 9] Char v: 4.073896944810811 %
+[10] Char l: 3.6208680966810265 %
+[11] Char d: 3.3796886150581185 %
+[12] Char m: 3.248356953105443 %
+[13] Char p: 2.8470761424733695 %
+[14] Char u: 2.6178528861206907 %
+[15] Char c: 2.426740097816868 %
+[16] Char z: 2.104856095619912 %
+[17] Char h: 2.080570013039758 %
+[18] Char j: 2.0389100406138025 %
+[19] Char á: 1.675926514050433 %
+[20] Char b: 1.6690143213160817 %
+[21] Char y: 1.6607944164427988 %
+[22] Char ý: 1.2490519086992748 %
+[23] Char í: 1.1096871578931637 %
+[24] Char č: 0.9322119390381964 %
+[25] Char é: 0.8785957413420117 %
+[26] Char ž: 0.7489454235679618 %
+[27] Char ú: 0.702615050645823 %
+[28] Char f: 0.6794498641847535 %
+[29] Char š: 0.6790762321450589 %
+[30] Char g: 0.6219105300717748 %
+[31] Char ť: 0.4550838243481055 %
+[32] Char ô: 0.38428055282596596 %
+[33] Char ľ: 0.3648516867618432 %
+[34] Char ó: 0.23090460053130477 %
+[35] Char x: 0.22922325635267876 %
+[36] Char ň: 0.09434209002290364 %
+[37] Char w: 0.08855079340763629 %
+[38] Char ä: 0.07005600744275023 %
+[39] Char ď: 0.06706695112519288 %
+[40] Char q: 0.018121153925191393 %
+[41] Char ĺ: 0.010274881091603367 %
+[42] Char ě: 0.010274881091603367 %
+[43] Char ö: 0.010088065071756034 %
+[44] Char ř: 0.007285824774046024 %
+[45] Char ŕ: 0.006351744674809354 %
-The first 45 characters have an accumulated ratio of 0.9998128269848972.
+The first 46 characters have an accumulated ratio of 0.9998617561453131.
-1181 sequences found.
+1198 sequences found.
-First 512 (typical positive ratio): 0.9733303573968434
-Next 512 (512-1024): 1.8172137388627513e-06
-Rest: 0.0003522983638913346
+First 512 (typical positive ratio): 0.9724967373205526
+Next 512 (512-1024): 0.007489454235679618
+Rest: 0.00042527339003644096
-- Processing end: 2016-09-21 13:33:10.831531
+- Processing end: 2021-03-16 20:13:09.628753
diff --git a/script/BuildLangModelLogs/LangSloveneModel.log b/script/BuildLangModelLogs/LangSloveneModel.log
index e494190..9ec9020 100644
--- a/script/BuildLangModelLogs/LangSloveneModel.log
+++ b/script/BuildLangModelLogs/LangSloveneModel.log
@@ -1,148 +1,146 @@
= Logs of language model for Slovene (sl) =
- Generated by BuildLangModel.py
-- Started: 2016-09-28 22:00:35.243966
-- Maximum depth: 5
+- Started: 2021-03-16 20:13:09.868611
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-XCOM: Enemy Unknown (revision 4704271)
-1UP.com (revision 4547348)
+XCOM: Enemy Unknown (revision 5360018)
+1UP.com (revision 5138164)
2K Games (revision 4110089)
-Android (operacijski sistem) (revision 4619359)
-Animator videoigre (revision 4702643)
-App Store (revision 3903089)
-Artefakt (revision 4484504)
-Athlon (revision 4524746)
-Avstralazija (revision 4623530)
-Avtopsija (revision 4541344)
-Bralno-pisalni pomnilnik (revision 4256388)
-Civilization (serija) (revision 4645770)
-Deus Ex: Human Revolution (revision 4694860)
-Digitalna distribucija (revision 4696215)
+Android (operacijski sistem) (revision 5423518)
+Animator videoigre (revision 5438736)
+App Store (revision 4916505)
+Artefakt (revision 4871634)
+Athlon (revision 5138170)
+Avstralazija (revision 5234981)
+Avtopsija (revision 5394899)
+Bralno-pisalni pomnilnik (revision 5307992)
+Civilization (serija) (revision 5138157)
+Deus Ex: Human Revolution (revision 5312201)
DirectX (revision 4477913)
-Dishonored (revision 4619444)
-Edge (magazine) (revision 4690049)
-Electronic Entertainment Expo (revision 4538691)
-Enoigralska videoigra (revision 4610359)
-Eurogamer (revision 4694860)
-Evropa (revision 4687833)
+Dishonored (revision 5359830)
+Edge (magazine) (revision 5356455)
+Enoigralska videoigra (revision 5116872)
+Eurogamer (revision 5312201)
+Evropa (revision 5448355)
Fantasy Flight Games (revision 4649361)
Firaxis Games (revision 4110089)
+Francoska narodna knjižnica (revision 4596643)
GameRankings (revision 3934020)
-GameSpot (revision 4238015)
-GameSpy (revision 4538691)
-GameTrailers (revision 4704271)
-Game Informer (revision 4704271)
-GamesTM (revision 4704271)
-Grafična kartica (revision 4257980)
-Granata (revision 3859332)
-Holograf (revision 4477482)
-IGN (revision 4576233)
-IOS (revision 4597264)
-Igra igranja vlog (revision 4642276)
-Igra na deski (revision 4649363)
-Igralna konzola (revision 4649866)
-Igralni pogon (revision 4622773)
-Intel (revision 4626025)
-International Standard Book Number (revision 4015087)
-Izdelovalec videoigre (revision 3851747)
-Joker (revija) (revision 3867772)
+GameSpot (revision 5116871)
+GameSpy (revision 5168684)
+GameTrailers (revision 5298120)
+Game Informer (revision 5360018)
+GamesTM (revision 5360018)
+Grafična kartica (revision 5374734)
+Granata (revision 4837685)
+Holografija (revision 4760425)
+IGN (revision 5370204)
+IOS (revision 5404204)
+Igra igranja vlog (revision 4768087)
+Igra na deski (revision 5431955)
+Igralna konzola (revision 4773547)
+Igralni pogon (revision 4771045)
+Intel (revision 5366957)
+International Standard Book Number (revision 4765322)
+Izdelovalec videoigre (revision 5438736)
+Joker (revija) (revision 5351778)
Kotaku (revision 4613535)
-Kristal (revision 4156234)
-Linux (revision 4524740)
-Lovec prestreznik (revision 4102792)
-MTV (revision 4621758)
-Mac OS X (revision 4601645)
-Machinima (revision 4601716)
-Major (revision 4245802)
-Mednarodna različica (revision 4116054)
+Kristal (revision 5068718)
+Linux (revision 5457231)
+Lovec prestreznik (revision 4758667)
+MTV (revision 5406174)
+Mac OS X (revision 5212452)
+Machinima (revision 5295004)
+Major (revision 4758895)
+Mednarodna različica (revision 5032649)
Metacritic (revision 3934020)
-Michael McCann (skladatelj) (revision 4694860)
-MicroProse (revision 4382810)
-Microsoft Windows (revision 4691357)
-Nezemeljsko življenje (revision 4620576)
-NowGamer (revision 4704271)
-OS X (revision 4601645)
+Michael McCann (skladatelj) (revision 5312201)
+MicroProse (revision 5116826)
+Microsoft Windows (revision 5460799)
+Možje v črnem (revision 5262890)
+Nezemeljsko življenje (revision 5386002)
+Normativna kontrola (revision 5316351)
+NowGamer (revision 5363253)
+OS X (revision 5212452)
Ognjena ekipa (revision 4694450)
-Operacijski sistem (revision 4698515)
-Ostrostrelec (revision 4529694)
-Pilot (revision 4069093)
-PlayStation 3 (revision 4382944)
-PlayStation Network (revision 4382944)
-PlayStation Vita (revision 3944025)
-Pogon igre (revision 4622773)
-Procesor (revision 4702518)
-Producent videoiger (revision 4599904)
-Razvijalec videoiger (revision 4093281)
-Računalniška miška (revision 4385579)
-Računalniška platforma (revision 4673669)
-Severna Amerika (revision 4643798)
+Operacijski sistem (revision 5309675)
+Ostrostrelec (revision 4810396)
+Pilot (revision 4758828)
+PlayStation 3 (revision 5245525)
+PlayStation Network (revision 4784984)
+PlayStation Vita (revision 5245581)
+Procesor (revision 5262718)
+Producent videoiger (revision 5368686)
+Razvijalec videoiger (revision 5171689)
+Računalniška miška (revision 5169871)
+Računalniško okolje (revision 5250619)
+Severna Amerika (revision 5400891)
Sid Meier (revision 4061487)
Stealth (revision 4618630)
-Steam (revision 4696215)
-Strateška videoigra (revision 4236795)
-Tablični računalnik (revision 4409985)
-Take-Two Interactive (revision 4110089)
-Telepatija (revision 4481192)
-The Bureau: XCOM Declassified (revision 4704271)
-The Guardian (revision 3929479)
-Trdi disk (revision 4644623)
-UFO: Enemy Unknown (revision 4704271)
-Unreal Engine (revision 4622773)
-Unreal Engine 3 (revision 4622773)
-Uporabniški vmesnik (revision 4552473)
-Valve Corporation (revision 4110105)
+Steam (revision 5171704)
+Strateška videoigra (revision 5245834)
+Tablični računalnik (revision 5312221)
+Telepatija (revision 4846742)
+The Bureau: XCOM Declassified (revision 5360018)
+The Guardian (revision 5361337)
+Trdi disk (revision 5329681)
+UFO: Enemy Unknown (revision 5360018)
+Unreal Engine (revision 4771045)
+Unreal Engine 3 (revision 4771045)
+Uporabniški vmesnik (revision 5118420)
+Valve Corporation (revision 5168680)
Večigralska videoigra (revision 4618639)
-VideoGamer.com (revision 4704271)
-Vohunski satelit (revision 4215166)
-Vojaška taktika (revision 3970259)
-Vojaški čini (revision 4363026)
+VideoGamer.com (revision 5363253)
+Vohunski satelit (revision 5450401)
+Vojaška taktika (revision 4759159)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-28 22:06:46.133919
+- Wikipedia parsing ended at: 2021-03-16 20:20:05.416719
-41 characters appeared 411226 times.
+41 characters appeared 318060 times.
First 29 characters:
-[ 0] Char a: 10.090315301075321 %
-[ 1] Char e: 9.90477255815537 %
-[ 2] Char i: 9.666703953543793 %
-[ 3] Char o: 9.177921629468953 %
-[ 4] Char n: 7.28309980400072 %
-[ 5] Char r: 5.808241696779873 %
-[ 6] Char s: 4.575586174025961 %
-[ 7] Char t: 4.4963110309173056 %
-[ 8] Char j: 4.343840126840229 %
-[ 9] Char l: 4.2672399118732764 %
-[10] Char v: 3.802775116359374 %
-[11] Char p: 3.5216644861949393 %
-[12] Char k: 3.5136397017698293 %
-[13] Char d: 3.0387183689747244 %
-[14] Char m: 2.9487435132992563 %
-[15] Char z: 2.350775485985808 %
-[16] Char u: 1.9719083910064055 %
-[17] Char g: 1.9342162217369525 %
-[18] Char b: 1.5392995579073308 %
-[19] Char c: 1.2924766430138173 %
-[20] Char h: 1.1864522184881305 %
-[21] Char č: 1.137087635509428 %
-[22] Char š: 0.6932927392723223 %
-[23] Char ž: 0.45303555709026183 %
-[24] Char f: 0.40707542811009034 %
-[25] Char x: 0.19381070263067024 %
-[26] Char y: 0.19040624863213904 %
-[27] Char w: 0.18919037220409216 %
-[28] Char q: 0.011186063138031156 %
+[ 0] Char a: 10.018235553040308 %
+[ 1] Char e: 9.988995786958435 %
+[ 2] Char i: 9.602590706156072 %
+[ 3] Char o: 9.246054203609381 %
+[ 4] Char n: 7.188580770923725 %
+[ 5] Char r: 5.758976293781048 %
+[ 6] Char s: 4.588442432245488 %
+[ 7] Char t: 4.5786958435515315 %
+[ 8] Char l: 4.357668364459536 %
+[ 9] Char j: 4.260202477519965 %
+[10] Char v: 3.809344148902723 %
+[11] Char p: 3.4980821228698984 %
+[12] Char k: 3.4751304785260646 %
+[13] Char d: 3.143746462931522 %
+[14] Char m: 2.928692699490662 %
+[15] Char z: 2.332893164811671 %
+[16] Char u: 1.9908193422624663 %
+[17] Char g: 1.9298245614035088 %
+[18] Char b: 1.5607118153807458 %
+[19] Char c: 1.2903225806451613 %
+[20] Char h: 1.2145507137018172 %
+[21] Char č: 1.1001068980695468 %
+[22] Char š: 0.6841476450984091 %
+[23] Char ž: 0.44331258253159783 %
+[24] Char f: 0.4203609381877633 %
+[25] Char w: 0.2021631138778847 %
+[26] Char y: 0.19618939822674966 %
+[27] Char x: 0.16726403823178018 %
+[28] Char q: 0.011004213041564485 %
-The first 29 characters have an accumulated ratio of 0.9998978663800442.
+The first 29 characters have an accumulated ratio of 0.9998710935043701.
-727 sequences found.
+698 sequences found.
-First 512 (typical positive ratio): 0.9983524317161332
-Next 512 (512-1024): 2.4317528560937295e-06
-Rest: -3.859759734048396e-17
+First 512 (typical positive ratio): 0.998296272473889
+Next 512 (512-1024): 0.004433125825315978
+Rest: -2.8189256484623115e-17
-- Processing end: 2016-09-28 22:06:46.601266
+- Processing end: 2021-03-16 20:20:05.900813
diff --git a/script/BuildLangModelLogs/LangSwedishModel.log b/script/BuildLangModelLogs/LangSwedishModel.log
index 029e510..26104e1 100644
--- a/script/BuildLangModelLogs/LangSwedishModel.log
+++ b/script/BuildLangModelLogs/LangSwedishModel.log
@@ -1,151 +1,150 @@
= Logs of language model for Swedish (sv) =
- Generated by BuildLangModel.py
-- Started: 2016-09-28 22:26:37.221506
-- Maximum depth: 5
+- Started: 2021-03-16 20:20:06.144954
+- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
-Kakapo (revision 36509929)
-Akut hotad (revision 32517788)
-Aotearoa (revision 36575359)
-Art (revision 36771341)
-Artepitet (revision 36771341)
-Auckland (revision 35752058)
-Auktorsnamn (revision 35976965)
-BBC (revision 36508743)
-Basalomsättning (revision 30567523)
-Beilschmiedia tawa (revision 29101923)
-Berguv (revision 36295501)
-Betesmark (revision 34292168)
-Biotop (revision 35528052)
-BirdLife International (revision 36124283)
-Bonaparte (revision 37325183)
-British Museum (revision 36420244)
-Bröstben (revision 30602527)
-Dacrydium cupressinum (revision 32986501)
-Digital object identifier (revision 27637223)
-Djur (revision 37300775)
-Djurpark (revision 37147093)
-Domän (biologi) (revision 33377709)
-Don Merton (revision 36509929)
-Douglas Adams (revision 36556245)
-Däggdjur (revision 37328286)
-Ekologisk nisch (revision 33898643)
-Ekosystem (revision 36598266)
-Endemisk (revision 30647109)
-Eukaryoter (revision 37095313)
-Evolution (revision 37093592)
-Familj (biologi) (revision 30280200)
-Femininum (revision 30597527)
-Fjäder (biologi) (revision 36364943)
-Fjäderdräkt (revision 36364943)
-Fladdermöss (revision 37307257)
-Flygg (revision 36479633)
-Frukter (revision 34088588)
-Frö (revision 37333131)
-Fågelläte (revision 34034723)
-Fåglar (revision 37387306)
-Fåglarnas liv (revision 36509929)
-Genitiv (revision 37388438)
-George Edward Grey (revision 36509929)
-George Robert Gray (revision 20426710)
-Haasts örn (revision 29175076)
-Hauturu/Little Barrier Island (revision 36509929)
-Hermelin (revision 36578682)
-Hertz (revision 37104488)
-Hjortdjur (revision 36493550)
-Hund (revision 37351832)
-Husdjur (revision 37384850)
-Huskatt (revision 32922967)
-Hāngi (revision 29609696)
-IUCN (revision 30570280)
-Iller (revision 30663158)
-Infraröd (revision 36770733)
-Internationella naturvårdsunionen (revision 30570280)
-Jordbruk (revision 37352625)
-Kahurangi National Park (revision 35956142)
-Kamouflage (revision 36579595)
-Kaniner (revision 36877621)
-Kapiti Island (revision 37395588)
-Katt (revision 36734686)
-Kelp (revision 30312471)
-Kivier (revision 36373234)
-Klass (biologi) (revision 30280201)
-Kroppsfett (revision 35066611)
-Könsdimorfism (revision 30816932)
-Könsfördelning (revision 24769321)
-Lamm- och fårkött (revision 36187205)
+Kakapo (revision 48946696)
+Akut hotad (revision 45694757)
+Aotearoa (revision 48764847)
+Arkive (revision 45404194)
+Art (revision 48819963)
+Artepitet (revision 48819963)
+Auckland (revision 48740415)
+Auktorsnamn (revision 46648298)
+BBC (revision 48945370)
+Basalomsättning (revision 48638233)
+Beilschmiedia tawa (revision 47662851)
+Berguv (revision 47572081)
+Betesmark (revision 47837257)
+Biodiversity Heritage Library (revision 48152021)
+Biotop (revision 48969696)
+BirdLife International (revision 47616784)
+British Museum (revision 48501908)
+Bröstben (revision 48379566)
+CITES (revision 47938046)
+Dacrydium cupressinum (revision 47442085)
+Digital object identifier (revision 47511062)
+Djur (revision 48964290)
+Djurpark (revision 48242363)
+Domän (biologi) (revision 48975224)
+Don Merton (revision 48407169)
+Douglas Adams (revision 47251802)
+Däggdjur (revision 48794669)
+Ekologisk nisch (revision 48844778)
+Ekosystem (revision 48570659)
+Endemisk (revision 48546826)
+Eukaryoter (revision 48898436)
+Evolution (revision 49003401)
+Familj (biologi) (revision 48771961)
+Femininum (revision 46628147)
+Fjäder (biologi) (revision 48641138)
+Fjäderdräkt (revision 48641138)
+Fladdermöss (revision 48746998)
+Flygg (revision 48763776)
+Fossilworks (revision 43519389)
+Frukter (revision 48807025)
+Frö (revision 46332448)
+Fylum (revision 48212330)
+Fågelläte (revision 48681377)
+Fåglar (revision 48837894)
+Fåglarnas liv (revision 48837894)
+Genitiv (revision 48658908)
+George Edward Grey (revision 46365447)
+George Robert Gray (revision 43056128)
+Global Biodiversity Information Facility (revision 40116158)
+Haasts örn (revision 48440980)
+Hauturu/Little Barrier Island (revision 20537378)
+Hermelin (revision 48863152)
+Hertz (revision 48548540)
+Hjortdjur (revision 48740321)
+Hund (revision 48989960)
+Husdjur (revision 48155297)
+Huskatt (revision 47647609)
+Hāngi (revision 46574175)
+IUCN (revision 49006187)
+Iller (revision 48765500)
+Inaturalist (revision 48552803)
+Infraröd (revision 48615998)
+Integrated Taxonomic Information System (revision 48591706)
+Internationella naturvårdsunionen (revision 49006187)
+Internet Archive (revision 48979443)
+Jordbruk (revision 48448896)
+Kahurangi National Park (revision 47659423)
+Kamouflage (revision 47671382)
+Kaniner (revision 48911042)
+Kapiti Island (revision 48553791)
+Katt (revision 48986224)
+Kelp (revision 46077553)
+Kivier (revision 48467049)
+Klass (biologi) (revision 44944834)
+Kroppsfett (revision 39272827)
+Könsdimorfism (revision 48346350)
+Könsfördelning (revision 45646592)
+Lamm- och fårkött (revision 48351109)
Lek (fortplantningsbeteende) (revision 30508235)
-Mandel (revision 36577529)
-Maori (revision 32560474)
-Maorier (revision 35862066)
-Maoripapegojor (revision 36545138)
-Mark Carwardine (revision 20375916)
-Markpapegoja (revision 36295722)
-Maskulinum (revision 32704551)
-Masterton (revision 29859631)
-Metrosideros umbellata (revision 29071212)
-Milford Sound (revision 20284758)
-Morrhår (revision 36533839)
-Muskelmage (revision 31196380)
-Mustela (revision 20934105)
-Mårddjur (revision 37306347)
-Māori (revision 32560474)
-NHNZ (revision 36509929)
-Nattpapegoja (revision 33486517)
-Nordön (revision 24810231)
-Nya Zeeland (revision 36575359)
-Näbb (revision 23648463)
-Ollonår (revision 36509929)
-Ordning (biologi) (revision 30280196)
+Mandel (revision 48952857)
+Maori (revision 48297968)
+Maorier (revision 48066510)
+Maoripapegojor (revision 46078328)
+Mark Carwardine (revision 48869810)
+Markpapegoja (revision 47342275)
+Maskulinum (revision 46628162)
+Masterton (revision 48262093)
+Metrosideros umbellata (revision 46936435)
+Milford Sound (revision 45323524)
+Morrhår (revision 48980591)
+Muskelmage (revision 41849238)
+Mustela (revision 48294935)
+Mårddjur (revision 48435918)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-09-28 22:29:21.480287
+- Wikipedia parsing ended at: 2021-03-16 20:24:13.933499
-48 characters appeared 594415 times.
+49 characters appeared 513356 times.
-First 31 characters:
-[ 0] Char a: 10.070741821791172 %
-[ 1] Char e: 9.737136512369304 %
-[ 2] Char r: 9.110638190489809 %
-[ 3] Char n: 8.378826240925951 %
-[ 4] Char t: 7.481305148759705 %
-[ 5] Char s: 5.828587771169974 %
-[ 6] Char i: 5.359891658184939 %
-[ 7] Char l: 5.173489901836259 %
-[ 8] Char o: 4.694195133029954 %
-[ 9] Char d: 4.597293136949774 %
-[10] Char k: 3.297359588839447 %
-[11] Char m: 3.1898589369379975 %
-[12] Char g: 3.004466576381821 %
-[13] Char v: 2.2324470277499726 %
-[14] Char f: 2.1988005013332437 %
-[15] Char p: 2.06017681249632 %
-[16] Char u: 2.0499146219392173 %
-[17] Char ä: 2.0475593650900468 %
-[18] Char h: 2.028380845032511 %
-[19] Char å: 1.5443755625278637 %
-[20] Char c: 1.442594820117258 %
-[21] Char ö: 1.3515809661600062 %
-[22] Char b: 1.268642278542769 %
-[23] Char j: 0.7302978558751041 %
-[24] Char y: 0.6699023409570755 %
-[25] Char x: 0.2111319532649748 %
-[26] Char w: 0.10262190557102362 %
-[27] Char z: 0.09151855185350302 %
-[28] Char é: 0.021197311642539303 %
-[29] Char ā: 0.011103353717520588 %
-[30] Char q: 0.007570468443764037 %
+First 30 characters:
+[ 0] Char a: 9.801969783152433 %
+[ 1] Char e: 9.753075838209742 %
+[ 2] Char r: 9.263357202409244 %
+[ 3] Char n: 8.249635730370347 %
+[ 4] Char t: 7.409088429861539 %
+[ 5] Char s: 6.03207131113691 %
+[ 6] Char i: 5.692346052252238 %
+[ 7] Char l: 5.428981057979258 %
+[ 8] Char o: 4.548890049010823 %
+[ 9] Char d: 4.4466218374773065 %
+[10] Char m: 3.3119316809387636 %
+[11] Char k: 3.0742798369942106 %
+[12] Char g: 3.073890243807416 %
+[13] Char f: 2.2676271437365103 %
+[14] Char v: 2.2645103982421557 %
+[15] Char u: 2.116464987260303 %
+[16] Char ä: 2.0311440793523405 %
+[17] Char h: 1.9354989519943275 %
+[18] Char p: 1.8753068046346004 %
+[19] Char å: 1.4903887360817833 %
+[20] Char c: 1.4510398242155542 %
+[21] Char b: 1.3084487178488222 %
+[22] Char ö: 1.2946181597176227 %
+[23] Char j: 0.7221109717233265 %
+[24] Char y: 0.6866579917250407 %
+[25] Char x: 0.22323689603316216 %
+[26] Char w: 0.12096868449964547 %
+[27] Char z: 0.07947701010604727 %
+[28] Char é: 0.01577852406517115 %
+[29] Char q: 0.013635761537802226 %
-The first 31 characters have an accumulated ratio of 0.999936071599808.
+The first 30 characters have an accumulated ratio of 0.9998305269637442.
-748 sequences found.
+752 sequences found.
-First 512 (typical positive ratio): 0.997323508584682
-Next 512 (512-1024): 1.6823263208364526e-06
-Rest: 1.7780915628762273e-17
+First 512 (typical positive ratio): 0.996987580875875
+Next 512 (512-1024): 0.012946181597176228
+Rest: 4.640385298237959e-17
-- Processing end: 2016-09-28 22:29:21.590354
+- Processing end: 2021-03-16 20:24:14.019931
diff --git a/script/BuildLangModelLogs/LangThaiModel.log b/script/BuildLangModelLogs/LangThaiModel.log
index b7024c3..ea7437c 100644
--- a/script/BuildLangModelLogs/LangThaiModel.log
+++ b/script/BuildLangModelLogs/LangThaiModel.log
@@ -1,141 +1,192 @@
= Logs of language model for Thai (th) =
- Generated by BuildLangModel.py
-- Started: 2015-12-04 03:01:52.148282
-- Maximum depth: 3
-- Max number of pages: 50
+- Started: 2021-03-16 20:24:14.258574
+- Maximum depth: 4
+- Max number of pages: 100
== Parsed pages ==
-หน้าหลัก (revision 5512633)
-26 พฤศจิกายน (revision 5570053)
-27 พฤศจิกายน (revision 5888433)
-28 พฤศจิกายน (revision 6110206)
-กล้องโทรทรรศน์อวกาศฮับเบิล (revision 5830742)
-การประชุมสภาสงฆ์แห่งแคลมงต์ (revision 5463877)
-ความเอนเอียงเพื่อยืนยัน (revision 6231756)
-คัมภีร์พระเวท (revision 6109417)
-คาบสมุทรไซนาย (revision 5661104)
-จักรวรรดิโรมันตะวันออก (revision 6150148)
-ชาวมุสลิม (revision 6242838)
-ซุคฮอย ซู-24 (revision 6015891)
-ดาวอังคาร (revision 6235017)
-ดาวเคราะห์นอกระบบ (revision 5823077)
-ดินแดนศักดิ์สิทธิ์ (revision 6179072)
-ทฤษฎี (revision 5606447)
-ทะกะอะกิ คะจิตะ (revision 6177601)
-ท่าอากาศยานนานาชาติตริภูวัน (revision 6010470)
-นกกาเหว่า (revision 6142782)
-ประเทศอัฟกานิสถาน (revision 6216996)
-ประเทศเนปาล (revision 6206980)
-ปรากฏการณ์การวางกรอบ (revision 6046655)
-ปารีส (revision 6222115)
-พ.ศ. 1638 (revision 4723508)
-พ.ศ. 2438 (revision 5737055)
-พ.ศ. 2515 (revision 6197082)
-พ.ศ. 2544 (revision 6189598)
-พินัยกรรม (revision 5607889)
-มูลนิธิวิกิมีเดีย (revision 5816103)
-ระบบสุริยะ (revision 6201228)
-รางวัลโนเบล (revision 5828030)
-รางวัลโนเบลสาขาฟิสิกส์ (revision 6177103)
-รายชื่อบทความวันนี้ในอดีต (revision 5410610)
-ลักกีสไตรก์ (เพลง) (revision 6195816)
-ลุฟต์ฮันซา (revision 6116038)
-วิกฤตการณ์ผู้ย้ายถิ่นยุโรป (revision 6219634)
-วิกิพีเดีย (revision 6086299)
-วิกิพีเดียภาษาไทย (revision 6209148)
-สงครามครูเสด (revision 6228828)
-สงครามอังกฤษ–แซนซิบาร์ (revision 5829349)
-สติ (จิตวิทยา) (revision 6039161)
-สมมติฐาน (revision 6221744)
-สมเด็จพระราชินีมารีแห่งโรมาเนีย (revision 6211695)
-สมเด็จพระสันตะปาปาเออร์บันที่ 2 (revision 5828365)
-สารานุกรม (revision 6070482)
-อัลเฟรด โนเบล (revision 6214514)
-อาร์เธอร์ แมคโดนัลด์ (revision 6188035)
-เซนต์ปีเตอร์สเบิร์ก (revision 6162201)
-เทือกเขาฮินดูกูช (revision 5218921)
-เนื้อหาเสรี (revision 6160507)
+หน้าหลัก (revision 9018985)
+14 มีนาคม (revision 9303173)
+15 มีนาคม (revision 8874275)
+16 มีนาคม (revision 9306392)
+กองทัพพม่า (revision 9251263)
+การปฏิวัติเดือนกุมภาพันธ์ (revision 8956448)
+การประท้วงในประเทศพม่า พ.ศ. 2564 (revision 9304791)
+การประท้วงในประเทศรัสเซีย พ.ศ. 2564 (revision 9236735)
+การระบาดทั่วของโควิด-19 (revision 9289675)
+การระบาดทั่วของโควิด-19 ในประเทศไทย (revision 9305901)
+การระบาดทั่วของไวรัสโคโรนา พ.ศ. 2562–2563 เรียงตามประเทศและดินแดน (revision 9284827)
+การลอบสังหารจูเลียส ซีซาร์ (revision 8174866)
+จักรพรรดินิโคลัสที่ 2 แห่งรัสเซีย (revision 9299709)
+จักรวรรดิรัสเซีย (revision 9260038)
+จังหวัดสมุทรสาคร (revision 9227141)
+จูเลียส ซีซาร์ (revision 9304624)
+ชาวพม่าในไทย (revision 9289634)
+ซีแอตเทิล (revision 9295680)
+นิวซีแลนด์ (revision 9099286)
+บาตา (ประเทศอิเควทอเรียลกินี) (revision 8750850)
+บุคคลที่เสียชีวิตในปี พ.ศ. 2564 (revision 9306385)
+บูโพรพิออน (revision 9180305)
+ประเทศอิเควทอเรียลกินี (revision 9043997)
+ผลกระทบทางเศรษฐกิจและสังคมของการระบาดทั่วของไวรัสโคโรนา พ.ศ. 2562–2563 (revision 9289675)
+ผู้เผด็จการโรมัน (revision 6381320)
+พ.ศ. 2435 (revision 9191544)
+พ.ศ. 2460 (revision 9193829)
+พ.ศ. 2515 (revision 9293724)
+พ.ศ. 2554 (revision 9304980)
+พ.ศ. 500 (revision 5354846)
+พระคเณศ (revision 9259789)
+ฟรานซิส ฟอร์ด คอปโปลา (revision 8699907)
+ฟุตบอล (revision 9267162)
+มหาวิทยาลัยเคมบริดจ์ (revision 9278720)
+มาริโอ พูโซ (revision 4707337)
+มาร์คัส จูนิอัส บรูตัสผู้ลูก (revision 7223903)
+มาร์เกเรเธ ซัมบีเรีย (revision 9294258)
+มูลนิธิวิกิมีเดีย (revision 9155482)
+รัฐประหารในประเทศพม่า พ.ศ. 2564 (revision 9293480)
+รัฐอุตตราขัณฑ์ (revision 9246094)
+รางวัลโนเบลสาขาสรีรวิทยาหรือการแพทย์ (revision 9088756)
+ราชวงศ์โรมานอฟ (revision 8702698)
+รายชื่อบทความวันนี้ในอดีต (revision 8925803)
+วลาดีมีร์ ปูติน (revision 9137037)
+วัคซีนโรคติดเชื้อไวรัสโคโรนา 2019 (revision 9297189)
+วิกิพีเดีย (revision 9235310)
+วิกิพีเดียภาษาไทย (revision 9176821)
+วุฒิสภาโรมัน (revision 9281945)
+ศกุนตลา เทวี (revision 9296935)
+สงครามกลางเมืองซีเรีย (revision 8541828)
+สถาปัตยกรรมกอทิก (revision 8232804)
+สถาปัตยกรรมฟื้นฟูกอทิก (revision 6453482)
+สหรัฐ (revision 9288976)
+สาธารณรัฐโรมัน (revision 9050973)
+สารานุกรม (revision 9290003)
+สโมสรฟุตบอลบีจี ปทุม ยูไนเต็ด (revision 9292580)
+สโมสรฟุตบอลลิเวอร์พูล (revision 9262545)
+อองซานซูจี (revision 9292643)
+อะเลกเซย์ นาวัลนืย (revision 9230310)
+อาหรับสปริง (revision 8171494)
+อิตส์อะวันเดอร์ฟูลไลฟ์ (revision 9291334)
+อุทกภัยจากธารน้ำแข็งแตกในรัฐอุตตราขัณฑ์ พ.ศ. 2564 (revision 9300387)
+อู่ เหลียนเต๋อ (revision 9295504)
+เดนมาร์ก (revision 9103140)
+เดอะก็อดฟาเธอร์ (revision 8942413)
+เดอะก็อดฟาเธอร์ (นวนิยาย) (revision 4707337)
+เนื้อหาเสรี (revision 9063375)
+เบนจามิน เมานต์ฟอร์ต (revision 8820016)
+เบอร์มิงแฮม (revision 8949103)
+เหตุระเบิดที่บาตา พ.ศ. 2564 (revision 9301940)
+เอจออฟเอ็มไพร์ส (revision 8812026)
+แคว้นแคนเทอร์เบอรี (revision 8763458)
+แผ่นดินไหวในเกาะซูลาเวซี พ.ศ. 2564 (revision 9213896)
+โรคติดเชื้อไวรัสโคโรนา 2019 (revision 9303763)
+ไครสต์เชิร์ช (revision 9065152)
+ไทยลีก ฤดูกาล 2563–64 (revision 9306310)
+ไวรัสโคโรนาสายพันธุ์ใหม่ (SARS-CoV-2) (revision 9239363)
+0 มกราคม (revision 8811984)
+10 กรกฎาคม (revision 9204508)
+10 กันยายน (revision 9223073)
+10 กุมภาพันธ์ (revision 8791647)
+10 ตุลาคม (revision 9299190)
+10 ธันวาคม (revision 9187465)
+10 พฤศจิกายน (revision 9255261)
+10 พฤษภาคม (revision 9293733)
+10 มกราคม (revision 9256728)
+10 มิถุนายน (revision 8950621)
+10 มีนาคม (revision 9296320)
+10 สิงหาคม (revision 9287893)
+10 เมษายน (revision 9239957)
+11 กรกฎาคม (revision 9272225)
+11 กันยายน (revision 9263121)
+11 กุมภาพันธ์ (revision 9255762)
+11 ตุลาคม (revision 8872097)
+11 ธันวาคม (revision 9299195)
+11 พฤศจิกายน (revision 9301626)
+11 พฤษภาคม (revision 9295172)
+11 มกราคม (revision 9273530)
+11 มิถุนายน (revision 9261737)
+11 มีนาคม (revision 9204281)
+11 สิงหาคม (revision 9281431)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2015-12-04 03:05:06.181487
+- Wikipedia parsing ended at: 2021-03-16 20:29:56.645650
-105 characters appeared 401052 times.
+106 characters appeared 708244 times.
First 64 characters:
-[ 0] Char า: 8.857704237854442 %
-[ 1] Char น: 6.7679502907353655 %
-[ 2] Char ร: 6.739026360671434 %
-[ 3] Char ก: 5.388079351306065 %
-[ 4] Char อ: 5.099837427565503 %
-[ 5] Char ง: 4.861713692987443 %
-[ 6] Char เ: 4.5198627609387305 %
-[ 7] Char ม: 4.133628556895365 %
-[ 8] Char ว: 3.864336794231172 %
-[ 9] Char ด: 3.3152808114658447 %
-[10] Char ย: 3.195844927839781 %
-[11] Char ล: 3.1312647736453125 %
-[12] Char ท: 2.69615910156289 %
-[13] Char ส: 2.6001615750575984 %
-[14] Char ะ: 2.392457835891605 %
-[15] Char ค: 2.384229476476866 %
-[16] Char บ: 2.3321165335168503 %
-[17] Char ต: 2.196473275285998 %
-[18] Char ห: 1.983782651626223 %
-[19] Char ป: 1.9192024974317545 %
-[20] Char แ: 1.7813151411787 %
-[21] Char จ: 1.76261432432702 %
-[22] Char พ: 1.5075351824701035 %
-[23] Char ข: 1.3519443862641254 %
-[24] Char ใ: 1.3295034060421091 %
-[25] Char ไ: 1.2227840778751882 %
-[26] Char ช: 1.0407627938521689 %
-[27] Char โ: 0.9382823175049619 %
-[28] Char ศ: 0.8078752879925796 %
-[29] Char ำ: 0.7393056262030859 %
-[30] Char ถ: 0.599672860377208 %
-[31] Char ซ: 0.541076967575277 %
-[32] Char e: 0.43734977010462484 %
-[33] Char ผ: 0.43585370475649043 %
-[34] Char ณ: 0.4019428901987772 %
-[35] Char a: 0.3897250231890129 %
-[36] Char i: 0.3657879776188624 %
-[37] Char ษ: 0.3647906007201061 %
-[38] Char ภ: 0.34185093204871186 %
-[39] Char ธ: 0.3181632307032505 %
-[40] Char o: 0.3176645422538723 %
-[41] Char n: 0.3139243788835363 %
-[42] Char ญ: 0.29248077556027646 %
-[43] Char r: 0.28350438347147006 %
-[44] Char t: 0.2705384837876385 %
-[45] Char s: 0.2488455362396896 %
-[46] Char l: 0.19598456060560726 %
-[47] Char ฟ: 0.19473783948216192 %
-[48] Char c: 0.16356981139602844 %
-[49] Char ฐ: 0.15833358267755804 %
-[50] Char ฤ: 0.15284800973439852 %
-[51] Char ๆ: 0.14910784636406252 %
-[52] Char d: 0.13090571796176056 %
-[53] Char ฮ: 0.1244227681198448 %
-[54] Char h: 0.12043326052481973 %
-[55] Char u: 0.12043326052481973 %
-[56] Char m: 0.09599752650529109 %
-[57] Char y: 0.08951457666337533 %
-[58] Char ฏ: 0.08677179019179557 %
-[59] Char p: 0.08253293837208142 %
-[60] Char f: 0.08153556147332515 %
-[61] Char S: 0.07604998853016566 %
-[62] Char ฝ: 0.07330720205858592 %
-[63] Char ฉ: 0.0673229406660483 %
+[ 0] Char า: 8.374373803378496 %
+[ 1] Char น: 7.171539751836938 %
+[ 2] Char ร: 6.9999887044577855 %
+[ 3] Char ก: 5.42581370262226 %
+[ 4] Char อ: 5.080028916588068 %
+[ 5] Char เ: 4.61507616019338 %
+[ 6] Char ง: 4.240911324345847 %
+[ 7] Char ม: 4.100846600888959 %
+[ 8] Char ว: 3.364377248518872 %
+[ 9] Char ย: 3.31594761127521 %
+[10] Char ล: 3.300981017841309 %
+[11] Char ด: 2.779550550375294 %
+[12] Char ส: 2.7203901480280805 %
+[13] Char ท: 2.6363795528094838 %
+[14] Char ต: 2.4035501889179436 %
+[15] Char ค: 2.3338002157448563 %
+[16] Char ะ: 2.3099383828172213 %
+[17] Char บ: 2.1609784198665998 %
+[18] Char ป: 2.104077125962239 %
+[19] Char แ: 1.9566703000660788 %
+[20] Char ห: 1.8754835904010483 %
+[21] Char พ: 1.6814826528710445 %
+[22] Char จ: 1.4730798990178526 %
+[23] Char ช: 1.385680641134976 %
+[24] Char ใ: 1.3149423080181406 %
+[25] Char ข: 1.2114469024799364 %
+[26] Char ศ: 1.095808789061397 %
+[27] Char โ: 1.0651696308052028 %
+[28] Char ไ: 1.045967209040952 %
+[29] Char ซ: 0.7435290662540028 %
+[30] Char ำ: 0.6989116745076556 %
+[31] Char ผ: 0.550375294390069 %
+[32] Char ถ: 0.47314202450003107 %
+[33] Char ธ: 0.461422899452731 %
+[34] Char ภ: 0.42386522159029943 %
+[35] Char ณ: 0.4122872908206776 %
+[36] Char ษ: 0.40988698810014623 %
+[37] Char a: 0.4049451883814053 %
+[38] Char e: 0.38193052111984005 %
+[39] Char i: 0.33717193509581445 %
+[40] Char ฐ: 0.31359249072353595 %
+[41] Char ญ: 0.29749634306820816 %
+[42] Char n: 0.29213096051643217 %
+[43] Char ฟ: 0.29071901773964903 %
+[44] Char o: 0.28874229785215266 %
+[45] Char r: 0.2702458474762935 %
+[46] Char t: 0.2569735853745319 %
+[47] Char s: 0.19682482308357005 %
+[48] Char l: 0.17070388171308193 %
+[49] Char h: 0.13385217523904192 %
+[50] Char u: 0.12919276407565755 %
+[51] Char c: 0.12834559840958765 %
+[52] Char ฮ: 0.12269782730245507 %
+[53] Char ฤ: 0.11690886191764421 %
+[54] Char d: 0.1139437820863996 %
+[55] Char ฉ: 0.10886078808998029 %
+[56] Char S: 0.1009539085399947 %
+[57] Char C: 0.09883599437481996 %
+[58] Char m: 0.09544733171054044 %
+[59] Char ฏ: 0.08005715544360419 %
+[60] Char ๆ: 0.07906879549985599 %
+[61] Char y: 0.0773744641677162 %
+[62] Char ฝ: 0.07412699578111498 %
+[63] Char ฒ: 0.07059713883915714 %
-The first 64 characters have an accumulated ratio of 0.989480167160368.
+The first 64 characters have an accumulated ratio of 0.9874944228260318.
-2324 sequences found.
+2704 sequences found.
-First 512 (typical positive ratio): 0.8815720594354438
-Next 512 (512-1024): 7.480326740672033e-06
-Rest: 0.026341928296264486
+First 512 (typical positive ratio): 0.8690353564146914
+Next 512 (512-1024): 0.0007906879549985598
+Rest: 0.03156084221511464
-- Processing end: 2015-12-04 03:05:06.800467
+- Processing end: 2021-03-16 20:29:57.119132
diff --git a/script/BuildLangModelLogs/LangTurkishModel.log b/script/BuildLangModelLogs/LangTurkishModel.log
index 51b31ad..b683c86 100644
--- a/script/BuildLangModelLogs/LangTurkishModel.log
+++ b/script/BuildLangModelLogs/LangTurkishModel.log
@@ -1,113 +1,161 @@
= Logs of language model for Turkish (tr) =
- Generated by BuildLangModel.py
-- Started: 2015-12-04 02:22:03.929245
-- Maximum depth: 3
-- Max number of pages: 50
+- Started: 2021-03-16 20:29:57.369383
+- Maximum depth: 4
+- Max number of pages: 100
== Parsed pages ==
-Ana_Sayfa (revision 16293313)
-1048 (revision 12894005)
-1131 (revision 14840814)
-16. yüzyıl (revision 15185081)
-1859 (revision 16014427)
-1866 (revision 16120346)
-1869 (revision 12888270)
-1892 (revision 13955858)
-1895 (revision 15334635)
-1902 (revision 16283638)
-1906 (revision 15874323)
-1918 (revision 16099474)
-1926 (revision 16180584)
-1927 (revision 15370980)
-1940 (revision 15370990)
-1943 (revision 16091797)
-1944 (revision 16247827)
-1945 (revision 16281147)
-1948 (revision 15443886)
-1961 (revision 15799529)
-1964 (revision 16085332)
-1975 (revision 15006928)
-1980 (revision 16213240)
-1981 (revision 16295456)
-1983 (revision 16327128)
-1993 (revision 16300456)
-2002 (revision 16297206)
-2015 (revision 16328338)
-24 Ekim (revision 16213661)
-4 Aralık (revision 16341162)
-ABD (revision 16325951)
-ABD Senatosu (revision 15970439)
-Adam Horowitz (revision 14362106)
-Akçe (revision 16261547)
-Altın Takım (revision 13503001)
-American Broadcasting Company (revision 16055235)
-Amerika Birleşik Devletleri (revision 16325951)
-Ana Sayfa/Kardeş projeler (revision 16293313)
-Ana Sayfa/Kategoriler (revision 16293313)
-Aptullah Kuran (revision 15744893)
-Avrupa (revision 16299756)
-Ayasofya (revision 16305207)
-BM Güvenlik Konseyi (revision 16085518)
-Birleşmiş Milletler (revision 16258474)
-Budapeşte (revision 16219173)
-CIA (revision 16054325)
-Charlie Pace (revision 16129416)
-Cuma (revision 14197127)
-Desmond Hume (revision 16035300)
-Diğerleri (Lost) (revision 16329444)
+Ana_Sayfa (revision 25131171)
+15 Mart (revision 25133274)
+16 Mart (revision 25130723)
+17 Mart (revision 25101714)
+1920 (revision 24886521)
+1921 (revision 24934034)
+1926 (revision 24937098)
+1968 (revision 25060729)
+2003 (revision 25043871)
+Abdullah Cevdet (revision 25117345)
+Afganistan (revision 25053860)
+Albanya (revision 25130585)
+Anaheim, Kaliforniya (revision 25012994)
+Azerbaycan Yahudileri (revision 25132094)
+Georg Ohm (revision 24888782)
+Haldun Taner (revision 25064462)
+Hazar Kağanlığı (revision 25113376)
+Interscope Records (revision 24937048)
+Kaliforniya (revision 25130601)
+Kamil Rıfkı Urga (revision 25105741)
+Kuzey Lefkoşa (revision 24753125)
+Kâbil (revision 24861920)
+Latin Grammy Ödülleri (revision 22281504)
+Lefkoşa (revision 24897461)
+Moskova Antlaşması (revision 25031021)
+Mustafa Kemal Atatürk (revision 25133394)
+My Lai Katliamı (revision 25132972)
+Nar (revision 25023035)
+Natalia Oreiro (revision 25131895)
+No Doubt (revision 24925807)
+Osmanlı-Venedik Savaşı (1570-1573) (revision 24483832)
+Osmanlı İmparatorluğu (revision 25136006)
+Rachel Corrie (revision 24929876)
+Robert H. Goddard (revision 24930216)
+Rock müzik (revision 24864552)
+Selimiye, Lefkoşa (revision 24306825)
+Selimiye Meydanı (revision 24185756)
+Selma Lagerlöf (revision 25097031)
+Sovyetler Birliği (revision 25004103)
+Sıcak çikolata (revision 24978056)
+The Beacon Street Collection (revision 24950711)
+Türbe (revision 25041350)
+Türkiye Büyük Millet Meclisi (revision 25113834)
+Türkçe (revision 25069652)
+Vietnam Savaşı (revision 24942314)
+Vikipedi (revision 25130148)
+Yılın günleri listesi (revision 24802413)
+Ziya Gökalp (revision 24942014)
+Özgür içerik (revision 24349743)
+İstanbul (revision 25106647)
+İtilaf Devletleri (revision 25043005)
+İttihat ve Terakki (revision 25125484)
+İttik Dede Türbesi (revision 25133559)
+0 Mart (revision 24329470)
+0 Ocak (revision 23186786)
+10 Aralık (revision 24772485)
+10 Ağustos (revision 24980345)
+10 Ekim (revision 24850081)
+10 Eylül (revision 25090510)
+10 Haziran (revision 25121277)
+10 Kasım (revision 24973976)
+10 Mart (revision 25105572)
+10 Mayıs (revision 25120763)
+10 Nisan (revision 25021557)
+10 Ocak (revision 25093298)
+10 Temmuz (revision 24907247)
+10 Şubat (revision 25005286)
+11 Aralık (revision 24822783)
+11 Ağustos (revision 24750760)
+11 Ekim (revision 25021451)
+11 Eylül (revision 24878760)
+11 Haziran (revision 24946135)
+11 Kasım (revision 24751390)
+11 Mart (revision 25101669)
+11 Mayıs (revision 25123240)
+11 Nisan (revision 25114265)
+11 Ocak (revision 25121144)
+11 Temmuz (revision 25018276)
+11 Şubat (revision 25044631)
+12 Aralık (revision 25120395)
+12 Ağustos (revision 24964866)
+12 Ekim (revision 24822300)
+12 Eylül (revision 25105547)
+12 Haziran (revision 24891411)
+12 Kasım (revision 25105520)
+12 Mart (revision 25105618)
+12 Mayıs (revision 25084509)
+12 Nisan (revision 25133262)
+12 Ocak (revision 25105557)
+12 Temmuz (revision 25132218)
+12 Şubat (revision 25121399)
+13 Aralık (revision 24801826)
+13 Ağustos (revision 25136701)
+13 Ekim (revision 25121155)
+13 Eylül (revision 24750978)
+13 Haziran (revision 24815847)
+13 Kasım (revision 25084464)
+13 Mart (revision 25125469)
+13 Mayıs (revision 24897682)
+13 Nisan (revision 25084441)
+13 Ocak (revision 24756340)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2015-12-04 02:24:44.728803
+- Wikipedia parsing ended at: 2021-03-16 20:34:51.082747
-48 characters appeared 267623 times.
+54 characters appeared 913820 times.
-First 36 characters:
-[ 0] Char a: 12.311722086666691 %
-[ 1] Char e: 8.716365932673948 %
-[ 2] Char i: 8.507863673899479 %
-[ 3] Char n: 7.322987934519828 %
-[ 4] Char r: 6.979220769515326 %
-[ 5] Char l: 6.609297407173524 %
-[ 6] Char ı: 4.514933320379788 %
-[ 7] Char d: 4.3475336574210734 %
-[ 8] Char t: 4.2634601659797555 %
-[ 9] Char k: 4.240293248338147 %
-[10] Char s: 3.929781819948211 %
-[11] Char m: 3.429451130881875 %
-[12] Char u: 3.0998830444319063 %
-[13] Char y: 2.9212735826143494 %
-[14] Char o: 2.7135186437638024 %
-[15] Char b: 2.3129551645411643 %
-[16] Char ü: 1.8305601536489764 %
-[17] Char ş: 1.5988909772328985 %
-[18] Char z: 1.2267256551193282 %
-[19] Char h: 1.1983274980102607 %
-[20] Char v: 1.194964558352608 %
-[21] Char c: 1.143773143563894 %
-[22] Char g: 1.1004285879763698 %
-[23] Char p: 1.0178497363828969 %
-[24] Char ç: 0.8295251155543433 %
-[25] Char ğ: 0.8205572764672693 %
-[26] Char f: 0.7047226882592303 %
-[27] Char ö: 0.6710932916827029 %
-[28] Char j: 0.1296600068006113 %
-[29] Char w: 0.11359262843627041 %
-[30] Char â: 0.07846859201189733 %
-[31] Char î: 0.04147625577771716 %
-[32] Char x: 0.024287897527492032 %
-[33] Char é: 0.014946398478456635 %
-[34] Char q: 0.01083613889688106 %
-[35] Char û: 0.009341499049035397 %
+First 33 characters:
+[ 0] Char a: 12.104681447112123 %
+[ 1] Char e: 8.960189096320939 %
+[ 2] Char i: 8.522575561926857 %
+[ 3] Char n: 7.2878685080212735 %
+[ 4] Char r: 6.9632969293734 %
+[ 5] Char l: 6.837889299862117 %
+[ 6] Char ı: 4.501433542710818 %
+[ 7] Char k: 4.343196690814383 %
+[ 8] Char t: 4.3038016239522 %
+[ 9] Char d: 4.30150357838524 %
+[10] Char s: 3.781816988028277 %
+[11] Char m: 3.4274802477511983 %
+[12] Char u: 3.119761003261036 %
+[13] Char y: 2.903635289225449 %
+[14] Char o: 2.639688341248824 %
+[15] Char b: 2.1207677660808475 %
+[16] Char ü: 1.8651375544417939 %
+[17] Char ş: 1.4568514587117813 %
+[18] Char v: 1.4549911361099561 %
+[19] Char h: 1.216869843076317 %
+[20] Char z: 1.1867763892232606 %
+[21] Char g: 1.1811954214177847 %
+[22] Char c: 1.125714035586877 %
+[23] Char p: 0.8964566325972293 %
+[24] Char ç: 0.8571709964763301 %
+[25] Char ö: 0.7883390602087939 %
+[26] Char ğ: 0.7411744107154582 %
+[27] Char f: 0.7040773894202359 %
+[28] Char j: 0.13821102624149176 %
+[29] Char w: 0.07933728743078505 %
+[30] Char â: 0.05865487732813902 %
+[31] Char î: 0.03994222056860213 %
+[32] Char û: 0.028014269768663416 %
-The first 36 characters have an accumulated ratio of 0.99980569681978.
+The first 33 characters have an accumulated ratio of 0.9993849992339848.
-935 sequences found.
+1097 sequences found.
-First 512 (typical positive ratio): 0.991865243864388
-Next 512 (512-1024): 3.7365996196141585e-06
-Rest: 2.949029909160572e-17
+First 512 (typical positive ratio): 0.9923593121944019
+Next 512 (512-1024): 0.014568514587117814
+Rest: 9.536163614441446e-05
-- Processing end: 2015-12-04 02:24:44.883537
+- Processing end: 2021-03-16 20:34:51.176659
diff --git a/script/BuildLangModelLogs/LangVietnameseModel.log b/script/BuildLangModelLogs/LangVietnameseModel.log
index 6732b1a..1c111ad 100644
--- a/script/BuildLangModelLogs/LangVietnameseModel.log
+++ b/script/BuildLangModelLogs/LangVietnameseModel.log
@@ -1,121 +1,179 @@
= Logs of language model for Vietnamese (vi) =
- Generated by BuildLangModel.py
-- Started: 2016-02-13 03:37:17.480303
-- Maximum depth: 3
-- Max number of pages: 40
+- Started: 2021-03-16 20:34:51.373194
+- Maximum depth: 4
+- Max number of pages: 100
== Parsed pages ==
-Chữ_Quốc_ngữ (revision 22887853)
-1651 (revision 21455247)
-1773 (revision 21354755)
-1815 (revision 21361292)
-1838 (revision 21361314)
-1865 (revision 21361338)
-1869 (revision 21361342)
-1888 (revision 21389506)
-1902 (revision 21354811)
-1918 (revision 21354828)
-1919 (revision 21354829)
-1938 (revision 21354849)
-1945 (revision 21354857)
-22 tháng 2 (revision 21376086)
-26 tháng 11 (revision 22579845)
-28 tháng 12 (revision 22475308)
-A (revision 22549334)
-ASCII (revision 22528409)
-Alexandre de Rhodes (revision 22859954)
-Antonio Barbosa (revision 22145269)
-B (revision 22836557)
-BBC (revision 22863903)
-Biên khảo (revision 22531516)
-Bán nguyên âm (revision 22655600)
-Bình luận (revision 22117664)
-Bảng chữ cái Bồ Đào Nha (revision 22887853)
-Bảng chữ cái Hy Lạp (revision 21362081)
-Bảng chữ cái Latinh (revision 22442448)
-Bắc Kỳ (revision 22393289)
-Bồ Đào Nha (revision 22620858)
-C (revision 21341881)
-Cao Xuân Dục (revision 22620201)
-Chính tả (revision 22187359)
-Chính tả tiếng Việt (revision 20897580)
-Chữ Hán (revision 22889609)
-Chữ Nôm (revision 22781506)
-Chữ cái (revision 22169220)
-Công giáo (revision 22173119)
-D (revision 21447691)
+Chữ_Quốc_ngữ (revision 64521024)
+1651 (revision 26251708)
+1838 (revision 63252802)
+1865 (revision 64100421)
+1869 (revision 59848285)
+1888 (revision 64474933)
+1902 (revision 64405865)
+1918 (revision 64446780)
+1919 (revision 64400438)
+1938 (revision 63147818)
+22 tháng 2 (revision 64199177)
+26 tháng 11 (revision 60306925)
+28 tháng 12 (revision 64197178)
+A (revision 64396586)
+ASCII (revision 64542934)
+Alexandre de Rhodes (revision 64481737)
+Antonio Barbosa (revision 28290803)
+B (revision 63753684)
+BBC (revision 64477721)
+Biên khảo (revision 64480018)
+Bàn phím máy tính (revision 63261029)
+Bá Đa Lộc (revision 64107557)
+Bán nguyên âm (revision 64296580)
+Bình luận (revision 26758605)
+Bảng chữ cái Bồ Đào Nha (revision 64521024)
+Bảng chữ cái Hy Lạp (revision 64540140)
+Bảng chữ cái Latinh (revision 64566174)
+Bảng chữ cái Latinh cơ bản của ISO (revision 64566174)
+Bảng chữ cái Phoenicia (revision 64540140)
+Bảng mẫu tự ngữ âm quốc tế (revision 64494501)
+Bắc Kỳ (revision 64538623)
+Bồ Đào Nha (revision 64477762)
+Bộ Giáo dục và Đào tạo (Việt Nam) (revision 64439920)
+Bộ gõ tiếng Việt (revision 64399872)
+C (revision 64341946)
+Cao Xuân Dục (revision 64403009)
+Chiều cao (revision 63620682)
+Christoforo Borri (revision 39684524)
+Chính tả (revision 64168374)
+Chính tả tiếng Việt (revision 64566759)
+Chủ tịch Hồ Chí Minh (revision 64592392)
+Chữ Hán (revision 64488663)
+Chữ Latinh (revision 64566174)
+Chữ Nôm (revision 64497361)
+Chữ b đuôi quặp (revision 63724573)
+Chữ cái (revision 63906900)
+Chữ số La Mã (revision 64606955)
+Chữ tượng hình Ai Cập (revision 64545532)
+Chữ viết tiếng Việt (revision 64521025)
+Các dân tộc Việt Nam (revision 64521289)
+Công giáo tại Việt Nam (revision 64479778)
+Cư Jút (revision 64446849)
+Cư Kuin (revision 64351798)
+Cư Ê Wi (revision 64324496)
+Cải cách giáo dục của Cộng hòa Xã hội chủ nghĩa Việt Nam (revision 63800666)
+Cổ tự học (revision 63417312)
+D (revision 64521463)
+Danh sách các chữ cái Latinh (revision 64566174)
+De facto (revision 64458216)
+Di chúc Hồ Chí Minh (revision 64479855)
+Du ký (revision 64306751)
+Dòng Tên (revision 64563470)
+Dấu câu (revision 64430387)
+Dấu huyền (revision 64200881)
+Dấu hỏi (revision 64314350)
+Dấu ngã (revision 64005169)
+Dấu nặng (revision 64200881)
+Dấu phụ (revision 43648394)
+Dấu sắc (revision 64200881)
+Dấu âm ngắn (revision 64560651)
+E (revision 63474436)
+Ea H'leo (revision 64600906)
+Ea Wy (revision 64564116)
+F (revision 64556895)
+Francesco Buzomi (revision 64573844)
+Francisco de Pina (revision 64573938)
+G (revision 63840275)
+Gaspar do Amaral (revision 61771486)
+Gemeinsame Normdatei (revision 63835749)
+Gen (revision 64577144)
+Gia Định báo (revision 64521887)
+Giovanni Filippo de Marini (revision 64381034)
+Girolamo Maiorica (revision 64500026)
+Giáo hội Công giáo Rôma (revision 64587044)
+H (revision 63175940)
+Hiến pháp nước Cộng hòa Xã hội chủ nghĩa Việt Nam 2013 (revision 64587062)
+Hoàng Phê (revision 63792712)
+Hán học (revision 64209708)
+Hệ chữ viết Latinh (revision 64566174)
+Hệ thống chữ nổi tiếng Việt (revision 64158849)
+Hồ Chí Minh (revision 64592392)
+Hồ Dzếnh (revision 64471051)
+Hội Trí Tri (revision 64593204)
+I (revision 55105217)
+IPA (revision 64494501)
+ISBN (revision 64594093)
+ISO/IEC 646 (revision 64542934)
+J (revision 64280732)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2016-02-13 03:42:06.560479
+- Wikipedia parsing ended at: 2021-03-16 20:57:28.725327
-101 characters appeared 222814 times.
+107 characters appeared 961999 times.
-First 55 characters:
-[ 0] Char n: 11.262308472537633 %
-[ 1] Char h: 8.881398834902654 %
-[ 2] Char t: 7.022898022565907 %
-[ 3] Char c: 6.365398942615815 %
-[ 4] Char i: 6.198443544840091 %
-[ 5] Char g: 5.591210606155808 %
-[ 6] Char a: 3.5998635633308496 %
-[ 7] Char u: 2.8499106878382867 %
-[ 8] Char m: 2.615185760320267 %
-[ 9] Char o: 2.6012728105056238 %
-[10] Char đ: 2.222032726848403 %
-[11] Char r: 2.1102803234985235 %
-[12] Char à: 2.0447548179198796 %
-[13] Char v: 1.9437737305555307 %
-[14] Char l: 1.9119085874316697 %
-[15] Char á: 1.7539292863105551 %
-[16] Char p: 1.6453185167897888 %
-[17] Char b: 1.541195795596327 %
-[18] Char ư: 1.4397659033992478 %
-[19] Char s: 1.3760356171515256 %
-[20] Char y: 1.280440187779942 %
-[21] Char e: 1.2454334108269678 %
-[22] Char d: 1.1251537156552103 %
-[23] Char ế: 1.071745940560288 %
-[24] Char k: 1.0695019163966357 %
-[25] Char â: 0.9658280000359044 %
-[26] Char ữ: 0.9604423420431392 %
-[27] Char ê: 0.8374698178749989 %
-[28] Char ệ: 0.7459136319979893 %
-[29] Char ô: 0.7073164163831717 %
-[30] Char ạ: 0.6727584442629277 %
-[31] Char ộ: 0.6705144200992756 %
-[32] Char ố: 0.6476253736300233 %
-[33] Char ó: 0.6072329386842837 %
-[34] Char ả: 0.5484395055965963 %
-[35] Char ủ: 0.5475418959311353 %
-[36] Char q: 0.5138815334763525 %
-[37] Char ợ: 0.48560682901433483 %
-[38] Char ờ: 0.4851580241816044 %
-[39] Char ể: 0.4748355130288043 %
-[40] Char ớ: 0.4676546357051173 %
-[41] Char ấ: 0.418286104104769 %
-[42] Char ị: 0.40212913012647317 %
-[43] Char ầ: 0.3904602044754818 %
-[44] Char ọ: 0.3801376933226817 %
-[45] Char ề: 0.3787912788244904 %
-[46] Char ơ: 0.3590438661843511 %
-[47] Char í: 0.35679984202069887 %
-[48] Char ụ: 0.35276059852612496 %
-[49] Char ậ: 0.3469261357006292 %
-[50] Char ì: 0.32762752789322036 %
-[51] Char ă: 0.3253835037295682 %
-[52] Char ứ: 0.29665999443482005 %
-[53] Char ồ: 0.29665999443482005 %
-[54] Char x: 0.2939671654384374 %
+First 54 characters:
+[ 0] Char n: 11.732340678108812 %
+[ 1] Char h: 8.846059091537517 %
+[ 2] Char t: 6.799279417130371 %
+[ 3] Char c: 6.610713732550658 %
+[ 4] Char i: 6.088467867430215 %
+[ 5] Char g: 5.545639860332495 %
+[ 6] Char a: 3.414244713352093 %
+[ 7] Char u: 2.916842948901194 %
+[ 8] Char m: 2.5668425850754524 %
+[ 9] Char o: 2.5124766241960748 %
+[10] Char đ: 2.3970918888689074 %
+[11] Char à: 2.0960520748982066 %
+[12] Char v: 2.0507297824633914 %
+[13] Char r: 1.966114309890135 %
+[14] Char l: 1.7723511147100985 %
+[15] Char á: 1.7447003583163807 %
+[16] Char p: 1.523390356954633 %
+[17] Char ư: 1.47359820540354 %
+[18] Char b: 1.435656378021183 %
+[19] Char s: 1.3317061660147256 %
+[20] Char y: 1.2888786786680653 %
+[21] Char d: 1.1103961646529779 %
+[22] Char k: 1.0495852906292003 %
+[23] Char ế: 0.9804583996449061 %
+[24] Char e: 0.9535352947352336 %
+[25] Char ộ: 0.8640341621976738 %
+[26] Char ệ: 0.8197513718829229 %
+[27] Char â: 0.8006245328737348 %
+[28] Char ê: 0.792724316761244 %
+[29] Char ô: 0.7877347065849342 %
+[30] Char ố: 0.7180880645406076 %
+[31] Char ạ: 0.7030152837996714 %
+[32] Char q: 0.6624747011171529 %
+[33] Char ả: 0.650208576100391 %
+[34] Char ữ: 0.622038068646641 %
+[35] Char ủ: 0.589085851440594 %
+[36] Char ó: 0.5876305484725036 %
+[37] Char ớ: 0.5369028450133524 %
+[38] Char ề: 0.48440798795009143 %
+[39] Char í: 0.47162211187329717 %
+[40] Char ờ: 0.47131026123727776 %
+[41] Char ợ: 0.46403374639682576 %
+[42] Char ấ: 0.44532270823566344 %
+[43] Char ể: 0.4278590726185786 %
+[44] Char ă: 0.4115388893335648 %
+[45] Char ị: 0.40748483106531297 %
+[46] Char ậ: 0.3686074517748979 %
+[47] Char ơ: 0.36434549308263314 %
+[48] Char ự: 0.35519787442606493 %
+[49] Char ồ: 0.3434515004693352 %
+[50] Char ụ: 0.3314972260885926 %
+[51] Char ầ: 0.32848266994040537 %
+[52] Char ì: 0.32785896866836656 %
+[53] Char x: 0.32650761591228267 %
-The first 55 characters have an accumulated ratio of 0.9603301408349568.
+The first 54 characters have an accumulated ratio of 0.9567099342099108.
-1494 sequences found.
+1890 sequences found.
-First 512 (typical positive ratio): 0.9321889118082535
-Next 512 (512-1024): 0.009604423420431392
-Rest: 0.0068905733918831966
+First 512 (typical positive ratio): 0.9336493792477815
+Next 512 (512-1024): 0.003551978744260649
+Rest: 0.007456342500128027
-- Processing end: 2016-02-13 03:42:07.174723
+- Processing end: 2021-03-16 20:57:29.603172
diff --git a/src/LangModels/LangCroatianModel.cpp b/src/LangModels/LangCroatianModel.cpp
index 961bd0e..e1410b8 100644
--- a/src/LangModels/LangCroatianModel.cpp
+++ b/src/LangModels/LangCroatianModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Croatian *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-25 23:50:27.590137
+ * On: 2021-03-16 19:18:55.486472
**/
/* Character Mapping Table:
@@ -61,45 +62,45 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Windows_1250_CharToOrderMap[] =
+static const unsigned char Iso_8859_2_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM, 49, 50, 24, 51, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM, 52, 53, 24, 54, /* 9X */
- SYM,SYM,SYM, 40,SYM, 55,SYM,SYM,SYM,SYM, 56,SYM,SYM,SYM,SYM, 57, /* AX */
- SYM,SYM,SYM, 40,SYM,SYM,SYM,SYM,SYM, 58, 59,SYM, 60,SYM, 61, 62, /* BX */
- 63, 41, 43, 64, 36, 65, 25, 39, 18, 31, 66, 47, 67, 68, 69, 70, /* CX */
- 26, 71, 72, 44, 73, 74, 32,SYM, 75, 76, 48, 77, 33, 78, 79, 80, /* DX */
- 81, 41, 43, 82, 36, 83, 25, 39, 18, 31, 84, 47, 85, 86, 87, 88, /* EX */
- 26, 89, 90, 44, 91, 92, 32,SYM, 93, 94, 48, 95, 33, 96, 97,SYM, /* FX */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ SYM, 49,SYM, 38,SYM, 50, 51,SYM,SYM, 23, 52, 53, 54,SYM, 24, 55, /* AX */
+ SYM, 56,SYM, 38,SYM, 57, 58,SYM,SYM, 23, 59, 60, 61,SYM, 24, 62, /* BX */
+ 63, 39, 46, 64, 36, 65, 25, 43, 18, 31, 66, 45, 67, 68, 69, 70, /* CX */
+ 26, 71, 72, 47, 73, 74, 32,SYM, 75, 76, 48, 77, 33, 78, 79, 80, /* DX */
+ 81, 39, 46, 82, 36, 83, 25, 43, 18, 31, 84, 45, 85, 86, 87, 88, /* EX */
+ 26, 89, 90, 47, 91, 92, 32,SYM, 93, 94, 48, 95, 33, 96, 97,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_2_CharToOrderMap[] =
+static const unsigned char Iso_8859_13_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 98,SYM, 40,SYM, 99,100,SYM,SYM, 23,101,102,103,SYM, 24,104, /* AX */
- SYM,105,SYM, 40,SYM,106,107,SYM,SYM, 23,108,109,110,SYM, 24,111, /* BX */
- 112, 41, 43,113, 36,114, 25, 39, 18, 31,115, 47,116,117,118,119, /* CX */
- 26,120,121, 44,122,123, 32,SYM,124,125, 48,126, 33,127,128,129, /* DX */
- 130, 41, 43,131, 36,132, 25, 39, 18, 31,133, 47,134,135,136,137, /* EX */
- 26,138,139, 44,140,141, 32,SYM,142,143, 48,144, 33,145,146,SYM, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 98,SYM,SYM,SYM,SYM, 99, /* AX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,100,SYM,SYM,SYM,SYM,101, /* BX */
+ 102,103,104, 25, 36,105,106,107, 18, 31,108,109,110,111,112,113, /* CX */
+ 23,114,115, 47, 40, 37, 32,SYM,116, 38,117,118, 33,119, 24,120, /* DX */
+ 121,122,123, 25, 36,124,125,126, 18, 31,127,128,129,130,131,132, /* EX */
+ 23,133,134, 47, 40, 37, 32,SYM,135, 38,136,137, 33,138, 24,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -109,147 +110,160 @@ static const unsigned char Iso_8859_16_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,147,148, 40,SYM,SYM, 23,SYM, 23,SYM,149,SYM,150,SYM,151,152, /* AX */
- SYM,SYM, 18, 40, 24,SYM,SYM,SYM, 24, 18,153,SYM, 45, 45,154,155, /* BX */
- 46, 41, 43,156, 36, 25,157, 39, 35, 31, 42, 47,158,159,160,161, /* CX */
- 26,162,163, 44,164,165, 32,166,167,168, 48,169, 33,170,171,172, /* DX */
- 46, 41, 43,173, 36, 25,174, 39, 35, 31, 42, 47,175,176,177,178, /* EX */
- 26,179,180, 44,181,182, 32,183,184,185, 48,186, 33,187,188,189, /* FX */
+ SYM,139,140, 38,SYM,SYM, 23,SYM, 23,SYM,141,SYM,142,SYM,143,144, /* AX */
+ SYM,SYM, 18, 38, 24,SYM,SYM,SYM, 24, 18,145,SYM, 44, 44,146,147, /* BX */
+ 42, 39, 46,148, 36, 25,149, 43, 34, 31, 41, 45,150,151,152,153, /* CX */
+ 26,154,155, 47,156,157, 32,158,159,160, 48,161, 33,162,163,164, /* DX */
+ 42, 39, 46,165, 36, 25,166, 43, 34, 31, 41, 45,167,168,169,170, /* EX */
+ 26,171,172, 47,173,174, 32,175,176,177, 48,178, 33,179,180,181, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
+static const unsigned char Windows_1250_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 36,190,191, 31,192, 32, 33, 41,193, 18, 36, 18, 25, 25, 31,194, /* 8X */
- 195,196,197,198,199,200,201, 44,202,203, 32, 37, 48,204,205, 33, /* 9X */
- SYM,SYM,206,SYM,SYM,SYM,SYM,207,SYM,SYM,SYM,208,SYM,SYM,209,210, /* AX */
- 211,212,SYM,SYM,213,214,SYM,SYM, 40,215,216,217,218,219,220,221, /* BX */
- 222,223,SYM,SYM,224,225,SYM,SYM,SYM,SYM,SYM,226,227, 37,228, 38, /* CX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,229,230,231,SYM,SYM,232,233, /* DX */
- 234, 23,SYM,SYM, 23,235,236, 41,237,238,239, 24, 24,240, 44,241, /* EX */
- 242,243, 48,244,245,246,247,248,249,249,249,249, 40,249,249,SYM, /* FX */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM,182,183, 24,184, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM,185,186, 24,187, /* 9X */
+ SYM,SYM,SYM, 38,SYM,188,SYM,SYM,SYM,SYM,189,SYM,SYM,SYM,SYM,190, /* AX */
+ SYM,SYM,SYM, 38,SYM,SYM,SYM,SYM,SYM,191,192,SYM,193,SYM,194,195, /* BX */
+ 196, 39, 46,197, 36,198, 25, 43, 18, 31,199, 45,200,201,202,203, /* CX */
+ 26,204,205, 47,206,207, 32,SYM,208,209, 48,210, 33,211,212,213, /* DX */
+ 214, 39, 46,215, 36,216, 25, 43, 18, 31,217, 45,218,219,220,221, /* EX */
+ 26,222,223, 47,224,225, 32,SYM,226,227, 48,228, 33,229,230,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_13_CharToOrderMap[] =
+static const unsigned char Ibm852_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM,249,SYM,SYM,SYM,SYM,249, /* AX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM,249,SYM,SYM,SYM,SYM,249, /* BX */
- 249,249,249, 25, 36,249,249,249, 18, 31,249,249,249,249,249,249, /* CX */
- 23,249,249, 44, 38, 37, 32,SYM,249, 40,249,249, 33,249, 24,249, /* DX */
- 249,249,249, 25, 36,249,249,249, 18, 31,249,249,249,249,249,249, /* EX */
- 23,249,249, 44, 38, 37, 32,SYM,249, 40,249,249, 33,249, 24,SYM, /* FX */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 43, 33, 31, 46, 36,231, 25, 43, 38, 45,232,233,234,235, 36, 25, /* 8X */
+ 31,236,237,238, 32,239,240,241,242, 32, 33,243,244, 38,SYM, 18, /* 9X */
+ 39,245, 47, 48,246,247, 24, 24,248,249,SYM,249, 18,249,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 39, 46,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* BX */
+ SYM,SYM,SYM,SYM,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
+ 26, 26,249, 45,249,249,249,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */
+ 47,249,249,249,249,249, 23, 23,249, 48,249,249,249,249,249,SYM, /* EX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Ibm852_CharToOrderMap[] =
+static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */
- 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 39, 33, 31, 43, 36,249, 25, 39, 40, 47,249,249,249,249, 36, 25, /* 8X */
- 31,249,249,249, 32,249,249,249,249, 32, 33,249,249, 40,SYM, 18, /* 9X */
- 41,249, 44, 48,249,249, 24, 24,249,249,SYM,249, 18,249,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 41, 43,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* BX */
- SYM,SYM,SYM,SYM,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
- 26, 26,249, 47,249,249,249,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */
- 44,249,249,249,249,249, 23, 23,249, 48,249,249,249,249,249,SYM, /* EX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */
+ 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 36,249,249, 31,249, 32, 33, 39,249, 18, 36, 18, 25, 25, 31,249, /* 8X */
+ 249,249,249,249,249,249,249, 47,249,249, 32, 37, 48,249,249, 33, /* 9X */
+ SYM,SYM,249,SYM,SYM,SYM,SYM,249,SYM,SYM,SYM,249,SYM,SYM,249,249, /* AX */
+ 249,249,SYM,SYM,249,249,SYM,SYM, 38,249,249,249,249,249,249,249, /* BX */
+ 249,249,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,249,249, 37,249, 40, /* CX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40,249,249,249,SYM,SYM,249,249, /* DX */
+ 249, 23,SYM,SYM, 23,249,249, 39,249,249,249, 24, 24,249, 47,249, /* EX */
+ 249,249, 48,249,249,249,249,249,249,249,249,249, 38,249,249,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 62;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 19, 67, 20, 68, 15, 69, 2, 70, 22, 71, 17, 72, 21,
+ 73, 1, 74, 7, 75, 9, 76, 10, 77, 11, 78, 4, 79, 3, 80, 14,
+ 81, 30, 82, 6, 83, 8, 84, 5, 85, 12, 86, 13, 87, 28, 88, 29,
+ 89, 27, 90, 16, 97, 0, 98, 19, 99, 20, 100, 15, 101, 2,102, 22,
+ 103, 17, 104, 21, 105, 1, 106, 7, 107, 9, 108, 10, 109, 11,110, 4,
+ 111, 3, 112, 14, 113, 30, 114, 6, 115, 8, 116, 5, 117, 12,118, 13,
+ 119, 28, 120, 29, 121, 27, 122, 16, 262, 25, 263, 25, 268, 18,269, 18,
+ 272, 26, 273, 26, 352, 23, 353, 23, 381, 24, 382, 24,
+};
+
/* Model Table:
- * Total sequences: 712
- * First 512 sequences: 0.9989731099787131
- * Next 512 sequences (512-1024): 0.0010268900212868262
- * Rest: 3.7513395167998453e-17
+ * Total sequences: 725
+ * First 512 sequences: 0.9990568119867879
+ * Next 512 sequences (512-1024): 0.0009431880132121777
+ * Rest: -4.0440741033709315e-17
* Negative sequences: TODO
*/
static const PRUint8 CroatianLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,3,3,2,0,0,0,0,3,2,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,3,3,0,0,0,0,3,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,0,3,3,2,0,2,3,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,3,0,0,0,0,3,2,0,2,
- 3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,2,0,3,3,0,3,2,0,3,0,2,0,2,3,0,0,
- 3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,3,3,3,0,3,3,3,3,2,2,0,0,3,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,0,
- 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,0,3,2,3,3,2,3,0,0,0,0,2,3,0,0,
- 3,3,3,3,3,0,3,3,3,3,3,3,2,0,2,3,0,0,2,0,3,0,0,3,0,0,0,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,2,3,3,0,2,0,3,0,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,3,3,3,0,3,2,2,3,0,3,0,0,2,3,2,2,
- 3,3,3,3,3,0,3,3,2,0,3,3,3,3,0,3,0,3,0,3,0,3,0,0,0,0,0,2,2,0,0,
- 3,3,3,3,3,2,3,2,2,0,3,3,3,3,2,3,3,2,0,0,0,3,2,0,0,0,0,3,2,0,0,
- 3,3,3,3,3,0,2,3,0,3,3,3,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,2,3,3,3,0,3,3,2,2,0,3,3,0,0,2,3,0,3,0,0,0,0,2,0,0,2,
- 3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,0,2,0,0,0,3,0,0,0,0,0,2,0,0,3,
- 3,3,3,3,3,3,3,0,3,2,3,3,2,3,0,2,3,2,0,3,3,2,2,0,0,0,0,3,3,2,0,
- 3,3,3,3,3,3,3,0,3,2,3,3,2,0,2,2,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,3,2,0,0,3,3,3,2,3,3,0,0,0,2,0,2,0,0,0,0,3,0,0,0,0,0,
- 3,3,3,3,3,0,0,2,0,0,2,3,0,0,0,3,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,
- 3,3,3,3,3,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
- 3,3,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,2,3,3,3,2,2,2,3,0,3,3,0,0,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,2,3,2,2,2,0,0,3,0,0,0,0,0,0,0,2,2,3,2,0,0,0,0,2,2,0,0,
- 2,3,2,0,0,0,2,0,0,0,0,2,0,2,3,0,0,2,0,0,0,0,2,0,0,0,0,0,3,0,0,
- 0,3,2,0,0,0,2,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,3,2,2,3,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,3,0,0,0,0,3,2,0,3,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,3,0,3,2,2,3,0,2,0,2,3,0,0,
+ 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,0,0,0,3,2,2,0,
+ 3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,3,2,3,3,2,3,0,0,0,0,2,3,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,2,0,
+ 3,3,3,3,3,0,3,3,3,3,3,2,3,0,2,3,0,0,2,2,3,2,2,3,0,0,0,2,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,2,3,3,0,2,0,3,0,2,0,2,0,
+ 3,3,3,3,3,2,3,3,3,0,3,3,3,3,0,2,3,3,0,3,2,2,3,0,3,0,0,2,3,2,0,
+ 3,3,3,3,3,0,3,3,2,0,3,3,3,3,0,3,2,3,0,3,0,3,0,0,0,0,0,2,2,0,0,
+ 3,3,3,3,3,2,3,2,2,0,3,3,3,2,2,3,3,2,0,0,0,3,2,0,0,0,0,2,2,0,0,
+ 3,3,3,3,3,0,2,3,0,3,3,0,3,3,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,2,0,3,2,3,2,0,3,3,0,0,2,3,0,3,0,0,0,0,3,0,0,2,
+ 3,3,3,3,2,3,3,3,3,3,3,3,3,2,0,3,0,2,0,0,2,3,0,0,0,0,0,2,0,0,3,
+ 3,3,3,3,3,3,3,0,3,2,3,3,3,3,0,2,3,2,0,3,3,2,2,0,0,0,0,3,2,2,0,
+ 3,3,3,3,3,3,3,2,3,2,3,2,3,0,2,2,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,
+ 3,3,3,2,3,3,2,0,0,3,3,2,3,3,3,0,0,0,3,0,2,0,0,0,0,3,0,0,0,0,0,
+ 3,3,3,3,3,0,2,2,0,0,2,0,3,0,0,3,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,
+ 3,3,3,3,3,0,0,0,0,2,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
+ 3,3,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,3,3,2,2,2,2,3,2,3,2,3,0,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,2,3,2,2,3,0,0,0,3,0,0,0,0,0,0,2,2,3,2,0,0,0,0,2,2,0,2,
+ 3,3,2,0,0,2,2,0,0,0,0,0,2,2,2,0,0,2,0,0,0,0,2,0,0,0,0,0,3,0,0,
+ 0,3,0,0,0,0,3,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
-const SequenceModel Windows_1250CroatianModel =
+const SequenceModel Iso_8859_2CroatianModel =
{
- Windows_1250_CharToOrderMap,
+ Iso_8859_2_CharToOrderMap,
CroatianLangModel,
31,
- (float)0.9989731099787131,
+ (float)0.9990568119867879,
PR_TRUE,
- "WINDOWS-1250",
+ "ISO-8859-2",
"hr"
};
-const SequenceModel Iso_8859_2CroatianModel =
+const SequenceModel Iso_8859_13CroatianModel =
{
- Iso_8859_2_CharToOrderMap,
+ Iso_8859_13_CharToOrderMap,
CroatianLangModel,
31,
- (float)0.9989731099787131,
+ (float)0.9990568119867879,
PR_TRUE,
- "ISO-8859-2",
+ "ISO-8859-13",
"hr"
};
@@ -258,41 +272,51 @@ const SequenceModel Iso_8859_16CroatianModel =
Iso_8859_16_CharToOrderMap,
CroatianLangModel,
31,
- (float)0.9989731099787131,
+ (float)0.9990568119867879,
PR_TRUE,
"ISO-8859-16",
"hr"
};
-const SequenceModel Mac_CentraleuropeCroatianModel =
+const SequenceModel Windows_1250CroatianModel =
{
- Mac_Centraleurope_CharToOrderMap,
+ Windows_1250_CharToOrderMap,
CroatianLangModel,
31,
- (float)0.9989731099787131,
+ (float)0.9990568119867879,
PR_TRUE,
- "MAC-CENTRALEUROPE",
+ "WINDOWS-1250",
"hr"
};
-const SequenceModel Iso_8859_13CroatianModel =
+const SequenceModel Ibm852CroatianModel =
{
- Iso_8859_13_CharToOrderMap,
+ Ibm852_CharToOrderMap,
CroatianLangModel,
31,
- (float)0.9989731099787131,
+ (float)0.9990568119867879,
PR_TRUE,
- "ISO-8859-13",
+ "IBM852",
"hr"
};
-const SequenceModel Ibm852CroatianModel =
+const SequenceModel Mac_CentraleuropeCroatianModel =
{
- Ibm852_CharToOrderMap,
+ Mac_Centraleurope_CharToOrderMap,
CroatianLangModel,
31,
- (float)0.9989731099787131,
+ (float)0.9990568119867879,
PR_TRUE,
- "IBM852",
+ "MAC-CENTRALEUROPE",
"hr"
};
+
+const LanguageModel CroatianModel =
+{
+ "hr",
+ Unicode_CharOrder,
+ 62,
+ CroatianLangModel,
+ 31,
+ (float)0.9990568119867879,
+};
diff --git a/src/LangModels/LangCzechModel.cpp b/src/LangModels/LangCzechModel.cpp
index c12c07e..75d9dea 100644
--- a/src/LangModels/LangCzechModel.cpp
+++ b/src/LangModels/LangCzechModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Czech *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-21 03:28:11.733089
+ * On: 2021-03-16 18:50:25.564246
**/
/* Character Mapping Table:
@@ -61,45 +62,45 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Windows_1250_CharToOrderMap[] =
+static const unsigned char Iso_8859_2_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */
- 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */
- 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 46, 38, 26, 47, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 46, 38, 26, 48, /* 9X */
- SYM,SYM,SYM, 49,SYM, 50,SYM,SYM,SYM,SYM, 51,SYM,SYM,SYM,SYM, 52, /* AX */
- SYM,SYM,SYM, 53,SYM,SYM,SYM,SYM,SYM, 54, 55,SYM, 45,SYM, 45, 56, /* BX */
- 57, 18, 58, 59, 42, 60, 61, 62, 25, 24, 63, 64, 23, 11, 65, 39, /* CX */
- 66, 67, 35, 37, 68, 69, 41,SYM, 27, 31, 33, 70, 43, 28, 71, 72, /* DX */
- 73, 18, 74, 75, 42, 76, 77, 78, 25, 24, 79, 80, 23, 11, 81, 39, /* EX */
- 82, 83, 35, 37, 84, 85, 41,SYM, 27, 31, 33, 86, 43, 28, 87,SYM, /* FX */
+ SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */
+ 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */
+ 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ SYM, 48,SYM, 49,SYM, 44, 45,SYM,SYM, 29, 50, 38, 51,SYM, 26, 52, /* AX */
+ SYM, 53,SYM, 54,SYM, 44, 45,SYM,SYM, 29, 55, 38, 56,SYM, 26, 57, /* BX */
+ 58, 19, 59, 60, 41, 61, 62, 63, 25, 24, 64, 65, 23, 12, 66, 40, /* CX */
+ 67, 68, 36, 37, 69, 70, 42,SYM, 27, 32, 33, 71, 43, 28, 72, 46, /* DX */
+ 73, 19, 74, 75, 41, 76, 77, 78, 25, 24, 79, 80, 23, 12, 81, 40, /* EX */
+ 82, 83, 36, 37, 84, 85, 42,SYM, 27, 32, 33, 86, 43, 28, 87,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
+static const unsigned char Windows_1250_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */
- 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */
- 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 42, 88, 89, 24, 90, 41, 43, 18, 91, 25, 42, 25, 92, 93, 24, 94, /* 8X */
- 95, 39, 11, 39, 44, 44, 96, 37, 97, 98, 41, 99, 33, 23, 23, 43, /* 9X */
- SYM,SYM,100,SYM,SYM,SYM,SYM,101,SYM,SYM,SYM,102,SYM,SYM,103,104, /* AX */
- 105,106,SYM,SYM,107,108,SYM,SYM,109,110,111, 45, 45,112,113,114, /* BX */
- 115,116,SYM,SYM,117, 35,SYM,SYM,SYM,SYM,SYM, 35,118,119,120,121, /* CX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,122,123,124, 27,SYM,SYM, 27,125, /* DX */
- 126, 29,SYM,SYM, 29, 46, 46, 18, 38, 38, 11, 26, 26,127, 37,128, /* EX */
- 129, 31, 33, 31,130,131,132,133, 28, 28,134,135,136,137,138,SYM, /* FX */
+ SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */
+ 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */
+ 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 45, 38, 26, 88, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 45, 38, 26, 89, /* 9X */
+ SYM,SYM,SYM, 90,SYM, 91,SYM,SYM,SYM,SYM, 92,SYM,SYM,SYM,SYM, 93, /* AX */
+ SYM,SYM,SYM, 94,SYM,SYM,SYM,SYM,SYM, 95, 96,SYM, 44,SYM, 44, 97, /* BX */
+ 98, 19, 99,100, 41,101,102,103, 25, 24,104,105, 23, 12,106, 40, /* CX */
+ 107,108, 36, 37,109,110, 42,SYM, 27, 32, 33,111, 43, 28,112, 46, /* DX */
+ 113, 19,114,115, 41,116,117,118, 25, 24,119,120, 23, 12,121, 40, /* EX */
+ 122,123, 36, 37,124,125, 42,SYM, 27, 32, 33,126, 43, 28,127,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -109,156 +110,172 @@ static const unsigned char Ibm852_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */
- 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */
- 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 139, 43, 24,140, 42, 31,141,142,143,144,145,146,147,148, 42,149, /* 8X */
- 24,150,151,152, 41, 45, 45, 46, 46, 41, 43, 38, 38,153,SYM, 25, /* 9X */
- 18, 11, 37, 33,154,155, 26, 26,156,157,SYM,158, 25,159,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 18,160, 23,161,SYM,SYM,SYM,SYM,162,163,SYM, /* BX */
- SYM,SYM,SYM,SYM,SYM,SYM,164,165,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
- 166,167, 39,168, 39, 35, 11,169, 23,SYM,SYM,SYM,SYM,170, 31,SYM, /* DX */
- 37,171,172,173,174, 35, 29, 29,175, 33,176,177, 28, 28,178,SYM, /* EX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,179, 27, 27,SYM,SYM, /* FX */
+ SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */
+ 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */
+ 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 128, 43, 24,129, 41, 32,130,131,132,133,134,135,136,137, 41,138, /* 8X */
+ 24,139,140,141, 42, 44, 44, 45, 45, 42, 43, 38, 38,142,SYM, 25, /* 9X */
+ 19, 12, 37, 33,143,144, 26, 26,145,146,SYM,147, 25,148,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 19,149, 23,150,SYM,SYM,SYM,SYM,151,152,SYM, /* BX */
+ SYM,SYM,SYM,SYM,SYM,SYM,153,154,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
+ 155,156, 40,157, 40, 36, 12,158, 23,SYM,SYM,SYM,SYM,159, 32,SYM, /* DX */
+ 37, 46,160,161,162, 36, 29, 29,163, 33,164,165, 28, 28,166,SYM, /* EX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,167, 27, 27,SYM,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_2_CharToOrderMap[] =
+static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */
- 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */
- 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,180,SYM,181,SYM, 45, 46,SYM,SYM, 29,182, 38,183,SYM, 26,184, /* AX */
- SYM,185,SYM,186,SYM, 45, 46,SYM,SYM, 29,187, 38,188,SYM, 26,189, /* BX */
- 190, 18,191,192, 42,193,194,195, 25, 24,196,197, 23, 11,198, 39, /* CX */
- 199,200, 35, 37,201,202, 41,SYM, 27, 31, 33,203, 43, 28,204,205, /* DX */
- 206, 18,207,208, 42,209,210,211, 25, 24,212,213, 23, 11,214, 39, /* EX */
- 215,216, 35, 37,217,218, 41,SYM, 27, 31, 33,219, 43, 28,220,SYM, /* FX */
+ SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */
+ 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */
+ 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 41,168,169, 24,170, 42, 43, 19,171, 25, 41, 25,172,173, 24,174, /* 8X */
+ 175, 40, 12, 40, 47, 47,176, 37,177,178, 42,179, 33, 23, 23, 43, /* 9X */
+ SYM,SYM,180,SYM,SYM,SYM,SYM, 46,SYM,SYM,SYM,181,SYM,SYM,182,183, /* AX */
+ 184,185,SYM,SYM,186,187,SYM,SYM,188,189,190, 44, 44,191,192,193, /* BX */
+ 194,195,SYM,SYM,196, 36,SYM,SYM,SYM,SYM,SYM, 36,197,198,199,200, /* CX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,201,202,203, 27,SYM,SYM, 27,204, /* DX */
+ 205, 29,SYM,SYM, 29, 45, 45, 19, 38, 38, 12, 26, 26,206, 37,207, /* EX */
+ 208, 32, 33, 32,209,210,211,212, 28, 28,213,214,215,216,217,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 82;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 3, 66, 22, 67, 14, 68, 13, 69, 1, 70, 31, 71, 30, 72, 17,
+ 73, 4, 74, 21, 75, 11, 76, 10, 77, 16, 78, 2, 79, 0, 80, 9,
+ 81, 39, 82, 8, 83, 6, 84, 5, 85, 15, 86, 7, 87, 35, 88, 34,
+ 89, 20, 90, 18, 97, 3, 98, 22, 99, 14, 100, 13, 101, 1,102, 31,
+ 103, 30, 104, 17, 105, 4, 106, 21, 107, 11, 108, 10, 109, 16,110, 2,
+ 111, 0, 112, 9, 113, 39, 114, 8, 115, 6, 116, 5, 117, 15,118, 7,
+ 119, 35, 120, 34, 121, 20, 122, 18, 193, 19, 201, 24, 205, 12,211, 37,
+ 218, 33, 221, 28, 225, 19, 233, 24, 237, 12, 243, 37, 250, 33,253, 28,
+ 268, 25, 269, 25, 270, 40, 271, 40, 282, 23, 283, 23, 327, 36,328, 36,
+ 344, 27, 345, 27, 352, 29, 353, 29, 356, 38, 357, 38, 366, 32,367, 32,
+ 381, 26, 382, 26,
+};
+
/* Model Table:
- * Total sequences: 1025
- * First 512 sequences: 0.9786035192432675
- * Next 512 sequences (512-1024): 0.02139445610866691
- * Rest: 2.0246480655940202e-06
+ * Total sequences: 1037
+ * First 512 sequences: 0.9751874547460189
+ * Next 512 sequences (512-1024): 0.024780958582584566
+ * Rest: 3.158667139656693e-05
* Negative sequences: TODO
*/
static const PRUint8 CzechLangModel[] =
{
- 2,2,3,2,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,
- 2,3,3,0,0,3,3,3,0,2,3,0,3,0,3,2,2,0,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,2,3,
- 2,3,3,0,0,3,3,3,0,3,3,2,3,2,3,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,2,0,2,3,3,3,3,3,3,2,3,3,3,
- 3,2,2,3,3,2,2,0,3,2,3,3,3,0,2,0,0,2,0,0,2,
- 3,3,3,2,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,
- 3,3,3,0,0,3,3,3,0,3,3,0,3,0,3,2,2,0,2,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,
- 0,2,3,0,2,3,3,2,0,3,3,0,3,0,2,2,2,2,2,0,2,
- 3,3,3,3,3,2,2,3,2,3,3,3,3,3,2,2,2,3,3,3,
- 3,2,2,3,3,2,0,3,3,3,0,3,2,0,0,2,2,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,
- 3,2,3,0,2,2,0,0,2,0,2,2,2,2,0,2,2,0,2,0,0,
- 3,3,3,3,3,2,2,0,2,3,3,3,3,3,2,3,0,2,3,3,
- 3,2,2,3,3,2,2,2,3,3,0,3,0,0,0,2,0,2,0,0,0,
- 3,3,3,3,3,3,3,0,2,3,3,3,2,3,2,2,2,2,3,0,
- 3,2,2,3,2,2,0,3,2,2,2,3,2,0,2,2,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,2,3,3,
- 3,0,3,0,3,3,2,0,3,2,2,3,3,0,0,2,2,2,2,2,2,
- 3,3,3,3,3,3,2,2,2,2,2,3,3,3,2,2,2,2,3,3,
- 3,0,2,0,3,2,2,0,3,3,2,3,2,0,0,2,0,2,0,0,0,
- 0,2,3,0,2,3,3,3,3,3,3,2,3,0,3,2,3,3,0,3,
- 0,3,2,0,0,3,3,2,0,2,0,0,2,0,0,0,0,0,2,0,0,
- 3,3,3,3,3,3,2,3,0,3,3,0,2,3,3,3,2,2,3,2,
- 3,2,3,0,3,2,2,2,3,0,2,3,2,0,0,0,0,2,0,0,0,
- 2,2,3,3,3,3,3,3,3,3,3,0,3,2,3,3,3,3,3,3,
- 2,3,3,0,0,3,3,2,0,3,2,0,2,0,2,2,2,0,2,2,0,
- 3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,3,2,3,2,2,
- 2,0,2,0,2,0,0,0,0,0,2,2,0,0,0,2,0,0,0,0,2,
- 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,2,3,2,
- 3,0,2,3,3,2,2,2,2,2,2,3,2,0,2,2,2,0,0,0,0,
- 3,3,3,3,3,2,2,0,3,3,3,3,2,3,2,2,2,2,3,2,
- 3,2,3,3,3,2,3,2,2,2,2,3,2,0,0,2,0,2,0,0,0,
- 3,3,3,3,3,3,2,3,2,3,3,2,2,3,2,2,2,0,3,0,
- 3,2,2,0,2,2,2,2,3,0,2,2,0,0,0,0,2,2,2,0,0,
- 0,0,3,0,0,3,3,3,2,3,3,0,3,0,3,3,3,3,0,3,
- 0,2,2,0,0,2,3,2,0,3,2,0,0,0,0,2,0,0,0,2,0,
- 3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,3,2,
- 3,2,2,0,2,2,0,2,2,2,2,2,0,2,0,2,0,2,0,0,0,
- 2,2,3,2,2,3,3,3,3,2,3,2,3,2,3,2,3,3,0,3,
- 0,2,3,0,0,2,3,2,0,3,2,0,2,2,2,0,0,0,2,0,0,
- 2,3,3,3,3,2,3,2,2,2,2,3,2,2,2,2,3,2,2,2,
- 0,2,2,0,0,2,0,0,0,3,2,2,0,2,0,2,0,2,0,2,0,
- 3,3,3,3,3,3,3,3,0,3,3,3,2,3,2,2,2,2,3,2,
- 3,3,2,3,2,2,0,2,3,2,0,2,0,0,0,2,0,2,0,0,0,
- 0,0,3,2,0,3,3,2,3,3,3,0,3,0,3,3,2,3,0,2,
- 0,3,0,0,0,2,3,3,0,3,0,0,0,0,0,2,0,0,2,2,0,
- 2,0,3,0,0,3,2,2,2,2,2,0,3,0,0,2,3,3,0,3,
+ 2,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,
+ 2,3,3,0,0,3,3,3,0,2,3,3,0,0,3,2,2,0,2,0,0,
+ 3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,2,
+ 2,3,3,0,0,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,2,
+ 3,3,3,3,3,3,3,2,2,2,2,3,3,3,3,3,2,2,3,3,
+ 3,2,2,3,3,3,2,0,3,2,3,3,3,0,2,1,0,2,0,2,0,
+ 2,2,3,2,2,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,
+ 3,3,3,0,0,3,3,3,0,3,3,3,0,0,2,2,2,0,2,0,1,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,
+ 2,2,3,0,2,3,3,2,0,3,3,3,0,0,2,2,2,2,2,2,0,
+ 3,3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,2,3,3,3,
+ 3,2,2,3,3,1,0,3,3,3,2,2,3,0,0,2,2,2,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,
+ 3,2,3,0,2,2,0,0,1,0,2,2,2,2,0,2,0,0,2,0,0,
+ 3,3,3,3,3,2,3,0,3,2,3,3,3,3,2,3,0,2,3,3,
+ 3,2,2,3,3,2,2,2,3,3,0,0,3,0,0,0,2,2,0,0,0,
+ 3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,2,3,3,
+ 3,0,3,0,3,3,2,0,3,3,3,2,3,0,2,2,2,2,0,2,0,
+ 3,3,3,3,3,3,3,2,3,2,3,2,3,2,2,3,2,2,0,3,
+ 2,2,2,3,2,2,0,3,2,2,0,0,3,0,0,0,2,0,0,0,0,
+ 3,3,3,3,3,3,3,2,2,2,3,3,3,2,2,3,3,2,3,3,
+ 3,0,2,0,3,2,2,0,3,3,2,2,3,0,0,0,2,2,0,0,0,
+ 3,3,3,3,3,3,2,3,3,2,3,2,0,3,3,3,2,2,2,3,
+ 3,2,2,0,3,2,2,2,3,0,2,2,3,0,0,0,0,2,0,0,0,
+ 0,2,3,2,2,3,3,3,3,3,3,3,0,3,3,0,3,3,3,0,
+ 0,3,2,0,0,3,3,2,0,2,2,0,0,0,0,0,0,0,2,0,0,
+ 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,2,3,
+ 3,2,2,3,3,2,2,2,2,2,2,2,3,0,2,2,2,0,0,0,0,
+ 3,3,3,3,3,3,2,2,2,2,2,3,3,3,2,2,2,3,2,2,
+ 2,0,2,0,2,0,0,0,0,0,0,2,2,0,0,0,2,0,0,2,0,
+ 2,2,3,3,2,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,
+ 2,3,3,0,0,3,3,2,0,3,2,2,0,0,2,2,2,0,2,0,2,
+ 3,3,3,3,3,2,2,0,3,3,3,2,3,2,2,3,2,2,2,3,
+ 3,1,3,3,3,2,3,2,2,2,2,2,3,0,0,0,0,2,0,0,0,
+ 3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,3,2,0,0,3,
+ 3,2,2,0,2,2,2,2,3,0,0,2,2,2,0,2,0,2,2,0,0,
+ 3,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,3,2,3,
+ 3,2,2,0,2,2,0,2,2,2,0,0,2,2,0,2,2,2,0,0,0,
+ 0,0,3,2,0,3,3,3,3,3,3,3,0,3,3,0,3,3,3,0,
+ 0,2,2,0,0,2,3,2,0,3,0,2,0,0,0,0,2,0,0,0,0,
+ 2,2,3,2,2,3,3,3,2,3,3,3,2,2,3,2,3,3,3,0,
+ 0,2,3,0,0,2,3,2,0,3,2,1,0,2,0,0,0,0,2,0,0,
+ 2,3,3,3,3,2,3,2,2,2,2,2,3,2,2,2,3,2,2,3,
+ 0,2,2,0,0,2,0,0,0,3,0,2,2,2,0,0,2,2,0,0,2,
+ 3,3,3,3,3,3,3,2,3,0,3,2,3,2,2,3,2,2,2,3,
+ 3,3,2,3,2,2,0,2,3,2,0,0,2,2,0,0,2,2,0,0,0,
+ 0,0,3,2,0,3,3,2,3,3,3,3,0,3,3,0,3,3,2,0,
+ 0,3,2,0,0,2,3,3,0,2,0,0,0,0,0,0,2,0,2,0,2,
+ 2,0,3,0,0,3,2,2,2,2,2,3,0,2,0,0,3,3,2,0,
0,0,0,0,0,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,2,0,0,2,3,3,3,3,2,0,0,0,3,0,
- 0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,2,2,0,0,2,2,3,2,2,2,3,2,0,3,0,
- 0,0,2,0,0,0,0,0,0,3,0,2,0,0,0,2,0,0,0,2,0,
- 2,3,2,3,3,0,2,0,0,0,0,3,2,2,0,0,0,0,2,2,
- 0,0,2,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
- 0,0,2,0,0,3,3,3,2,3,2,0,2,2,3,2,3,2,0,3,
- 0,2,3,0,0,2,2,2,0,3,2,0,0,0,0,0,0,0,0,0,0,
- 2,3,3,3,3,3,2,2,2,0,3,3,3,3,0,0,0,0,2,0,
+ 3,3,3,3,3,3,2,0,2,0,3,3,3,0,2,3,0,0,0,3,
+ 0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,
+ 3,3,3,2,3,2,2,0,2,0,2,2,3,3,2,2,2,0,0,3,
+ 0,0,2,0,0,0,0,0,0,2,0,0,2,0,0,0,2,0,0,0,2,
+ 2,3,2,3,3,0,2,0,0,0,0,2,3,0,0,2,0,0,2,2,
+ 0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+ 0,0,2,0,0,3,3,3,2,2,2,2,0,2,3,2,3,2,3,0,
+ 0,2,3,0,0,2,2,2,0,3,0,1,0,0,0,0,0,0,0,0,0,
+ 2,3,3,3,3,3,2,2,0,2,3,3,3,0,0,3,0,0,0,2,
0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,
- 3,3,2,3,3,2,2,0,2,3,3,2,0,3,0,2,2,0,2,2,
- 3,0,0,0,2,0,0,0,2,0,2,2,0,2,0,0,0,2,0,0,0,
- 0,0,2,2,0,0,3,3,0,2,2,0,2,0,2,2,3,2,0,3,
- 0,2,2,0,0,2,3,2,0,0,0,0,0,0,0,2,0,0,0,0,0,
- 3,3,3,3,3,2,2,2,2,3,3,0,0,2,2,2,2,2,2,0,
+ 3,3,3,3,3,2,2,2,3,2,3,1,0,2,2,3,2,2,0,2,
2,0,0,0,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,
- 0,0,2,0,0,2,3,2,2,2,2,0,2,0,2,2,2,2,0,3,
- 0,2,2,0,0,3,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 2,2,2,2,3,3,0,0,3,2,2,2,2,2,2,2,2,0,2,0,
- 2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,
- 2,0,0,2,0,0,2,0,0,0,0,0,2,3,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
- 2,2,2,2,3,0,2,0,0,0,2,0,2,2,2,0,0,2,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
- 0,0,2,0,0,0,2,0,0,0,2,0,0,0,0,2,2,0,0,3,
- 0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
- 2,0,0,2,0,0,0,0,0,0,0,0,2,2,0,0,0,0,2,0,
+ 3,3,2,3,3,2,2,0,3,2,3,0,1,2,2,3,2,0,2,2,
+ 3,2,2,0,2,0,0,0,2,0,2,2,2,2,0,0,0,2,0,0,0,
+ 0,0,2,0,0,0,3,3,2,0,2,2,0,2,2,0,3,2,3,0,
+ 0,2,2,0,0,2,3,2,0,0,0,0,0,0,0,0,1,0,0,0,0,
+ 0,0,2,0,0,2,3,2,2,2,2,2,0,2,2,0,2,2,3,0,
+ 0,2,2,0,0,3,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,
+ 2,2,2,2,3,3,0,0,2,2,2,2,2,2,2,2,2,0,0,2,
+ 2,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,0,0,0,0,
+ 2,2,2,2,3,0,2,0,2,0,0,2,0,0,2,2,0,2,0,0,
+ 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
+ 2,0,0,0,0,0,2,0,0,0,0,2,0,0,0,3,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+ 0,0,2,0,0,2,2,0,0,0,2,0,0,2,0,0,2,0,3,0,
+ 0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
+ 2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,2,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
+ 2,0,0,2,0,2,0,0,0,0,0,0,0,0,0,2,2,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
-const SequenceModel Windows_1250CzechModel =
+const SequenceModel Iso_8859_2CzechModel =
{
- Windows_1250_CharToOrderMap,
+ Iso_8859_2_CharToOrderMap,
CzechLangModel,
41,
- (float)0.9786035192432675,
+ (float)0.9751874547460189,
PR_TRUE,
- "WINDOWS-1250",
+ "ISO-8859-2",
"cs"
};
-const SequenceModel Mac_CentraleuropeCzechModel =
+const SequenceModel Windows_1250CzechModel =
{
- Mac_Centraleurope_CharToOrderMap,
+ Windows_1250_CharToOrderMap,
CzechLangModel,
41,
- (float)0.9786035192432675,
+ (float)0.9751874547460189,
PR_TRUE,
- "MAC-CENTRALEUROPE",
+ "WINDOWS-1250",
"cs"
};
@@ -267,19 +284,29 @@ const SequenceModel Ibm852CzechModel =
Ibm852_CharToOrderMap,
CzechLangModel,
41,
- (float)0.9786035192432675,
+ (float)0.9751874547460189,
PR_TRUE,
"IBM852",
"cs"
};
-const SequenceModel Iso_8859_2CzechModel =
+const SequenceModel Mac_CentraleuropeCzechModel =
{
- Iso_8859_2_CharToOrderMap,
+ Mac_Centraleurope_CharToOrderMap,
CzechLangModel,
41,
- (float)0.9786035192432675,
+ (float)0.9751874547460189,
PR_TRUE,
- "ISO-8859-2",
+ "MAC-CENTRALEUROPE",
"cs"
};
+
+const LanguageModel CzechModel =
+{
+ "cs",
+ Unicode_CharOrder,
+ 82,
+ CzechLangModel,
+ 41,
+ (float)0.9751874547460189,
+};
diff --git a/src/LangModels/LangEsperantoModel.cpp b/src/LangModels/LangEsperantoModel.cpp
index 1d55ec7..e0b8fed 100644
--- a/src/LangModels/LangEsperantoModel.cpp
+++ b/src/LangModels/LangEsperantoModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Esperanto *********/
/**
* Generated by BuildLangModel.py
- * On: 2015-12-04 01:27:38.177516
+ * On: 2021-03-16 18:54:42.163514
**/
/* Character Mapping Table:
@@ -67,66 +68,76 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 7, 13, 4, 1, /* 4X */
- 14, 32, 5, 8, 6, 12, 16, 27, 33, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 7, 13, 4, 1, /* 6X */
- 14, 32, 5, 8, 6, 12, 16, 27, 33, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 18, 17, 10, 2, 19, 15, 20, 3, 11, 9, 7, 13, 4, 1, /* 4X */
+ 14, 34, 5, 8, 6, 12, 16, 25, 33, 26, 21,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 18, 17, 10, 2, 19, 15, 20, 3, 11, 9, 7, 13, 4, 1, /* 6X */
+ 14, 34, 5, 8, 6, 12, 16, 25, 33, 26, 21,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 56,SYM,SYM,SYM,ILL, 34,SYM,SYM, 57, 53, 58, 28,SYM,ILL, 40, /* AX */
- SYM, 59,SYM,SYM,SYM,SYM, 34,SYM,SYM, 60, 53, 61, 28,SYM,ILL, 40, /* BX */
- 44, 29, 46,ILL, 43, 62, 24, 38, 41, 31, 48, 50, 54, 35, 49, 52, /* CX */
- ILL, 42, 63, 30, 47, 64, 36,SYM, 22, 51, 39, 55, 37, 23, 26, 45, /* DX */
- 44, 29, 46,ILL, 43, 65, 24, 38, 41, 31, 48, 50, 54, 35, 49, 52, /* EX */
- ILL, 42, 66, 30, 47, 67, 36,SYM, 22, 51, 39, 55, 37, 23, 26,SYM, /* FX */
+ SYM, 55,SYM,SYM,SYM,ILL, 31,SYM,SYM, 56, 51, 57, 28,SYM,ILL, 41, /* AX */
+ SYM, 58,SYM,SYM,SYM,SYM, 31,SYM,SYM, 53, 51, 59, 28,SYM,ILL, 41, /* BX */
+ 46, 29, 50,ILL, 39, 60, 24, 40, 38, 30, 48, 49, 61, 36, 47, 54, /* CX */
+ ILL, 42, 52, 32, 45, 62, 35,SYM, 22, 63, 44, 64, 37, 23, 27, 43, /* DX */
+ 46, 29, 50,ILL, 39, 65, 24, 40, 38, 30, 48, 49, 66, 36, 47, 54, /* EX */
+ ILL, 42, 52, 32, 45, 67, 35,SYM, 22, 68, 44, 69, 37, 23, 27,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 64;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 18, 67, 17, 68, 10, 69, 2, 70, 19, 71, 15, 72, 20,
+ 73, 3, 74, 11, 75, 9, 76, 7, 77, 13, 78, 4, 79, 1, 80, 14,
+ 82, 5, 83, 8, 84, 6, 85, 12, 86, 16, 87, 25, 89, 26, 90, 21,
+ 97, 0, 98, 18, 99, 17, 100, 10, 101, 2, 102, 19, 103, 15,104, 20,
+ 105, 3, 106, 11, 107, 9, 108, 7, 109, 13, 110, 4, 111, 1,112, 14,
+ 114, 5, 115, 8, 116, 6, 117, 12, 118, 16, 119, 25, 121, 26,122, 21,
+ 193, 29, 201, 30, 225, 29, 233, 30, 264, 24, 265, 24, 284, 22,285, 22,
+ 292, 31, 293, 31, 308, 28, 309, 28, 348, 27, 349, 27, 364, 23,365, 23,
+};
+
/* Model Table:
- * Total sequences: 989
- * First 512 sequences: 0.9942980632768038
- * Next 512 sequences (512-1024): 0.0057019367231962385
- * Rest: -5.0306980803327406e-17
+ * Total sequences: 1066
+ * First 512 sequences: 0.995442680189542
+ * Next 512 sequences (512-1024): 0.0044874885692908805
+ * Rest: 6.983124116715766e-05
* Negative sequences: TODO
*/
static const PRUint8 EsperantoLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,2,3,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,0,0,0,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,2,3,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,3,0,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,3,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,3,3,3,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,3,3,0,0,2,3,2,2,2,3,3,2,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,0,3,3,3,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,0,0,3,0,2,0,3,2,3,2,2,0,
- 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,3,2,3,3,3,0,0,0,3,2,0,2,3,2,2,0,0,0,
- 3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,3,3,2,2,2,3,3,0,0,2,3,0,3,2,2,2,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,2,0,2,2,2,2,2,2,0,0,0,0,0,0,3,3,2,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,3,2,0,0,0,2,0,2,2,
- 3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,2,3,3,3,2,0,0,0,2,3,2,2,0,3,2,2,0,0,0,
- 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,2,0,2,2,2,2,3,0,0,0,2,2,0,0,3,2,2,0,0,0,
- 3,3,3,3,3,3,2,3,3,2,3,0,3,3,2,2,3,2,2,2,2,3,0,2,2,3,2,2,2,2,2,3,0,2,0,
- 3,3,3,3,2,3,2,2,2,2,2,3,3,2,2,2,0,0,2,0,2,2,0,0,2,2,0,0,0,3,2,2,0,0,0,
- 3,3,3,3,0,3,3,3,3,3,2,0,3,2,2,2,0,3,2,2,3,3,0,0,0,3,0,0,0,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,3,2,0,2,0,0,0,3,2,0,0,3,3,3,0,0,0,
- 3,3,3,3,0,3,3,3,2,2,2,2,3,3,2,3,2,0,2,3,0,0,0,0,0,2,0,0,0,0,0,2,0,3,0,
- 3,3,3,3,3,2,2,3,3,3,2,2,3,2,2,2,2,3,3,2,2,0,0,0,0,3,2,2,0,2,2,2,2,0,0,
- 3,3,3,3,3,3,3,3,2,2,2,0,3,3,2,0,2,0,2,2,0,2,0,0,0,2,0,2,0,2,2,2,0,2,0,
- 3,3,3,3,0,0,2,3,0,0,2,2,3,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,2,3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,0,2,2,3,2,0,0,2,0,3,0,0,0,0,0,0,0,0,
- 3,3,3,3,0,0,2,2,0,2,3,2,3,3,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,2,0,2,0,0,0,
- 3,3,3,3,2,2,3,2,0,2,0,2,3,2,2,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,2,2,2,2,3,2,0,0,2,0,0,0,0,0,0,2,0,2,0,0,0,2,0,3,0,0,2,0,0,0,0,
- 3,3,2,2,2,2,0,2,0,2,0,0,3,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,2,0,3,3,3,3,3,2,3,0,0,2,2,2,2,3,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,3,3,2,2,2,2,2,2,0,0,2,2,2,0,2,2,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,
- 2,2,2,0,3,3,3,3,3,2,2,0,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,
- 2,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,2,2,3,0,0,2,2,0,0,0,0,2,2,2,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,
- 3,3,3,2,2,0,2,0,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,1,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,3,2,3,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,2,3,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,2,3,3,2,2,2,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,0,2,2,3,2,0,3,3,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,0,3,2,2,0,3,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,0,0,2,3,3,0,0,2,2,0,
+ 3,3,3,3,3,3,3,3,3,3,2,1,3,3,3,2,2,2,2,2,2,2,2,2,2,0,0,0,0,3,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,3,3,2,2,0,0,2,2,3,2,0,3,3,0,
+ 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,0,0,2,2,2,3,0,0,1,0,0,2,0,0,3,3,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,2,3,2,0,2,0,2,3,2,2,2,2,2,
+ 3,3,3,3,2,3,2,3,3,2,2,3,3,2,2,2,2,2,2,2,1,2,0,0,2,0,2,2,0,3,1,0,
+ 3,3,3,3,2,3,3,3,3,3,2,2,3,2,2,2,2,3,2,2,3,3,0,0,2,1,3,0,0,2,2,2,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,2,3,0,2,2,2,0,0,2,3,2,0,2,2,0,
+ 3,3,3,3,2,3,3,3,2,2,2,2,3,3,2,2,2,0,2,3,2,0,0,0,0,0,2,0,0,2,2,0,
+ 3,3,3,3,3,3,3,3,2,2,2,2,3,3,2,2,0,2,2,2,2,0,0,2,0,3,2,0,0,2,2,0,
+ 3,3,3,3,3,1,2,3,3,3,2,2,3,2,2,2,2,2,2,2,2,3,0,0,0,2,3,1,0,2,2,0,
+ 3,3,3,3,0,2,2,3,2,2,2,2,3,2,2,2,1,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,
+ 3,2,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,0,2,2,0,3,0,0,2,0,0,3,0,0,0,0,
+ 3,3,3,3,0,2,0,2,0,2,3,0,3,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,2,3,2,3,2,2,0,2,2,2,0,0,2,0,2,2,2,0,0,0,2,2,0,0,0,0,0,
+ 3,3,3,3,3,2,2,2,3,2,2,2,2,2,2,2,2,3,2,2,2,2,0,0,0,2,0,0,0,2,1,0,
+ 3,3,3,3,2,2,3,2,0,2,2,2,3,0,2,0,3,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,
+ 3,3,2,2,2,2,0,2,0,0,0,0,3,2,0,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,1,2,3,3,3,3,3,2,2,0,0,2,1,2,2,2,2,0,0,2,0,0,0,0,2,0,0,0,0,0,
+ 2,2,2,2,3,3,3,2,2,2,2,1,0,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,0,0,2,0,
+ 3,3,3,3,2,0,2,2,0,0,1,0,2,2,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,
};
@@ -134,9 +145,19 @@ const SequenceModel Iso_8859_3EsperantoModel =
{
Iso_8859_3_CharToOrderMap,
EsperantoLangModel,
- 35,
- (float)0.9942980632768038,
+ 32,
+ (float)0.995442680189542,
PR_FALSE,
"ISO-8859-3",
"eo"
};
+
+const LanguageModel EsperantoModel =
+{
+ "eo",
+ Unicode_CharOrder,
+ 64,
+ EsperantoLangModel,
+ 32,
+ (float)0.995442680189542,
+};
diff --git a/src/LangModels/LangEstonianModel.cpp b/src/LangModels/LangEstonianModel.cpp
index 71d9c66..f1ed29c 100644
--- a/src/LangModels/LangEstonianModel.cpp
+++ b/src/LangModels/LangEstonianModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Estonian *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-26 23:47:54.476870
+ * On: 2021-03-16 19:01:52.571827
**/
/* Character Mapping Table:
@@ -67,39 +68,39 @@ static const unsigned char Iso_8859_4_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 55, 56, 57,SYM, 58, 59,SYM,SYM, 29, 45, 60, 61,SYM, 32,SYM, /* AX */
- SYM, 62,SYM, 63,SYM, 64, 65,SYM,SYM, 29, 45, 66, 67, 68, 32, 69, /* BX */
- 37, 43, 70, 71, 18, 44, 47, 72, 73, 33, 74, 75, 76, 36, 77, 39, /* CX */
- 78, 79, 31, 80, 81, 20, 24,SYM, 38, 82, 52, 83, 21, 84, 34, 85, /* DX */
- 37, 43, 86, 87, 18, 44, 47, 88, 89, 33, 90, 91, 92, 36, 93, 39, /* EX */
- 94, 95, 31, 96, 97, 20, 24,SYM, 38, 98, 52, 99, 21,100, 34,SYM, /* FX */
+ SYM, 55, 56, 57,SYM, 58, 59,SYM,SYM, 30, 47, 60, 61,SYM, 33,SYM, /* AX */
+ SYM, 62,SYM, 63,SYM, 64, 65,SYM,SYM, 30, 47, 66, 67, 68, 33, 69, /* BX */
+ 37, 44, 70, 71, 18, 43, 45, 72, 73, 31, 74, 75, 76, 36, 77, 41, /* CX */
+ 78, 79, 32, 80, 81, 19, 24,SYM, 39, 82, 53, 83, 21, 84, 34, 85, /* DX */
+ 37, 44, 86, 87, 18, 43, 45, 88, 89, 31, 90, 91, 92, 36, 93, 41, /* EX */
+ 94, 95, 32, 96, 97, 19, 24,SYM, 39, 98, 53, 99, 21,100, 34,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Windows_1252_CharToOrderMap[] =
+static const unsigned char Iso_8859_13_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM, 29,SYM,102,ILL, 32,ILL, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 29,SYM,103,ILL, 32,104, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 50,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 40, 43,105,106, 18, 44, 47, 48, 41, 33,107,108, 35, 36,109,110, /* CX */
- 46,111, 53, 42,112, 20, 24,SYM, 38, 54, 52,113, 21,114,115,116, /* DX */
- 40, 43,117,118, 18, 44, 47, 48, 41, 33,119,120, 35, 36,121,122, /* EX */
- 46,123, 53, 42,124, 20, 24,SYM, 38, 54, 52,125, 21,126,127,128, /* FX */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM,101,SYM,SYM,SYM,SYM, 45, /* AX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM,102,SYM,SYM,SYM,SYM, 45, /* BX */
+ 103,104, 37,105, 18, 43,106, 47,107, 31,108,109,110,111, 41,112, /* CX */
+ 30,113,114, 42, 32, 19, 24,SYM,115, 54,116, 34, 21, 51, 33,117, /* DX */
+ 118,119, 37,120, 18, 43,121, 47,122, 31,123,124,125,126, 41,127, /* EX */
+ 30,128,129, 42, 32, 19, 24,SYM,130, 54,131, 34, 21, 51, 33,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -109,39 +110,39 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM, 29,SYM, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM, 32, 50,SYM,SYM, 32,SYM,SYM,SYM,129,130,131,SYM, /* BX */
- 40, 43,132,133, 18, 44, 47, 48, 41, 33,134,135, 35, 36,136,137, /* CX */
- 46,138, 53, 42,139, 20, 24,SYM, 38, 54, 52,140, 21,141,142,143, /* DX */
- 40, 43,144,145, 18, 44, 47, 48, 41, 33,146,147, 35, 36,148,149, /* EX */
- 46,150, 53, 42,151, 20, 24,SYM, 38, 54, 52,152, 21,153,154,155, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM, 30,SYM, 30,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM, 33, 52,SYM,SYM, 33,SYM,SYM,SYM,132,133,134,SYM, /* BX */
+ 38, 44,135,136, 18, 43, 45, 50, 40, 31,137,138, 35, 36,139,140, /* CX */
+ 46,141, 49, 42,142, 19, 24,SYM, 39, 48, 53,143, 21,144,145,146, /* DX */
+ 38, 44,147,148, 18, 43, 45, 50, 40, 31,149,150, 35, 36,151,152, /* EX */
+ 46,153, 49, 42,154, 19, 24,SYM, 39, 48, 53,155, 21,156,157,158, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_13_CharToOrderMap[] =
+static const unsigned char Windows_1252_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM,156,SYM,SYM,SYM,SYM, 47, /* AX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM,157,SYM,SYM,SYM,SYM, 47, /* BX */
- 158,159, 37,160, 18, 44,161, 45,162, 33,163,164,165,166, 39,167, /* CX */
- 29,168,169, 42, 31, 20, 24,SYM,170, 51,171, 34, 21, 49, 32,172, /* DX */
- 173,174, 37,175, 18, 44,176, 45,177, 33,178,179,180,181, 39,182, /* EX */
- 29,183,184, 42, 31, 20, 24,SYM,185, 51,186, 34, 21, 49, 32,SYM, /* FX */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,159,SYM,SYM,SYM,SYM,SYM,SYM, 30,SYM,160,ILL, 33,ILL, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 30,SYM,161,ILL, 33,162, /* 9X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 38, 44,163,164, 18, 43, 45, 50, 40, 31,165,166, 35, 36,167,168, /* CX */
+ 46,169, 49, 42,170, 19, 24,SYM, 39, 48, 53,171, 21,172,173,174, /* DX */
+ 38, 44,175,176, 18, 43, 45, 50, 40, 31,177,178, 35, 36,179,180, /* EX */
+ 46,181, 49, 42,182, 19, 24,SYM, 39, 48, 53,183, 21,184,185,186, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -151,64 +152,79 @@ static const unsigned char Windows_1257_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
- 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */
+ 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,SYM,SYM,SYM, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,SYM,SYM,ILL, /* 9X */
- SYM,ILL,SYM,SYM,SYM,ILL,SYM,SYM, 38,SYM,187,SYM,SYM,SYM,SYM, 47, /* AX */
- SYM,SYM,SYM,SYM,SYM, 50,SYM,SYM, 38,SYM,188,SYM,SYM,SYM,SYM, 47, /* BX */
- 189,190, 37,191, 18, 44,192, 45,193, 33,194,195,196,197, 39,198, /* CX */
- 29,199,200, 42, 31, 20, 24,SYM,201, 51,202, 34, 21, 49, 32,203, /* DX */
- 204,205, 37,206, 18, 44,207, 45,208, 33,209,210,211,212, 39,213, /* EX */
- 29,214,215, 42, 31, 20, 24,SYM,216, 51,217, 34, 21, 49, 32,SYM, /* FX */
+ SYM,ILL,SYM,SYM,SYM,ILL,SYM,SYM, 39,SYM,187,SYM,SYM,SYM,SYM, 45, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM, 39,SYM,188,SYM,SYM,SYM,SYM, 45, /* BX */
+ 189,190, 37,191, 18, 43,192, 47,193, 31,194,195,196,197, 41,198, /* CX */
+ 30,199,200, 42, 32, 19, 24,SYM,201, 54,202, 34, 21, 51, 33,203, /* DX */
+ 204,205, 37,206, 18, 43,207, 47,208, 31,209,210,211,212, 41,213, /* EX */
+ 30,214,215, 42, 32, 19, 24,SYM,216, 54,217, 34, 21, 51, 33,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 68;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 20, 67, 23, 68, 10, 69, 2, 70, 22, 71, 15, 72, 16,
+ 73, 1, 74, 17, 75, 8, 76, 5, 77, 12, 78, 7, 79, 9, 80, 14,
+ 81, 29, 82, 11, 83, 3, 84, 4, 85, 6, 86, 13, 87, 27, 88, 26,
+ 89, 25, 90, 28, 97, 0, 98, 20, 99, 23, 100, 10, 101, 2,102, 22,
+ 103, 15, 104, 16, 105, 1, 106, 17, 107, 8, 108, 5, 109, 12,110, 7,
+ 111, 9, 112, 14, 113, 29, 114, 11, 115, 3, 116, 4, 117, 6,118, 13,
+ 119, 27, 120, 26, 121, 25, 122, 28, 196, 18, 201, 31, 213, 19,214, 24,
+ 220, 21, 228, 18, 233, 31, 245, 19, 246, 24, 252, 21, 332, 32,333, 32,
+ 352, 30, 353, 30, 381, 33, 382, 33,
+};
+
/* Model Table:
- * Total sequences: 853
- * First 512 sequences: 0.9972721312183132
- * Next 512 sequences (512-1024): 0.0027278687816868537
- * Rest: -5.204170427930421e-18
+ * Total sequences: 869
+ * First 512 sequences: 0.9973685549586747
+ * Next 512 sequences (512-1024): 0.002631445041325318
+ * Rest: -3.122502256758253e-17
* Negative sequences: TODO
*/
static const PRUint8 EstonianLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,0,3,3,3,3,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,2,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,0,3,3,3,2,0,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,0,3,3,2,3,3,3,2,2,0,3,2,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,0,0,2,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,0,0,2,2,2,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,2,3,3,3,3,2,3,3,0,2,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,3,2,2,3,3,0,2,0,0,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,2,3,3,0,3,3,3,2,2,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,3,0,2,0,3,0,0,0,2,2,2,0,0,0,3,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,0,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,3,3,3,3,3,2,3,3,0,2,0,2,2,0,0,
- 3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,2,2,2,3,0,3,2,0,2,3,2,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,2,3,0,3,3,0,2,3,3,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,2,2,2,2,2,0,3,2,0,2,0,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,0,3,3,3,0,3,3,3,2,0,3,0,2,0,0,0,2,0,
- 3,3,3,2,3,0,3,3,0,3,0,2,3,0,3,0,0,0,3,0,3,3,0,0,2,0,0,0,0,0,0,0,0,
- 2,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,0,0,0,0,0,2,0,0,0,0,0,0,
- 3,3,3,3,2,3,3,3,2,3,0,3,2,0,0,0,2,3,0,2,0,2,0,2,0,2,2,0,0,0,0,0,0,
- 0,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,3,3,0,3,3,0,0,0,0,0,0,0,0,0,2,0,0,
- 3,0,2,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,0,3,0,3,2,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,3,3,2,0,3,2,3,0,0,0,2,0,2,2,0,0,3,3,3,2,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,0,0,2,0,0,2,3,0,3,0,0,2,0,0,0,0,
- 2,3,3,3,3,3,0,3,3,2,3,3,2,3,3,3,2,2,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,2,3,2,3,2,0,3,3,0,0,0,0,0,0,0,3,2,0,2,0,0,0,2,3,0,
- 3,3,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,
- 3,3,3,2,2,2,2,2,2,3,0,2,0,0,0,2,2,0,0,0,0,0,2,0,0,2,0,2,0,0,0,0,0,
- 3,3,2,0,0,0,3,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
- 2,3,3,0,0,2,3,2,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
- 2,3,2,2,0,2,2,2,2,3,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
- 0,0,0,2,2,2,2,2,2,0,0,0,2,0,0,2,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,3,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,3,3,3,3,3,2,0,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,3,2,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,2,3,3,2,2,0,0,0,2,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,3,2,3,3,3,2,2,2,0,3,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,2,0,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,2,2,3,0,0,2,2,0,2,2,2,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,3,2,2,3,0,3,2,2,3,0,2,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,0,2,0,0,0,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,2,3,3,0,3,3,3,2,2,2,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,0,2,0,3,0,0,0,2,2,2,0,0,0,0,2,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,2,2,0,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,3,3,3,3,3,2,3,3,0,2,2,0,2,2,0,0,
+ 3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,2,2,2,3,3,0,2,0,2,3,2,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,2,3,3,0,3,0,2,3,2,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,2,2,2,2,2,0,2,2,0,2,2,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,0,3,3,3,3,0,3,3,2,0,3,0,2,0,0,0,2,2,0,
+ 3,3,3,2,3,0,3,3,2,3,2,0,3,0,2,0,0,0,3,3,0,3,0,0,2,0,0,0,0,0,0,0,0,0,
+ 2,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,0,0,0,0,2,0,0,0,0,0,0,0,
+ 0,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,3,3,0,3,3,0,0,0,0,0,0,0,2,0,0,0,0,0,
+ 3,3,3,3,2,3,3,3,3,3,0,3,2,0,0,0,2,3,0,2,2,2,0,2,0,2,2,0,0,0,0,0,0,0,
+ 3,0,2,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,2,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,2,3,3,3,2,0,3,2,3,0,0,0,2,0,2,2,0,0,3,3,3,2,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,0,0,0,2,0,2,3,0,3,0,0,2,2,0,0,0,0,
+ 2,2,3,3,3,3,0,3,3,2,3,3,3,3,3,3,2,2,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,2,3,3,3,2,3,2,3,2,0,3,2,0,0,0,0,2,0,0,3,2,0,2,0,2,0,0,0,2,0,
+ 2,3,2,2,2,0,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,
+ 3,3,3,2,2,2,2,2,2,3,0,2,0,0,0,2,3,0,0,0,0,0,2,0,0,2,0,2,0,0,0,0,0,0,
+ 2,3,3,2,0,2,2,2,2,3,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,
+ 3,3,2,0,0,0,3,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
+ 2,3,3,0,0,2,3,2,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
+ 0,0,0,2,0,2,0,2,2,2,2,2,2,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,2,2,2,2,2,2,0,0,0,2,0,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,3,2,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
@@ -216,21 +232,21 @@ const SequenceModel Iso_8859_4EstonianModel =
{
Iso_8859_4_CharToOrderMap,
EstonianLangModel,
- 33,
- (float)0.9972721312183132,
+ 34,
+ (float)0.9973685549586747,
PR_TRUE,
"ISO-8859-4",
"et"
};
-const SequenceModel Windows_1252EstonianModel =
+const SequenceModel Iso_8859_13EstonianModel =
{
- Windows_1252_CharToOrderMap,
+ Iso_8859_13_CharToOrderMap,
EstonianLangModel,
- 33,
- (float)0.9972721312183132,
+ 34,
+ (float)0.9973685549586747,
PR_TRUE,
- "WINDOWS-1252",
+ "ISO-8859-13",
"et"
};
@@ -238,21 +254,21 @@ const SequenceModel Iso_8859_15EstonianModel =
{
Iso_8859_15_CharToOrderMap,
EstonianLangModel,
- 33,
- (float)0.9972721312183132,
+ 34,
+ (float)0.9973685549586747,
PR_TRUE,
"ISO-8859-15",
"et"
};
-const SequenceModel Iso_8859_13EstonianModel =
+const SequenceModel Windows_1252EstonianModel =
{
- Iso_8859_13_CharToOrderMap,
+ Windows_1252_CharToOrderMap,
EstonianLangModel,
- 33,
- (float)0.9972721312183132,
+ 34,
+ (float)0.9973685549586747,
PR_TRUE,
- "ISO-8859-13",
+ "WINDOWS-1252",
"et"
};
@@ -260,9 +276,19 @@ const SequenceModel Windows_1257EstonianModel =
{
Windows_1257_CharToOrderMap,
EstonianLangModel,
- 33,
- (float)0.9972721312183132,
+ 34,
+ (float)0.9973685549586747,
PR_TRUE,
"WINDOWS-1257",
"et"
};
+
+const LanguageModel EstonianModel =
+{
+ "et",
+ Unicode_CharOrder,
+ 68,
+ EstonianLangModel,
+ 34,
+ (float)0.9973685549586747,
+};
diff --git a/src/LangModels/LangFinnishModel.cpp b/src/LangModels/LangFinnishModel.cpp
index cbc9528..23f7c58 100644
--- a/src/LangModels/LangFinnishModel.cpp
+++ b/src/LangModels/LangFinnishModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Finnish *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-21 18:15:05.189948
+ * On: 2021-03-16 19:06:31.129345
**/
/* Character Mapping Table:
@@ -61,66 +62,66 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Iso_8859_15_CharToOrderMap[] =
+static const unsigned char Iso_8859_1_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM, 27,SYM, 27,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM, 28, 61,SYM,SYM, 28,SYM,SYM,SYM, 62, 63, 64,SYM, /* BX */
- 49, 35, 65, 46, 11, 56, 39, 37, 40, 30, 51, 31, 66, 36, 67, 57, /* CX */
- 68, 58, 52, 33, 34, 59, 22,SYM, 69, 70, 38, 71, 32, 72, 73, 55, /* DX */
- 49, 35, 74, 46, 11, 56, 39, 37, 40, 30, 51, 31, 75, 36, 76, 57, /* EX */
- 77, 58, 52, 33, 34, 59, 22,SYM, 78, 79, 38, 80, 32, 81, 82, 83, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32, 66, 59, /* CX */
+ 67, 58, 50, 33, 35, 53, 22,SYM, 68, 69, 41, 70, 34, 71, 72, 56, /* DX */
+ 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32, 73, 59, /* EX */
+ 74, 58, 50, 33, 35, 53, 22,SYM, 75, 76, 41, 77, 34, 78, 79, 80, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Windows_1252_CharToOrderMap[] =
+static const unsigned char Iso_8859_4_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM, 27,SYM, 85,ILL, 28,ILL, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 27,SYM, 86,ILL, 28, 87, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 88,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 49, 35, 89, 46, 11, 56, 39, 37, 40, 30, 51, 31, 90, 36, 91, 57, /* CX */
- 92, 58, 52, 33, 34, 59, 22,SYM, 93, 94, 38, 95, 32, 96, 97, 55, /* DX */
- 49, 35, 98, 46, 11, 56, 39, 37, 40, 30, 51, 31, 99, 36,100, 57, /* EX */
- 101, 58, 52, 33, 34, 59, 22,SYM,102,103, 38,104, 32,105,106,107, /* FX */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ SYM, 81, 82, 48,SYM, 83, 84,SYM,SYM, 28, 85, 86, 87,SYM, 27,SYM, /* AX */
+ SYM, 88,SYM, 48,SYM, 89, 90,SYM,SYM, 28, 91, 92, 93, 42, 27, 42, /* BX */
+ 63, 36, 62, 47, 12, 54, 37, 94, 46, 30, 95, 31, 96, 32, 97, 98, /* CX */
+ 99, 64,100,101, 35, 53, 22,SYM,102,103, 41,104, 34,105, 55, 56, /* DX */
+ 63, 36, 62, 47, 12, 54, 37,106, 46, 30,107, 31,108, 32,109,110, /* EX */
+ 111, 64,112,113, 35, 53, 22,SYM,114,115, 41,116, 34,117, 55,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_4_CharToOrderMap[] =
+static const unsigned char Iso_8859_9_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,108,109, 47,SYM,110,111,SYM,SYM, 27,112,113,114,SYM, 28,SYM, /* AX */
- SYM,115,SYM, 47,SYM,116,117,SYM,SYM, 27,118,119,120, 45, 28, 45, /* BX */
- 53, 35,121, 46, 11, 56, 39,122, 43, 30,123, 31,124, 36,125,126, /* CX */
- 127, 54,128,129, 34, 59, 22,SYM,130,131, 38,132, 32,133,134, 55, /* DX */
- 53, 35,135, 46, 11, 56, 39,136, 43, 30,137, 31,138, 36,139,140, /* EX */
- 141, 54,142,143, 34, 59, 22,SYM,144,145, 38,146, 32,147,148,SYM, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM,118,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,119, 59, /* CX */
+ 51, 58, 50, 33, 35, 53, 22,SYM,120,121, 41,122, 34, 49, 43, 56, /* DX */
+ 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,123, 59, /* EX */
+ 51, 58, 50, 33, 35, 53, 22,SYM,124,125, 41,126, 34, 45, 43,127, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -130,136 +131,149 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,149,SYM, 47,SYM,SYM,SYM,SYM, 39, /* AX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,150,SYM, 47,SYM,SYM,SYM,SYM, 39, /* BX */
- 151,152, 53, 41, 11, 56,153,154, 43, 30,155,156,157,158,159,160, /* CX */
- 27,161, 54, 33,162, 59, 22,SYM,163,164,165,166, 32, 60, 28, 55, /* DX */
- 167,168, 53, 41, 11, 56,169,170, 43, 30,171,172,173,174,175,176, /* EX */
- 27,177, 54, 33,178, 59, 22,SYM,179,180,181,182, 32, 60, 28,SYM, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,128,SYM, 48,SYM,SYM,SYM,SYM, 37, /* AX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,129,SYM, 48,SYM,SYM,SYM,SYM, 37, /* BX */
+ 130,131, 63, 40, 12, 54,132,133, 46, 30, 61,134,135,136,137,138, /* CX */
+ 28,139, 64, 33,140, 53, 22,SYM,141,142,143, 55, 34, 57, 27, 56, /* DX */
+ 144,145, 63, 40, 12, 54,146,147, 46, 30, 61,148,149,150,151,152, /* EX */
+ 28,153, 64, 33,154, 53, 22,SYM,155,156,157, 55, 34, 57, 27,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_9_CharToOrderMap[] =
+static const unsigned char Iso_8859_15_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM,183,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 49, 35,184, 46, 11, 56, 39, 37, 40, 30, 51, 31,185, 36,186, 57, /* CX */
- 50, 58, 52, 33, 34, 59, 22,SYM,187,188, 38,189, 32, 48, 42, 55, /* DX */
- 49, 35,190, 46, 11, 56, 39, 37, 40, 30, 51, 31,191, 36,192, 57, /* EX */
- 50, 58, 52, 33, 34, 59, 22,SYM,193,194, 38,195, 32, 44, 42,196, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM, 28,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM, 27,158,SYM,SYM, 27,SYM,SYM,SYM,159,160,161,SYM, /* BX */
+ 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,162, 59, /* CX */
+ 163, 58, 50, 33, 35, 53, 22,SYM,164,165, 41,166, 34,167,168, 56, /* DX */
+ 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,169, 59, /* EX */
+ 170, 58, 50, 33, 35, 53, 22,SYM,171,172, 41,173, 34,174,175,176, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_1_CharToOrderMap[] =
+static const unsigned char Windows_1252_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */
+ SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */
16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ SYM,ILL,SYM,177,SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM,178,ILL, 27,ILL, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM,179,ILL, 27,180, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM,197,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 49, 35,198, 46, 11, 56, 39, 37, 40, 30, 51, 31,199, 36,200, 57, /* CX */
- 201, 58, 52, 33, 34, 59, 22,SYM,202,203, 38,204, 32,205,206, 55, /* DX */
- 49, 35,207, 46, 11, 56, 39, 37, 40, 30, 51, 31,208, 36,209, 57, /* EX */
- 210, 58, 52, 33, 34, 59, 22,SYM,211,212, 38,213, 32,214,215,216, /* FX */
+ SYM,SYM,SYM,SYM,SYM,181,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,182, 59, /* CX */
+ 183, 58, 50, 33, 35, 53, 22,SYM,184,185, 41,186, 34,187,188, 56, /* DX */
+ 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,189, 59, /* EX */
+ 190, 58, 50, 33, 35, 53, 22,SYM,191,192, 41,193, 34,194,195,196, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 60;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 19, 67, 21, 68, 18, 69, 4, 70, 23, 71, 20, 72, 15,
+ 73, 1, 74, 14, 75, 9, 76, 6, 77, 11, 78, 2, 79, 7, 80, 16,
+ 81, 29, 82, 10, 83, 5, 84, 3, 85, 8, 86, 13, 87, 24, 88, 26,
+ 89, 17, 90, 25, 97, 0, 98, 19, 99, 21, 100, 18, 101, 4,102, 23,
+ 103, 20, 104, 15, 105, 1, 106, 14, 107, 9, 108, 6, 109, 11,110, 2,
+ 111, 7, 112, 16, 113, 29, 114, 10, 115, 5, 116, 3, 117, 8,118, 13,
+ 119, 24, 120, 26, 121, 17, 122, 25, 196, 12, 214, 22, 228, 12,246, 22,
+ 352, 28, 353, 28, 381, 27, 382, 27,
+};
+
/* Model Table:
- * Total sequences: 919
- * First 512 sequences: 0.9985378147555799
- * Next 512 sequences (512-1024): 0.0014621852444200612
- * Rest: 3.881443777498106e-17
+ * Total sequences: 940
+ * First 512 sequences: 0.9985812031154878
+ * Next 512 sequences (512-1024): 0.0014187968845121583
+ * Rest: 2.7321894746634712e-17
* Negative sequences: TODO
*/
static const PRUint8 FinnishLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,3,3,0,3,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,3,3,2,3,3,2,0,3,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,2,3,2,3,2,2,0,2,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,2,3,2,3,2,2,0,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,2,2,0,0,0,0,0,0,0,
3,3,2,2,3,3,2,3,3,2,3,3,3,2,2,2,3,3,2,3,3,3,3,2,2,2,2,0,0,0,
- 3,3,2,2,3,2,2,3,3,3,2,3,0,2,2,2,2,3,2,2,0,0,2,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,3,3,2,2,0,0,0,0,2,
- 3,3,3,2,3,2,2,3,3,2,2,3,2,0,2,0,2,3,0,2,0,0,3,2,0,0,0,0,0,0,
- 3,3,2,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,2,0,2,2,3,2,3,0,0,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,0,0,
- 3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,0,3,2,
- 3,3,3,3,3,3,3,3,3,3,3,2,2,0,3,2,0,3,3,3,2,3,2,0,2,2,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,3,3,3,3,3,2,3,2,0,0,0,0,
- 3,3,2,3,3,3,3,3,3,3,3,0,2,0,3,0,2,3,3,2,2,3,0,0,0,2,0,0,0,2,
- 2,3,3,3,2,3,3,2,0,3,3,3,3,3,3,3,3,3,3,2,0,0,3,2,0,0,0,0,0,0,
- 3,3,2,3,3,3,3,3,3,2,3,2,0,2,0,2,2,3,0,2,2,2,0,3,0,2,0,0,0,0,
- 3,3,3,2,3,3,2,3,2,2,3,0,2,0,3,0,0,2,2,2,2,2,0,2,2,0,0,0,0,0,
- 3,3,3,2,3,2,2,3,2,2,2,2,2,2,2,0,2,3,2,2,2,0,0,2,2,3,0,0,0,0,
- 3,3,0,2,2,2,3,2,0,0,0,0,2,2,3,0,2,0,0,2,0,2,0,3,2,0,2,0,0,0,
- 3,3,2,2,3,0,0,2,2,2,2,0,2,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,0,0,0,0,0,0,
+ 3,3,2,2,3,2,2,3,3,2,2,0,3,2,2,2,2,3,2,0,0,0,2,0,0,0,0,0,0,0,
+ 3,3,2,0,3,2,2,3,3,2,2,2,3,2,0,2,2,3,0,2,0,2,3,2,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,0,3,3,2,2,0,0,0,0,2,
+ 3,3,2,3,3,3,3,3,3,2,3,3,3,2,0,3,3,3,3,2,2,2,3,2,3,0,0,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,0,2,0,
+ 3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,0,3,3,3,2,3,2,0,2,2,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,3,3,3,3,3,3,2,3,2,0,0,0,2,
+ 3,3,2,3,3,3,3,3,3,3,3,2,0,0,0,3,2,3,3,2,3,3,0,0,2,2,0,0,0,2,
+ 3,3,3,3,2,3,3,2,0,3,3,3,3,3,3,3,3,3,3,0,2,0,3,2,0,0,0,0,0,0,
+ 3,3,2,3,3,3,3,3,3,2,3,2,2,2,2,0,2,3,0,2,2,3,0,3,2,2,0,0,0,0,
+ 3,3,3,2,3,3,2,3,2,2,3,2,0,0,0,3,0,2,2,2,0,2,0,2,0,0,0,0,0,0,
+ 3,3,3,2,3,0,2,3,2,2,2,2,2,2,0,2,2,3,2,2,2,0,0,2,2,3,0,0,0,0,
+ 3,3,0,2,2,2,3,2,2,0,0,2,0,2,0,2,2,0,0,2,0,2,0,3,2,0,2,0,0,0,
3,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,2,2,3,0,0,2,2,2,2,2,0,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,0,0,0,2,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
-const SequenceModel Iso_8859_15FinnishModel =
+const SequenceModel Iso_8859_1FinnishModel =
{
- Iso_8859_15_CharToOrderMap,
+ Iso_8859_1_CharToOrderMap,
FinnishLangModel,
30,
- (float)0.9985378147555799,
+ (float)0.9985812031154878,
PR_TRUE,
- "ISO-8859-15",
+ "ISO-8859-1",
"fi"
};
-const SequenceModel Windows_1252FinnishModel =
+const SequenceModel Iso_8859_4FinnishModel =
{
- Windows_1252_CharToOrderMap,
+ Iso_8859_4_CharToOrderMap,
FinnishLangModel,
30,
- (float)0.9985378147555799,
+ (float)0.9985812031154878,
PR_TRUE,
- "WINDOWS-1252",
+ "ISO-8859-4",
"fi"
};
-const SequenceModel Iso_8859_4FinnishModel =
+const SequenceModel Iso_8859_9FinnishModel =
{
- Iso_8859_4_CharToOrderMap,
+ Iso_8859_9_CharToOrderMap,
FinnishLangModel,
30,
- (float)0.9985378147555799,
+ (float)0.9985812031154878,
PR_TRUE,
- "ISO-8859-4",
+ "ISO-8859-9",
"fi"
};
@@ -268,30 +282,40 @@ const SequenceModel Iso_8859_13FinnishModel =
Iso_8859_13_CharToOrderMap,
FinnishLangModel,
30,
- (float)0.9985378147555799,
+ (float)0.9985812031154878,
PR_TRUE,
"ISO-8859-13",
"fi"
};
-const SequenceModel Iso_8859_9FinnishModel =
+const SequenceModel Iso_8859_15FinnishModel =
{
- Iso_8859_9_CharToOrderMap,
+ Iso_8859_15_CharToOrderMap,
FinnishLangModel,
30,
- (float)0.9985378147555799,
+ (float)0.9985812031154878,
PR_TRUE,
- "ISO-8859-9",
+ "ISO-8859-15",
"fi"
};
-const SequenceModel Iso_8859_1FinnishModel =
+const SequenceModel Windows_1252FinnishModel =
{
- Iso_8859_1_CharToOrderMap,
+ Windows_1252_CharToOrderMap,
FinnishLangModel,
30,
- (float)0.9985378147555799,
+ (float)0.9985812031154878,
PR_TRUE,
- "ISO-8859-1",
+ "WINDOWS-1252",
"fi"
};
+
+const LanguageModel FinnishModel =
+{
+ "fi",
+ Unicode_CharOrder,
+ 60,
+ FinnishLangModel,
+ 30,
+ (float)0.9985812031154878,
+};
diff --git a/src/LangModels/LangGreekModel.cpp b/src/LangModels/LangGreekModel.cpp
index 28951e6..4038450 100644
--- a/src/LangModels/LangGreekModel.cpp
+++ b/src/LangModels/LangGreekModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Greek *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-05-25 15:21:50.073117
+ * On: 2021-03-16 18:58:31.005768
**/
/* Character Mapping Table:
@@ -61,171 +62,200 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Windows_1253_CharToOrderMap[] =
+static const unsigned char Iso_8859_7_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */
- 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */
- 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */
- SYM,SYM, 17,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 62,SYM,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */
- 55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */
- 11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */
- 61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */
- 11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */
+ SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 4X */
+ 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 6X */
+ 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM,SYM, 18,SYM, 19, 23, 15,SYM, 16,SYM, 25, 28, /* BX */
+ 45, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* CX */
+ 13, 6,ILL, 9, 2, 10, 26, 24, 46, 22, 50, 59, 18, 19, 23, 15, /* DX */
+ 61, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* EX */
+ 13, 6, 7, 9, 2, 10, 26, 24, 46, 22, 50, 59, 16, 25, 28,ILL, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_7_CharToOrderMap[] =
+static const unsigned char Windows_1253_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */
- 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */
- 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM,SYM, 17,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */
- 55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */
- 11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */
- 61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */
- 11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */
+ SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 4X */
+ 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 6X */
+ 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */
+ SYM,SYM, 18,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 62,SYM,SYM, 19, 23, 15,SYM, 16,SYM, 25, 28, /* BX */
+ 45, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* CX */
+ 13, 6,ILL, 9, 2, 10, 26, 24, 46, 22, 50, 59, 18, 19, 23, 15, /* DX */
+ 61, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* EX */
+ 13, 6, 7, 9, 2, 10, 26, 24, 46, 22, 50, 59, 16, 25, 28,ILL, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 93;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 33, 67, 41, 68, 40, 69, 30, 72, 42, 73, 32, 76, 39, 77, 44,
+ 78, 34, 79, 36, 82, 35, 83, 37, 84, 38, 85, 43, 97, 33, 99, 41,
+ 100, 40, 101, 30, 104, 42, 105, 32, 108, 39, 109, 44, 110, 34,111, 36,
+ 114, 35, 115, 37, 116, 38, 117, 43, 902, 18, 904, 19, 905, 23,906, 15,
+ 908, 16, 910, 25, 911, 28, 912, 45, 913, 0, 914, 21, 915, 17,916, 20,
+ 917, 5, 918, 29, 919, 11, 920, 27, 921, 3, 922, 8, 923, 12,924, 14,
+ 925, 4, 926, 31, 927, 1, 928, 13, 929, 6, 931, 7, 931, 9,932, 2,
+ 933, 10, 934, 26, 935, 24, 936, 46, 937, 22, 940, 18, 941, 19,942, 23,
+ 943, 15, 945, 0, 946, 21, 947, 17, 948, 20, 949, 5, 950, 29,951, 11,
+ 952, 27, 953, 3, 954, 8, 955, 12, 956, 14, 957, 4, 958, 31,959, 1,
+ 960, 13, 961, 6, 962, 7, 963, 9, 964, 2, 965, 10, 966, 26,967, 24,
+ 968, 46, 969, 22, 972, 16, 973, 25, 974, 28,
+};
+
/* Model Table:
- * Total sequences: 1579
- * First 512 sequences: 0.958419074626211
- * Next 512 sequences (512-1024): 0.03968891876305471
- * Rest: 0.0018920066107342773
+ * Total sequences: 1390
+ * First 512 sequences: 0.9624941725288916
+ * Next 512 sequences (512-1024): 0.035897222027766316
+ * Rest: 0.0016086054433421051
* Negative sequences: TODO
*/
static const PRUint8 GreekLangModel[] =
{
- 1,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,1,2,
- 3,3,3,3,3,1,3,0,3,0,0,0,0,0,0,1,0,0,1,0,0,0,2,
- 2,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,2,3,2,3,1,2,
- 3,3,3,3,3,2,2,0,2,0,0,0,0,0,0,0,0,1,0,0,1,0,2,
- 3,3,2,3,2,3,3,3,2,3,3,1,3,2,2,3,3,3,2,3,0,3,3,
- 2,2,2,2,2,3,3,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,1,3,3,3,3,3,3,2,
- 3,1,3,3,2,3,3,0,2,0,0,1,0,0,0,1,0,0,0,0,0,0,2,
- 3,3,3,3,3,3,2,3,2,2,3,1,2,2,2,3,3,3,3,3,3,3,3,
- 2,2,1,3,2,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
- 2,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,2,3,1,3,3,1,
- 3,3,3,3,3,2,2,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,2,
- 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,
- 3,3,2,3,2,3,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
- 3,3,3,3,2,3,2,3,3,0,3,3,3,3,2,3,3,3,2,3,2,3,3,
- 3,3,2,2,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,2,3,3,2,3,2,3,2,3,2,3,3,3,3,1,3,3,3,3,
- 2,3,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,
- 1,1,0,1,1,1,0,1,1,0,2,1,0,1,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
- 1,1,3,0,3,2,3,3,3,3,0,3,0,3,3,1,0,0,3,1,2,0,0,
- 2,1,1,3,2,0,0,0,2,0,0,1,0,0,0,0,0,0,1,0,0,0,2,
- 3,3,3,3,2,3,3,2,1,1,3,2,3,1,3,3,3,3,1,3,0,3,3,
- 1,2,1,1,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,2,3,2,3,2,3,3,3,3,2,3,0,3,3,2,2,3,3,2,3,1,2,
- 3,0,3,3,2,1,3,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,2,
- 3,3,1,3,2,3,1,2,1,2,3,3,2,3,1,3,3,3,1,3,1,3,3,
- 1,2,3,0,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,
- 3,3,3,3,2,3,1,2,2,2,3,2,3,3,3,3,3,3,2,3,2,3,3,
- 2,3,2,2,2,3,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
- 3,3,3,1,3,3,3,3,3,3,2,3,0,3,3,0,0,0,3,0,3,3,0,
- 3,0,2,3,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
- 2,2,3,2,3,3,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0,
- 3,1,2,2,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
- 2,2,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0,
- 3,0,3,3,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
- 3,3,0,3,3,3,3,0,3,0,3,0,2,3,3,3,3,3,3,3,2,3,3,
- 2,2,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,3,0,
- 3,0,3,3,3,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
- 3,3,1,3,2,3,3,1,0,0,3,0,3,1,0,3,3,3,0,3,0,3,3,
- 0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,1,3,2,3,1,3,3,2,3,1,3,1,3,2,2,1,2,3,1,2,0,2,
- 2,0,3,3,2,1,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,3,1,3,1,3,3,3,3,1,2,0,3,3,0,0,0,2,0,2,1,0,
- 2,0,1,3,2,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
- 3,3,3,3,3,3,3,1,0,1,3,1,2,2,2,3,2,3,0,3,0,3,3,
- 0,2,1,3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,3,3,2,3,3,3,3,2,3,2,3,0,3,3,0,0,0,3,0,2,1,0,
- 2,0,2,3,2,0,2,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,2,
- 3,3,1,3,2,3,3,1,1,1,2,1,2,0,3,3,3,3,2,3,2,2,2,
- 0,2,2,0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,0,3,3,3,3,1,1,0,3,0,3,3,3,2,2,3,1,3,0,2,3,
- 0,2,0,0,1,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,2,3,1,3,3,3,2,0,3,1,3,1,2,3,3,3,2,3,0,3,3,
- 0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,1,3,2,3,0,3,3,2,3,2,3,0,3,2,0,0,0,1,0,2,1,0,
- 1,0,2,2,1,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,1,3,1,3,1,1,1,0,2,0,2,2,1,2,2,2,1,2,0,3,2,
- 0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,1,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,2,1,2,2,2,3,3,2,3,2,2,2,2,2,2,0,
- 3,3,1,3,1,3,0,0,1,0,3,1,2,1,1,2,2,3,1,2,0,2,2,
- 0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,
- 0,0,0,1,1,0,0,2,0,2,2,1,3,3,3,2,3,2,2,2,2,2,0,
- 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
- 0,0,1,0,0,0,0,2,0,3,2,3,2,3,3,3,2,2,3,1,2,2,0,
- 0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
- 0,1,0,1,0,0,0,2,0,2,2,2,3,3,2,2,2,2,2,2,2,2,0,
- 0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,1,0,0,0,3,0,3,3,3,2,2,2,2,2,2,2,1,2,2,0,
- 0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
- 0,0,0,0,0,0,0,3,0,3,2,2,1,2,2,2,2,3,2,1,2,1,0,
- 1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,
- 0,0,0,0,0,0,1,3,0,3,3,3,2,1,2,2,2,1,1,3,2,2,0,
- 0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,2,0,2,2,2,1,1,3,2,2,1,2,2,2,2,0,
+ 1,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,1,
+ 2,3,3,3,3,1,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,
+ 2,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,1,
+ 2,3,3,3,3,1,2,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,2,2,
+ 3,3,2,3,2,3,3,3,2,3,3,3,3,1,2,3,3,2,3,3,0,2,3,
+ 3,2,2,2,1,3,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,1,3,3,3,3,3,3,3,
+ 2,3,0,2,3,3,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,3,3,3,3,3,2,3,2,3,2,3,2,1,2,3,3,3,3,3,3,2,3,
+ 3,2,2,2,3,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,3,3,3,3,2,3,3,3,3,3,1,3,3,3,3,2,3,2,1,3,3,3,
+ 1,3,3,3,3,2,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,
+ 3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,
+ 3,3,3,2,3,3,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+ 0,0,0,1,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
+ 3,3,3,3,2,3,3,2,3,2,3,3,3,2,2,3,3,2,3,3,3,2,3,
+ 3,2,3,2,2,3,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,
+ 3,3,3,3,2,3,2,0,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,
+ 3,3,3,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,3,2,3,2,3,3,3,3,0,2,3,3,3,3,2,3,3,2,3,3,1,
+ 2,3,0,2,3,2,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,
+ 1,1,3,1,3,1,3,3,3,3,0,0,3,2,3,0,1,3,0,1,2,1,0,
+ 0,2,1,2,3,1,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,3,3,3,3,3,2,2,2,2,3,3,3,2,3,3,3,3,3,3,2,3,3,
+ 3,2,3,3,2,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,2,3,3,2,2,1,3,3,3,2,1,3,3,1,3,3,0,1,3,
+ 3,1,2,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,1,3,2,3,1,2,1,2,2,3,2,3,3,3,3,1,3,3,0,3,3,
+ 3,1,2,3,0,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,
+ 3,3,3,1,3,3,3,3,3,3,1,2,3,3,3,0,0,3,0,0,3,2,3,
+ 0,3,0,2,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 2,2,3,2,3,3,3,3,3,3,2,2,3,3,3,0,0,3,0,0,3,2,1,
+ 0,2,0,2,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,3,0,3,3,3,3,0,3,0,2,3,3,0,3,3,3,3,3,3,2,0,3,
+ 3,2,2,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,3,3,3,3,2,3,3,3,3,1,2,3,3,3,0,0,3,0,0,3,3,1,
+ 0,3,0,3,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,3,3,3,3,2,3,3,3,3,2,1,3,3,3,0,0,3,0,0,3,2,2,
+ 0,3,0,3,2,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,3,0,3,1,3,3,0,0,2,3,3,0,0,0,3,3,0,3,3,0,0,3,
+ 3,0,3,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,1,3,1,3,3,1,1,1,2,2,3,0,0,3,3,2,3,3,2,2,2,
+ 2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,1,3,2,3,1,3,3,2,3,0,1,2,3,3,1,0,3,2,1,2,3,1,
+ 2,2,0,2,3,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,0,3,1,3,1,3,3,3,3,0,0,2,2,3,0,0,2,0,0,2,1,1,
+ 0,2,0,2,3,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,3,3,3,3,3,3,1,0,2,2,3,2,1,2,3,3,1,3,3,0,1,3,
+ 3,0,2,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,3,3,2,3,3,3,3,3,3,0,2,3,3,3,0,0,3,0,0,2,2,1,
+ 0,2,0,2,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,3,2,3,2,3,3,1,1,2,3,3,2,0,1,3,3,2,2,3,0,1,3,
+ 3,0,2,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,0,2,3,3,3,1,1,0,3,3,3,0,3,2,2,0,3,3,0,0,3,
+ 3,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,3,2,3,0,3,3,2,3,0,2,2,3,2,0,0,1,0,0,2,2,1,
+ 0,1,0,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,0,3,2,3,1,0,0,1,2,3,1,0,2,3,3,1,2,3,0,2,3,
+ 2,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,2,1,2,2,3,3,2,3,2,2,3,2,2,2,2,0,0,
+ 3,3,1,3,0,3,0,0,0,0,2,3,1,1,1,2,2,1,2,2,0,0,2,
+ 2,0,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,2,0,2,3,3,2,2,3,3,2,2,3,1,2,2,0,0,
+ 0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,2,0,2,1,3,3,1,2,3,3,2,2,2,2,2,0,0,
+ 1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,2,0,2,2,2,2,2,2,2,1,3,2,1,2,1,0,0,
+ 0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,3,0,3,3,2,2,2,2,2,2,2,2,2,2,2,0,0,
+ 0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,1,0,0,0,0,2,0,2,2,3,3,2,2,2,2,2,2,2,2,2,0,0,
+ 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,2,0,2,2,1,1,2,2,3,2,2,2,2,2,2,0,0,
+ 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,3,0,3,2,0,2,2,2,2,2,1,1,3,2,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,3,0,3,3,2,1,1,2,2,2,2,1,1,2,2,0,
+ 0,0,0,0,0,0,0,3,0,3,2,1,1,3,2,2,2,2,1,1,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,3,0,2,2,2,2,1,1,2,2,1,2,1,2,1,0,
+ 0,0,0,0,0,0,0,3,0,3,2,1,2,2,2,1,2,2,1,1,2,1,0,0,
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,2,0,2,2,2,2,1,2,1,2,2,2,3,2,1,0,
- 0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,3,0,2,2,2,2,2,2,1,2,1,1,1,2,2,0,
+ 0,0,0,0,0,0,0,2,0,2,2,1,2,2,1,2,2,1,1,3,2,1,0,0,
+ 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,3,0,2,2,2,2,2,2,2,2,0,0,0,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,2,0,2,2,1,2,2,2,2,2,2,2,1,1,2,0,
- 1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,
- 0,0,0,0,0,0,0,3,0,2,2,2,1,1,1,2,2,1,1,1,2,2,0,
- 2,2,0,2,0,3,0,0,0,0,3,0,2,0,0,2,1,1,0,1,0,1,2,
- 0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,2,0,2,2,2,2,1,2,2,2,2,2,1,0,2,0,0,
+ 1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
+ 0,0,0,0,0,0,0,3,0,2,3,1,1,2,2,1,1,0,1,0,2,2,0,0,
+ 1,3,1,0,2,0,1,1,1,2,0,0,1,0,1,0,0,0,0,0,2,0,0,
+ 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,2,0,2,0,2,0,0,0,0,2,3,0,0,0,2,1,0,2,1,0,2,1,
+ 2,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
+const SequenceModel Iso_8859_7GreekModel =
+{
+ Iso_8859_7_CharToOrderMap,
+ GreekLangModel,
+ 47,
+ (float)0.9624941725288916,
+ PR_FALSE,
+ "ISO-8859-7",
+ "el"
+};
+
const SequenceModel Windows_1253GreekModel =
{
Windows_1253_CharToOrderMap,
GreekLangModel,
- 46,
- (float)0.958419074626211,
+ 47,
+ (float)0.9624941725288916,
PR_FALSE,
"WINDOWS-1253",
"el"
};
-const SequenceModel Iso_8859_7GreekModel =
+const LanguageModel GreekModel =
{
- Iso_8859_7_CharToOrderMap,
+ "el",
+ Unicode_CharOrder,
+ 93,
GreekLangModel,
- 46,
- (float)0.958419074626211,
- PR_FALSE,
- "ISO-8859-7",
- "el"
+ 47,
+ (float)0.9624941725288916,
};
diff --git a/src/LangModels/LangHungarianModel.cpp b/src/LangModels/LangHungarianModel.cpp
index 22f0de6..2bee180 100644
--- a/src/LangModels/LangHungarianModel.cpp
+++ b/src/LangModels/LangHungarianModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Hungarian *********/
/**
* Generated by BuildLangModel.py
- * On: 2015-12-12 18:02:46.730481
+ * On: 2021-03-16 19:23:30.842519
**/
/* Character Mapping Table:
@@ -67,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 4X */
- 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 6X */
- 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 4X */
+ 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 6X */
+ 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 55,SYM, 42,SYM, 56, 46,SYM,SYM, 37, 52, 57, 58,SYM, 48, 59, /* AX */
- SYM, 60,SYM, 42,SYM, 61, 46,SYM,SYM, 37, 52, 62, 63,SYM, 48, 64, /* BX */
- 65, 11, 40, 36, 35, 66, 38, 39, 41, 14, 50, 67, 53, 28, 45, 68, /* CX */
- 49, 43, 54, 26, 69, 27, 25,SYM, 44, 70, 30, 31, 29, 47, 51, 71, /* DX */
- 72, 11, 40, 36, 35, 73, 38, 39, 41, 14, 50, 74, 53, 28, 45, 75, /* EX */
- 49, 43, 54, 26, 76, 27, 25,SYM, 44, 77, 30, 31, 29, 47, 51,SYM, /* FX */
+ SYM, 55,SYM, 40,SYM, 56, 51,SYM,SYM, 38, 54, 57, 58,SYM, 43, 59, /* AX */
+ SYM, 55,SYM, 40,SYM, 60, 51,SYM,SYM, 38, 54, 61, 62,SYM, 43, 63, /* BX */
+ 64, 11, 45, 41, 37, 65, 35, 50, 39, 12, 66, 46, 49, 28, 42, 67, /* CX */
+ 36, 52, 68, 26, 44, 27, 24,SYM, 48, 69, 30, 31, 29, 47, 70, 53, /* DX */
+ 71, 11, 45, 41, 37, 72, 35, 50, 39, 12, 73, 46, 49, 28, 42, 74, /* EX */
+ 36, 52, 75, 26, 44, 27, 24,SYM, 48, 76, 30, 31, 29, 47, 77,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -88,63 +89,76 @@ static const unsigned char Windows_1250_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 4X */
- 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 6X */
- 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 37,SYM, 46, 78, 48, 79, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 37,SYM, 46, 80, 48, 81, /* 9X */
- SYM,SYM,SYM, 42,SYM, 82,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM, 83, /* AX */
- SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM, 84, 52,SYM, 85,SYM, 86, 87, /* BX */
- 88, 11, 40, 36, 35, 89, 38, 39, 41, 14, 50, 90, 53, 28, 45, 91, /* CX */
- 49, 43, 54, 26, 92, 27, 25,SYM, 44, 93, 30, 31, 29, 47, 51, 94, /* DX */
- 95, 11, 40, 36, 35, 96, 38, 39, 41, 14, 50, 97, 53, 28, 45, 98, /* EX */
- 49, 43, 54, 26, 99, 27, 25,SYM, 44,100, 30, 31, 29, 47, 51,SYM, /* FX */
+ SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 4X */
+ 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 6X */
+ 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 51, 78, 43, 79, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 51, 80, 43, 81, /* 9X */
+ SYM,SYM,SYM, 40,SYM, 55,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM, 82, /* AX */
+ SYM,SYM,SYM, 40,SYM,SYM,SYM,SYM,SYM, 55, 54,SYM, 83,SYM, 84, 85, /* BX */
+ 86, 11, 45, 41, 37, 87, 35, 50, 39, 12, 88, 46, 49, 28, 42, 89, /* CX */
+ 36, 52, 90, 26, 44, 27, 24,SYM, 48, 91, 30, 31, 29, 47, 92, 53, /* DX */
+ 93, 11, 45, 41, 37, 94, 35, 50, 39, 12, 95, 46, 49, 28, 42, 96, /* EX */
+ 36, 52, 97, 26, 44, 27, 24,SYM, 48, 98, 30, 31, 29, 47, 99,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 64;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 1, 66, 15, 67, 23, 68, 16, 69, 0, 70, 25, 71, 14, 72, 20,
+ 73, 7, 74, 22, 75, 8, 76, 4, 77, 13, 78, 5, 79, 9, 80, 21,
+ 82, 6, 83, 3, 84, 2, 85, 19, 86, 18, 89, 17, 90, 10, 97, 1,
+ 98, 15, 99, 23, 100, 16, 101, 0, 102, 25, 103, 14, 104, 20,105, 7,
+ 106, 22, 107, 8, 108, 4, 109, 13, 110, 5, 111, 9, 112, 21,114, 6,
+ 115, 3, 116, 2, 117, 19, 118, 18, 121, 17, 122, 10, 193, 11,201, 12,
+ 205, 28, 211, 26, 214, 24, 218, 30, 220, 29, 225, 11, 233, 12,237, 28,
+ 243, 26, 246, 24, 250, 30, 252, 29, 336, 27, 337, 27, 368, 31,369, 31,
+};
+
/* Model Table:
- * Total sequences: 1084
- * First 512 sequences: 0.9748272224933486
- * Next 512 sequences (512-1024): 0.024983863604162403
- * Rest: 0.0001889139024889644
+ * Total sequences: 1122
+ * First 512 sequences: 0.9736098834669349
+ * Next 512 sequences (512-1024): 0.026285470450181352
+ * Rest: 0.00010464608288375879
* Negative sequences: TODO
*/
static const PRUint8 HungarianLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,1,0,2,2,0,0,
- 3,2,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,2,2,1,2,1,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,2,2,3,3,3,3,3,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,2,3,2,2,3,3,3,3,3,2,
- 3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,2,
- 3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,3,3,3,2,3,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,3,3,2,3,3,2,3,0,2,2,2,2,
- 3,2,3,3,3,3,3,2,2,3,3,2,3,3,0,3,3,3,2,3,3,3,2,3,3,0,2,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,2,2,2,3,2,2,2,2,2,3,3,2,3,3,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,3,3,3,3,3,2,2,
- 1,2,3,3,3,3,3,3,2,3,3,0,3,3,2,3,3,3,2,2,2,3,3,3,2,0,0,0,2,0,0,0,
- 3,3,3,2,3,2,2,3,3,2,3,3,3,2,3,3,2,2,2,3,2,3,2,2,2,2,3,2,2,2,2,3,
- 3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,3,3,3,2,3,2,2,3,3,2,3,2,2,2,
- 0,1,3,3,3,3,3,2,2,3,3,0,3,3,2,3,3,3,0,0,2,3,2,3,0,0,0,0,0,2,0,0,
- 3,3,2,3,3,3,2,3,3,2,2,3,2,1,3,3,3,2,2,3,1,2,2,2,2,2,3,3,3,2,2,2,
- 3,3,3,3,2,3,3,3,3,2,2,3,3,2,3,2,2,3,2,3,2,2,3,2,2,3,3,3,3,2,2,2,
- 3,3,2,2,2,2,2,3,3,2,0,3,0,2,3,2,2,2,1,2,2,0,2,1,2,3,2,3,3,2,2,2,
- 3,3,3,3,2,2,3,3,3,2,3,3,3,2,3,3,2,3,1,3,3,2,2,2,2,2,2,2,2,2,2,3,
- 3,2,3,3,3,3,3,2,2,3,2,3,3,3,0,3,3,2,2,2,2,2,2,3,2,0,0,0,1,0,0,0,
- 3,3,2,2,2,2,2,3,3,2,0,3,2,2,2,2,2,2,2,3,2,0,2,2,2,2,2,2,3,2,2,2,
- 3,3,3,3,3,3,2,3,3,2,2,3,1,2,3,2,2,2,2,3,2,3,3,3,2,2,2,2,3,3,2,0,
- 3,3,3,2,2,2,3,2,3,2,2,3,2,2,3,2,3,2,0,3,2,2,2,2,2,2,3,0,2,2,3,2,
- 3,3,2,3,2,2,2,3,3,3,3,2,2,2,3,2,2,2,2,2,3,0,0,2,2,2,2,0,3,0,0,0,
- 3,3,2,2,2,3,2,3,3,0,0,2,2,2,3,2,2,2,2,3,0,2,2,2,2,3,2,3,2,3,2,2,
- 2,0,3,3,3,3,3,0,0,3,3,0,2,3,0,3,3,3,0,0,2,2,2,2,1,0,0,0,0,0,0,0,
- 2,2,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,0,0,2,3,3,2,2,2,0,0,1,2,2,0,
- 2,2,3,3,3,3,2,3,2,3,3,2,2,2,2,3,3,2,0,0,2,2,3,2,2,1,0,0,1,2,1,0,
- 0,2,3,2,2,3,3,2,2,2,3,0,3,3,0,2,2,3,0,2,1,2,3,2,2,0,0,0,0,0,0,0,
- 0,0,3,2,3,2,3,0,0,3,2,0,2,3,0,0,2,2,0,0,1,0,2,0,0,0,0,0,0,0,0,0,
- 2,2,3,3,3,2,3,0,0,2,2,0,0,3,0,2,2,2,0,0,2,2,3,2,1,0,0,0,0,0,0,0,
- 2,2,2,2,3,2,2,2,0,3,2,0,2,2,0,2,2,3,0,2,2,0,2,2,2,0,0,0,0,0,0,0,
+ 2,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3,3,3,3,3,3,2,3,2,1,2,2,0,0,
+ 2,2,3,3,3,3,3,3,3,2,3,2,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,0,2,2,2,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,3,2,3,2,3,2,3,3,3,3,3,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,3,3,2,3,2,2,3,3,3,3,3,2,
+ 3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,
+ 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,3,1,3,3,1,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,2,2,3,2,2,2,2,3,2,3,2,3,3,2,2,
+ 2,2,3,3,3,3,3,2,3,2,3,2,2,3,3,3,3,2,3,3,3,3,2,3,1,3,2,0,0,0,0,0,
+ 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,3,3,3,3,2,2,
+ 2,2,3,3,3,3,3,2,3,2,3,2,2,3,3,3,3,0,3,2,2,3,3,3,0,2,0,0,2,0,0,0,
+ 2,2,3,3,3,3,3,2,3,2,3,0,2,3,3,3,3,0,3,0,3,3,2,2,0,2,0,0,2,2,0,0,
+ 3,3,3,3,3,2,2,3,2,3,3,3,3,3,2,3,2,2,2,3,2,3,2,2,2,2,3,2,3,2,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,2,3,3,2,2,
+ 3,3,2,3,3,2,3,3,2,3,2,3,3,2,0,3,2,2,2,3,2,2,2,2,3,2,3,3,2,2,2,2,
+ 3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,2,3,2,3,3,2,2,3,2,3,2,3,3,3,2,2,2,
+ 3,3,3,3,3,3,2,3,2,3,3,3,3,2,2,3,2,0,3,3,2,2,2,2,2,2,3,3,3,3,2,3,
+ 3,3,2,2,2,2,2,3,2,3,1,3,3,2,2,2,1,2,2,2,2,2,2,2,2,2,2,3,3,2,2,2,
+ 2,2,3,3,3,3,3,2,3,2,2,3,0,3,3,2,3,2,2,2,2,2,2,3,0,2,2,1,1,0,0,0,
+ 3,3,2,2,2,2,2,3,2,3,2,3,3,2,2,2,2,2,2,3,2,2,2,2,2,2,2,2,3,2,2,2,
+ 3,3,3,3,3,2,3,3,2,3,2,3,3,2,2,2,2,2,2,3,2,3,3,3,2,2,2,2,3,3,2,0,
+ 3,3,3,2,3,3,2,2,2,3,2,3,3,2,2,2,3,2,2,3,2,2,2,2,2,2,3,1,2,2,3,2,
+ 3,3,2,3,2,2,2,3,3,3,2,2,3,2,2,2,2,2,2,2,3,2,2,3,2,2,2,0,3,0,0,1,
+ 0,0,3,3,3,3,3,0,3,0,3,0,0,2,2,3,3,0,3,0,2,2,1,2,0,2,0,0,0,0,0,0,
+ 3,3,2,2,2,2,3,3,2,3,0,2,3,2,2,2,2,2,1,2,1,2,2,2,3,2,2,3,1,3,2,2,
+ 2,2,3,3,3,3,3,3,3,2,3,2,2,2,3,3,3,1,2,2,2,3,3,3,1,2,0,0,1,2,2,0,
+ 3,2,3,3,3,2,3,3,3,2,3,2,2,2,2,3,3,0,3,1,2,2,3,2,2,2,0,1,2,2,2,0,
+ 2,2,3,2,2,3,3,0,2,0,3,0,0,3,3,2,2,0,3,0,0,2,3,2,0,0,0,0,0,0,0,0,
+ 0,2,3,2,3,3,2,0,3,0,3,0,0,2,3,2,2,0,2,0,2,0,2,2,0,0,0,0,0,0,0,0,
+ 0,2,3,2,3,3,3,2,2,0,2,0,1,1,3,2,2,0,2,0,2,2,3,2,1,2,0,0,0,0,0,0,
+ 2,2,2,2,2,2,3,2,2,0,2,0,0,2,2,2,2,0,3,0,2,0,2,2,0,2,0,0,0,0,0,0,
};
@@ -153,7 +167,7 @@ const SequenceModel Iso_8859_2HungarianModel =
Iso_8859_2_CharToOrderMap,
HungarianLangModel,
32,
- (float)0.9748272224933486,
+ (float)0.9736098834669349,
PR_FALSE,
"ISO-8859-2",
"hu"
@@ -164,8 +178,18 @@ const SequenceModel Windows_1250HungarianModel =
Windows_1250_CharToOrderMap,
HungarianLangModel,
32,
- (float)0.9748272224933486,
+ (float)0.9736098834669349,
PR_FALSE,
"WINDOWS-1250",
"hu"
};
+
+const LanguageModel HungarianModel =
+{
+ "hu",
+ Unicode_CharOrder,
+ 64,
+ HungarianLangModel,
+ 32,
+ (float)0.9736098834669349,
+};
diff --git a/src/LangModels/LangIrishModel.cpp b/src/LangModels/LangIrishModel.cpp
index bbd9500..a9d814f 100644
--- a/src/LangModels/LangIrishModel.cpp
+++ b/src/LangModels/LangIrishModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Irish *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-27 00:33:40.158624
+ * On: 2021-03-16 19:09:36.532691
**/
/* Character Mapping Table:
@@ -61,174 +62,197 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Iso_8859_1_CharToOrderMap[] =
+static const unsigned char Iso_8859_15_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */
- 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */
- 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 44,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 45, 14, 46, 47, 33, 48, 49, 39, 35, 18, 42, 37, 50, 17, 51, 40, /* CX */
- 52, 32, 43, 22, 53, 54, 38,SYM, 36, 55, 20, 56, 31, 57, 58, 59, /* DX */
- 60, 14, 61, 62, 33, 63, 64, 39, 35, 18, 42, 37, 65, 17, 66, 40, /* EX */
- 67, 32, 43, 22, 68, 69, 38,SYM, 36, 70, 20, 71, 31, 72, 73, 74, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 35,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM, 42, 43,SYM,SYM, 44,SYM,SYM,SYM, 45, 46, 47,SYM, /* BX */
+ 48, 14, 49, 50, 34, 51, 52, 36, 32, 18, 40, 53, 54, 17, 55, 39, /* CX */
+ 56, 37, 57, 21, 58, 59, 33,SYM, 38, 60, 20, 61, 31, 62, 63, 64, /* DX */
+ 65, 14, 66, 67, 34, 68, 69, 36, 32, 18, 40, 70, 71, 17, 72, 39, /* EX */
+ 73, 37, 74, 21, 75, 76, 33,SYM, 38, 77, 20, 78, 31, 79, 80, 81, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Windows_1252_CharToOrderMap[] =
+static const unsigned char Iso_8859_1_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */
- 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */
- 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM, 75,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 76,ILL, 77,ILL, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 78,ILL, 79, 80, /* 9X */
+ 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 82, 14, 83, 84, 33, 85, 86, 39, 35, 18, 42, 37, 87, 17, 88, 40, /* CX */
- 89, 32, 43, 22, 90, 91, 38,SYM, 36, 92, 20, 93, 31, 94, 95, 96, /* DX */
- 97, 14, 98, 99, 33,100,101, 39, 35, 18, 42, 37,102, 17,103, 40, /* EX */
- 104, 32, 43, 22,105,106, 38,SYM, 36,107, 20,108, 31,109,110,111, /* FX */
+ SYM,SYM,SYM,SYM,SYM, 82,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 83, 14, 84, 85, 34, 86, 87, 36, 32, 18, 40, 88, 89, 17, 90, 39, /* CX */
+ 91, 37, 92, 21, 93, 94, 33,SYM, 38, 95, 20, 96, 31, 97, 98, 99, /* DX */
+ 100, 14,101,102, 34,103,104, 36, 32, 18, 40,105,106, 17,107, 39, /* EX */
+ 108, 37,109, 21,110,111, 33,SYM, 38,112, 20,113, 31,114,115,116, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_15_CharToOrderMap[] =
+static const unsigned char Iso_8859_9_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */
- 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */
- 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,112,113,SYM,SYM,114,SYM,SYM,SYM,115,116,117,SYM, /* BX */
- 118, 14,119,120, 33,121,122, 39, 35, 18, 42, 37,123, 17,124, 40, /* CX */
- 125, 32, 43, 22,126,127, 38,SYM, 36,128, 20,129, 31,130,131,132, /* DX */
- 133, 14,134,135, 33,136,137, 39, 35, 18, 42, 37,138, 17,139, 40, /* EX */
- 140, 32, 43, 22,141,142, 38,SYM, 36,143, 20,144, 31,145,146,147, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM,117,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 118, 14,119,120, 34,121,122, 36, 32, 18, 40,123,124, 17,125, 39, /* CX */
+ 126, 37,127, 21,128,129, 33,SYM, 38,130, 20,131, 31,132,133,134, /* DX */
+ 135, 14,136,137, 34,138,139, 36, 32, 18, 40,140,141, 17,142, 39, /* EX */
+ 143, 37,144, 21,145,146, 33,SYM, 38,147, 20,148, 31, 41,149,150, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_9_CharToOrderMap[] =
+static const unsigned char Windows_1252_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */
- 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */
- 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,151,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,152,ILL,153,ILL, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,154,ILL,155,156, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM,148,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 149, 14,150,151, 33,152,153, 39, 35, 18, 42, 37,154, 17,155, 40, /* CX */
- 156, 32, 43, 22,157,158, 38,SYM, 36,159, 20,160, 31,161,162,163, /* DX */
- 164, 14,165,166, 33,167,168, 39, 35, 18, 42, 37,169, 17,170, 40, /* EX */
- 171, 32, 43, 22,172,173, 38,SYM, 36,174, 20,175, 31, 41,176,177, /* FX */
+ SYM,SYM,SYM,SYM,SYM,157,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 158, 14,159,160, 34,161,162, 36, 32, 18, 40,163,164, 17,165, 39, /* CX */
+ 166, 37,167, 21,168,169, 33,SYM, 38,170, 20,171, 31,172,173,174, /* DX */
+ 175, 14,176,177, 34,178,179, 36, 32, 18, 40,180,181, 17,182, 39, /* EX */
+ 183, 37,184, 21,185,186, 33,SYM, 38,187, 20,188, 31,189,190,191, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 62;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 16, 67, 8, 68, 11, 69, 5, 70, 19, 71, 12, 72, 3,
+ 73, 1, 74, 27, 75, 25, 76, 9, 77, 13, 78, 2, 79, 10, 80, 22,
+ 81, 30, 82, 4, 83, 6, 84, 7, 85, 15, 86, 24, 87, 26, 88, 29,
+ 89, 23, 90, 28, 97, 0, 98, 16, 99, 8, 100, 11, 101, 5,102, 19,
+ 103, 12, 104, 3, 105, 1, 106, 27, 107, 25, 108, 9, 109, 13,110, 2,
+ 111, 10, 112, 22, 113, 30, 114, 4, 115, 6, 116, 7, 117, 15,118, 24,
+ 119, 26, 120, 29, 121, 23, 122, 28, 193, 14, 201, 18, 205, 17,211, 21,
+ 218, 20, 225, 14, 233, 18, 237, 17, 243, 21, 250, 20,
+};
+
/* Model Table:
- * Total sequences: 701
- * First 512 sequences: 0.9974076651249096
- * Next 512 sequences (512-1024): 0.0025923348750903907
- * Rest: -2.7755575615628914e-17
+ * Total sequences: 707
+ * First 512 sequences: 0.9976732191628278
+ * Next 512 sequences (512-1024): 0.0023267808371722288
+ * Rest: -3.5561831257524545e-17
* Negative sequences: TODO
*/
static const PRUint8 IrishLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,0,3,0,3,3,3,3,2,3,3,2,
- 3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,3,3,3,3,0,3,3,3,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,3,0,2,
- 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,3,3,3,3,3,3,2,3,3,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,3,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,3,2,3,0,3,3,3,3,2,2,3,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,2,3,3,3,3,2,3,0,3,3,2,0,3,0,2,
- 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,0,
- 2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,3,3,0,3,3,3,3,3,2,3,2,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,3,0,3,2,3,2,3,2,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,2,3,0,3,0,2,0,2,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,2,3,3,3,0,3,0,0,0,2,2,0,
- 0,3,3,0,3,2,3,3,3,3,0,3,3,3,0,0,3,3,0,3,0,3,0,2,0,0,0,0,2,0,0,
- 3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,0,3,3,2,2,0,3,0,2,2,2,0,2,3,2,0,
- 3,3,3,3,3,3,3,2,2,3,3,2,0,0,3,3,3,3,3,2,3,3,3,0,2,0,0,2,0,0,0,
- 2,0,3,0,3,0,3,3,3,3,3,3,3,2,0,0,3,0,0,0,3,0,0,2,0,0,0,0,0,0,0,
- 3,3,3,0,2,2,3,3,0,2,3,2,0,2,0,0,2,0,0,2,2,2,0,2,0,0,0,0,0,0,0,
- 3,3,0,3,3,3,2,3,2,3,3,0,3,2,3,3,2,3,3,3,0,0,3,2,2,0,0,0,0,0,0,
- 2,3,3,0,3,0,3,3,3,3,0,3,2,2,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,
- 3,3,0,3,3,3,3,3,2,3,3,0,0,2,3,3,0,3,3,0,2,3,3,0,2,0,0,0,0,0,0,
- 0,3,3,0,3,0,3,3,3,3,0,3,3,3,0,0,3,0,0,2,3,3,0,2,0,0,0,0,2,0,0,
- 3,3,2,0,3,3,3,2,0,2,3,0,2,0,3,2,0,3,3,0,0,0,3,2,2,0,0,0,0,0,0,
- 3,0,3,0,2,3,3,2,3,3,3,2,0,3,0,3,2,0,0,2,0,0,0,0,2,0,3,0,0,0,0,
- 3,3,3,3,3,3,3,0,0,3,3,0,0,2,2,3,2,0,2,0,0,2,0,2,3,2,2,0,0,0,0,
- 3,3,3,3,3,3,3,2,0,2,3,2,0,2,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,
- 3,3,2,0,2,3,0,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
- 3,3,2,3,0,3,2,0,0,0,3,2,2,2,0,2,2,0,0,0,0,0,0,0,2,0,0,0,2,0,2,
- 3,3,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,3,0,3,0,2,2,0,0,0,0,0,0,
- 2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,0,0,3,3,3,3,3,2,3,3,2,
+ 3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,3,3,3,3,0,2,3,3,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,2,3,0,2,
+ 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,0,0,0,0,
+ 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,3,3,3,3,3,3,2,3,3,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,3,3,3,2,3,0,3,2,0,3,2,2,
+ 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,3,0,3,3,3,3,3,3,2,3,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,3,2,0,3,0,2,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,2,3,3,0,2,0,2,2,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,2,2,3,3,3,0,0,2,0,2,0,0,
+ 0,3,3,0,3,2,3,3,3,3,0,3,3,3,0,0,3,3,0,3,0,0,3,0,2,0,0,0,2,0,0,
+ 3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,0,2,0,0,3,2,2,2,0,0,3,2,0,
+ 3,3,3,3,3,3,3,2,2,3,3,2,0,0,3,3,3,3,3,2,3,3,3,3,0,0,0,2,0,0,0,
+ 2,0,3,0,3,0,3,3,3,3,3,3,3,2,0,0,3,0,0,2,3,2,2,0,2,0,0,0,0,0,0,
+ 3,3,2,0,2,2,2,3,0,2,2,2,0,2,0,0,2,0,0,0,2,0,2,0,2,0,0,0,0,0,0,
+ 3,3,0,3,3,3,2,3,2,3,3,0,3,2,3,3,2,3,3,3,0,3,0,2,0,0,0,0,0,0,0,
+ 0,3,3,0,3,0,3,3,3,3,0,3,3,2,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
+ 0,3,3,0,3,0,3,3,3,3,0,3,3,3,0,0,3,0,0,2,3,0,3,0,2,0,0,0,2,0,0,
+ 3,3,2,3,3,3,3,3,3,3,3,2,0,2,3,3,0,3,3,0,2,3,3,2,0,0,0,2,0,2,0,
+ 3,2,3,0,2,3,3,2,3,3,3,2,0,3,0,3,2,0,0,2,0,0,0,2,0,0,3,0,0,0,0,
+ 3,3,2,0,3,3,3,2,0,2,3,0,2,0,3,2,0,3,3,0,0,3,0,2,2,0,0,0,0,0,0,
+ 3,3,3,3,2,3,3,0,0,3,3,0,0,3,2,3,2,0,2,0,0,0,2,3,2,2,2,0,0,0,0,
+ 3,3,3,3,2,3,3,2,0,3,3,2,0,0,0,2,0,0,0,0,0,0,2,2,0,2,2,0,0,0,0,
+ 3,3,2,0,0,3,0,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
+ 3,3,2,3,0,3,2,2,0,0,3,2,2,2,0,3,0,0,0,0,0,2,0,2,0,0,0,0,2,0,2,
+ 3,3,0,0,0,2,0,2,2,2,3,0,0,0,0,0,0,0,0,3,0,0,2,2,2,0,0,0,0,2,0,
+ 3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
+const SequenceModel Iso_8859_15IrishModel =
+{
+ Iso_8859_15_CharToOrderMap,
+ IrishLangModel,
+ 31,
+ (float)0.9976732191628278,
+ PR_TRUE,
+ "ISO-8859-15",
+ "ga"
+};
+
const SequenceModel Iso_8859_1IrishModel =
{
Iso_8859_1_CharToOrderMap,
IrishLangModel,
31,
- (float)0.9974076651249096,
+ (float)0.9976732191628278,
PR_TRUE,
"ISO-8859-1",
"ga"
};
-const SequenceModel Windows_1252IrishModel =
+const SequenceModel Iso_8859_9IrishModel =
{
- Windows_1252_CharToOrderMap,
+ Iso_8859_9_CharToOrderMap,
IrishLangModel,
31,
- (float)0.9974076651249096,
+ (float)0.9976732191628278,
PR_TRUE,
- "WINDOWS-1252",
+ "ISO-8859-9",
"ga"
};
-const SequenceModel Iso_8859_15IrishModel =
+const SequenceModel Windows_1252IrishModel =
{
- Iso_8859_15_CharToOrderMap,
+ Windows_1252_CharToOrderMap,
IrishLangModel,
31,
- (float)0.9974076651249096,
+ (float)0.9976732191628278,
PR_TRUE,
- "ISO-8859-15",
+ "WINDOWS-1252",
"ga"
};
-const SequenceModel Iso_8859_9IrishModel =
+const LanguageModel IrishModel =
{
- Iso_8859_9_CharToOrderMap,
+ "ga",
+ Unicode_CharOrder,
+ 62,
IrishLangModel,
31,
- (float)0.9974076651249096,
- PR_TRUE,
- "ISO-8859-9",
- "ga"
+ (float)0.9976732191628278,
};
diff --git a/src/LangModels/LangLatvianModel.cpp b/src/LangModels/LangLatvianModel.cpp
index fcccc82..3b47d21 100644
--- a/src/LangModels/LangLatvianModel.cpp
+++ b/src/LangModels/LangLatvianModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Latvian *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-21 00:19:18.362275
+ * On: 2021-03-16 19:30:28.293047
**/
/* Character Mapping Table:
@@ -67,18 +68,18 @@ static const unsigned char Iso_8859_4_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 4X */
- 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 6X */
- 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 4X */
+ 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 6X */
+ 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM, 55, 56, 57,SYM, 58, 26,SYM,SYM, 23, 21, 31, 59,SYM, 29,SYM, /* AX */
- SYM, 60,SYM, 61,SYM, 62, 26,SYM,SYM, 23, 21, 31, 63, 48, 29, 48, /* BX */
- 8, 42, 64, 65, 40, 52, 53, 66, 32, 37, 67, 43, 46, 45, 49, 18, /* CX */
- 68, 24, 51, 30, 69, 70, 36,SYM, 71, 72, 73, 74, 39, 75, 27, 44, /* DX */
- 8, 42, 76, 77, 40, 52, 53, 78, 32, 37, 79, 43, 46, 45, 49, 18, /* EX */
- 80, 24, 51, 30, 81, 82, 36,SYM, 83, 84, 85, 86, 39, 87, 27,SYM, /* FX */
+ SYM, 60,SYM, 61,SYM, 62, 26,SYM,SYM, 23, 21, 31, 63, 49, 29, 49, /* BX */
+ 8, 40, 64, 65, 41, 54, 42, 66, 32, 36, 67, 43, 46, 47, 44, 18, /* CX */
+ 68, 24, 53, 30, 69, 70, 37,SYM, 71, 72, 73, 74, 38, 75, 27, 48, /* DX */
+ 8, 40, 76, 77, 41, 54, 42, 78, 32, 36, 79, 43, 46, 47, 44, 18, /* EX */
+ 80, 24, 53, 30, 81, 82, 37,SYM, 83, 84, 85, 86, 38, 87, 27,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -88,18 +89,18 @@ static const unsigned char Iso_8859_10_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 4X */
- 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 6X */
- 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 4X */
+ 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 6X */
+ 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 88, 21, 31, 18, 89, 30,SYM, 26, 90, 23, 91, 29,SYM, 27, 48, /* AX */
- SYM, 92, 21, 31, 18, 93, 30,SYM, 26, 94, 23, 95, 29, 96, 27, 48, /* BX */
- 8, 42, 97, 98, 40, 52, 53, 99, 32, 37,100, 43, 46, 45, 49,101, /* CX */
- 50, 24, 51, 47,102,103, 36,104,105,106,107,108, 39,109, 54, 44, /* DX */
- 8, 42,110,111, 40, 52, 53,112, 32, 37,113, 43, 46, 45, 49,114, /* EX */
- 50, 24, 51, 47,115,116, 36,117,118,119,120,121, 39,122, 54,123, /* FX */
+ SYM, 88, 21, 31, 18, 89, 30,SYM, 26, 90, 23, 91, 29,SYM, 27, 49, /* AX */
+ SYM, 92, 21, 31, 18, 93, 30,SYM, 26, 94, 23, 95, 29, 96, 27, 49, /* BX */
+ 8, 40, 97, 98, 41, 54, 42, 99, 32, 36,100, 43, 46, 47, 44,101, /* CX */
+ 52, 24, 53, 45,102,103, 37,104,105,106,107,108, 38,109, 51, 48, /* DX */
+ 8, 40,110,111, 41, 54, 42,112, 32, 36,113, 43, 46, 47, 44,114, /* EX */
+ 52, 24, 53, 45,115,116, 37,117,118,119,120,121, 38,122, 51,123, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -109,70 +110,86 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 4X */
- 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 6X */
- 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 4X */
+ 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 6X */
+ 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,124,SYM,125,SYM,SYM,SYM,SYM, 53, /* AX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,126,SYM,127,SYM,SYM,SYM,SYM, 53, /* BX */
- 128,129, 8,130, 40, 52,131, 21, 32, 37,132, 46, 31, 30, 18, 26, /* CX */
- 23,133, 24, 47, 51,134, 36,SYM,135, 41,136, 27, 39,137, 29, 44, /* DX */
- 138,139, 8,140, 40, 52,141, 21, 32, 37,142, 46, 31, 30, 18, 26, /* EX */
- 23,143, 24, 47, 51,144, 36,SYM,145, 41,146, 27, 39,147, 29,SYM, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,124,SYM,125,SYM,SYM,SYM,SYM, 42, /* AX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,126,SYM,127,SYM,SYM,SYM,SYM, 42, /* BX */
+ 128,129, 8,130, 41, 54,131, 21, 32, 36,132, 46, 31, 30, 18, 26, /* CX */
+ 23,133, 24, 45, 53,134, 37,SYM,135, 50,136, 27, 38,137, 29, 48, /* DX */
+ 138,139, 8,140, 41, 54,141, 21, 32, 36,142, 46, 31, 30, 18, 26, /* EX */
+ 23,143, 24, 45, 53,144, 37,SYM,145, 50,146, 27, 38,147, 29,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 80;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 17, 67, 22, 68, 13, 69, 3, 70, 25, 71, 19, 72, 28,
+ 73, 1, 74, 15, 75, 11, 76, 9, 77, 12, 78, 7, 79, 10, 80, 16,
+ 81, 39, 82, 5, 83, 2, 84, 4, 85, 6, 86, 14, 87, 34, 88, 35,
+ 89, 33, 90, 20, 97, 0, 98, 17, 99, 22, 100, 13, 101, 3,102, 25,
+ 103, 19, 104, 28, 105, 1, 106, 15, 107, 11, 108, 9, 109, 12,110, 7,
+ 111, 10, 112, 16, 113, 39, 114, 5, 115, 2, 116, 4, 117, 6,118, 14,
+ 119, 34, 120, 35, 121, 33, 122, 20, 201, 36, 214, 37, 220, 38,233, 36,
+ 246, 37, 252, 38, 256, 8, 257, 8, 268, 32, 269, 32, 274, 21,275, 21,
+ 290, 31, 291, 31, 298, 18, 299, 18, 310, 30, 311, 30, 315, 26,316, 26,
+ 325, 24, 326, 24, 352, 23, 353, 23, 362, 27, 363, 27, 381, 29,382, 29,
+};
+
/* Model Table:
- * Total sequences: 970
- * First 512 sequences: 0.9904102202220861
- * Next 512 sequences (512-1024): 0.009589779777913882
- * Rest: -1.734723475976807e-17
+ * Total sequences: 982
+ * First 512 sequences: 0.9904642991017133
+ * Next 512 sequences (512-1024): 0.009535700898286757
+ * Rest: -5.377642775528102e-17
* Negative sequences: TODO
*/
static const PRUint8 LatvianLangModel[] =
{
- 2,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,2,3,2,2,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,2,3,3,3,2,3,0,0,2,0,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,0,2,2,2,3,2,2,0,0,0,2,2,0,2,2,2,
- 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,3,2,2,2,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,2,0,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,0,0,2,0,2,2,0,0,0,0,
- 3,3,3,2,3,3,2,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,3,0,2,2,2,2,2,0,2,0,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,0,3,0,2,2,2,0,0,3,0,2,0,0,0,2,
- 2,2,3,2,3,3,2,3,0,3,0,3,3,3,3,3,3,3,0,2,3,0,3,3,3,3,3,0,0,2,0,2,2,0,0,0,0,0,0,
- 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,2,0,2,2,0,2,2,0,2,0,
- 3,2,3,2,3,3,3,3,2,3,2,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,0,2,3,2,3,2,2,2,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,3,3,2,0,3,2,2,0,0,0,0,0,2,0,2,0,0,
- 3,3,3,3,0,3,3,3,3,2,3,3,2,2,2,3,3,3,3,2,0,3,2,2,0,2,0,3,0,0,0,2,0,0,2,2,0,2,0,
- 3,3,3,3,3,2,3,3,3,2,3,2,3,2,3,2,2,2,3,2,3,3,2,2,2,0,0,2,0,3,0,0,0,2,2,0,0,2,0,
- 3,3,3,3,2,2,3,2,3,2,3,2,2,2,2,3,3,2,3,2,2,3,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,3,2,3,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,
- 3,3,3,3,2,0,3,3,3,2,3,2,2,2,2,2,0,0,2,2,0,3,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,0,3,2,3,3,3,2,2,2,2,2,2,2,3,0,0,3,2,2,0,0,2,3,2,0,0,0,2,0,2,0,2,0,0,
- 0,0,3,0,3,3,0,3,0,3,0,3,3,3,3,3,3,3,0,3,3,0,3,3,3,2,2,0,0,2,2,0,2,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,0,0,0,0,2,2,2,0,3,0,2,3,3,2,2,0,0,0,0,2,0,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,0,0,2,2,0,0,0,0,2,2,0,0,0,0,
- 2,0,3,0,3,3,0,3,0,3,0,3,3,3,3,2,3,2,0,3,3,0,3,3,2,2,3,0,0,2,2,3,0,0,0,0,0,0,0,
- 3,3,3,3,2,2,3,2,3,2,3,3,2,2,2,2,0,2,3,0,2,3,2,2,0,0,0,2,3,0,0,2,0,0,2,0,0,0,0,
- 3,3,3,3,2,3,3,3,3,3,3,2,2,2,3,2,2,2,3,2,2,2,0,0,2,0,2,2,0,0,3,0,0,0,0,0,0,0,0,
- 3,3,2,3,0,0,3,2,3,0,3,0,2,2,2,2,2,2,0,2,0,3,2,3,0,0,0,2,0,0,3,2,0,0,0,0,0,0,0,
- 3,3,3,3,3,2,3,2,2,3,3,2,2,0,0,0,0,0,2,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,2,2,2,0,3,2,3,2,3,2,2,0,2,2,2,0,2,2,0,2,0,2,2,0,2,2,0,0,2,3,0,0,0,0,0,0,0,
- 0,2,3,0,3,3,0,3,0,3,2,3,2,3,3,3,2,0,0,2,3,0,3,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,
- 3,3,2,3,2,2,2,3,2,2,3,2,2,2,0,0,2,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,2,0,2,0,0,
- 3,3,2,3,2,0,3,2,3,2,3,2,2,0,2,0,0,0,2,0,2,2,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,
- 3,3,2,3,0,2,3,0,2,0,2,0,0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,3,0,3,0,0,2,0,0,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,2,2,3,0,0,3,2,2,0,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
- 2,2,2,2,0,2,0,0,0,2,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,3,0,0,0,0,0,
- 2,0,2,2,2,0,0,2,0,2,2,0,2,2,0,0,0,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,
- 2,2,0,0,0,0,2,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
- 0,0,2,0,0,2,0,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 0,0,2,0,2,2,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,2,3,2,2,2,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,2,3,3,3,2,3,0,0,2,2,0,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,0,2,2,2,3,2,2,0,0,0,2,2,0,0,0,2,2,
+ 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,3,2,2,2,2,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,0,0,2,0,2,2,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,2,0,2,2,2,2,
+ 3,3,3,2,3,3,2,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,0,3,0,2,2,2,2,3,2,0,0,2,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,0,3,0,2,2,2,0,0,3,2,0,0,2,0,0,2,
+ 2,2,3,2,3,3,2,3,0,3,0,3,3,3,3,3,3,3,0,2,3,0,3,3,3,3,3,0,0,2,0,2,2,0,0,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,2,2,0,2,2,2,2,2,2,0,0,0,
+ 3,2,3,2,3,3,3,3,2,3,2,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,0,2,3,2,3,2,2,2,2,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,3,3,2,0,3,2,2,0,0,0,0,2,0,0,2,2,2,0,
+ 3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,3,3,3,3,2,0,3,2,2,0,2,0,3,0,0,0,2,0,2,0,2,2,0,2,0,
+ 3,3,3,3,2,3,3,3,3,2,3,2,3,0,3,2,2,2,3,2,3,3,2,2,2,0,0,3,0,3,0,0,0,0,2,0,2,0,2,0,
+ 3,3,3,3,2,2,3,2,3,2,3,2,2,2,2,3,3,2,3,2,2,3,2,0,2,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,
+ 3,3,3,3,0,2,3,3,3,2,3,2,2,2,2,0,2,0,2,2,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,3,2,3,2,2,2,2,2,0,2,2,0,0,0,0,2,0,0,0,
+ 3,3,3,3,2,3,3,2,3,3,3,2,2,2,2,2,2,2,3,0,2,3,2,2,0,0,2,3,2,0,0,0,2,2,0,0,0,2,2,0,
+ 0,0,3,0,3,3,0,3,0,3,0,3,3,3,3,3,3,3,0,3,3,0,3,3,3,2,2,0,0,2,2,0,2,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,0,2,0,0,2,2,2,0,3,0,2,3,3,2,2,0,0,0,2,0,0,2,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,0,0,2,2,0,0,0,0,2,2,0,0,0,0,0,
+ 2,0,3,0,3,3,2,3,0,3,0,3,3,3,3,3,2,2,0,3,2,0,3,3,2,2,3,0,0,2,2,3,0,0,0,0,0,0,0,0,
+ 3,3,3,3,2,2,3,2,3,2,3,3,2,2,2,0,2,2,3,0,2,3,2,2,0,0,0,2,3,0,0,2,0,2,0,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,3,3,2,2,2,3,2,2,2,3,2,2,3,0,0,2,0,2,2,0,0,3,0,2,0,0,0,0,0,0,0,
+ 3,3,2,3,0,0,3,2,3,0,3,0,2,2,2,2,2,2,0,2,0,3,2,3,0,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0,
+ 3,3,3,3,2,3,3,2,2,3,3,2,0,0,0,0,0,0,2,2,0,2,0,2,0,2,0,2,0,0,0,0,0,0,0,0,2,0,2,0,
+ 3,3,2,3,0,2,3,2,3,2,3,2,2,2,2,2,2,0,2,0,0,2,2,2,2,0,2,2,0,0,2,3,0,0,0,0,0,0,0,0,
+ 0,2,3,0,3,3,0,3,0,3,2,3,2,3,3,2,3,0,0,2,3,0,3,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,
+ 3,3,2,3,2,2,2,3,2,2,3,2,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,2,0,0,0,0,0,
+ 3,3,2,3,0,2,3,2,3,2,3,2,2,0,2,0,0,0,2,0,2,2,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0,
+ 3,3,2,3,2,0,2,0,2,0,2,0,0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,3,0,3,0,0,2,0,0,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,2,3,0,0,3,2,2,0,2,2,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,2,2,2,0,2,2,2,0,2,2,2,2,2,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
+ 2,2,2,2,2,0,0,0,0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,
+ 2,2,0,0,0,0,2,0,0,0,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
+ 0,0,2,0,2,2,0,2,0,2,2,0,0,2,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,2,0,0,0,0,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,2,0,0,2,0,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
@@ -180,8 +197,8 @@ const SequenceModel Iso_8859_4LatvianModel =
{
Iso_8859_4_CharToOrderMap,
LatvianLangModel,
- 39,
- (float)0.9904102202220861,
+ 40,
+ (float)0.9904642991017133,
PR_TRUE,
"ISO-8859-4",
"lv"
@@ -191,8 +208,8 @@ const SequenceModel Iso_8859_10LatvianModel =
{
Iso_8859_10_CharToOrderMap,
LatvianLangModel,
- 39,
- (float)0.9904102202220861,
+ 40,
+ (float)0.9904642991017133,
PR_TRUE,
"ISO-8859-10",
"lv"
@@ -202,9 +219,19 @@ const SequenceModel Iso_8859_13LatvianModel =
{
Iso_8859_13_CharToOrderMap,
LatvianLangModel,
- 39,
- (float)0.9904102202220861,
+ 40,
+ (float)0.9904642991017133,
PR_TRUE,
"ISO-8859-13",
"lv"
};
+
+const LanguageModel LatvianModel =
+{
+ "lv",
+ Unicode_CharOrder,
+ 80,
+ LatvianLangModel,
+ 40,
+ (float)0.9904642991017133,
+};
diff --git a/src/LangModels/LangLithuanianModel.cpp b/src/LangModels/LangLithuanianModel.cpp
index 686014a..d2fa554 100644
--- a/src/LangModels/LangLithuanianModel.cpp
+++ b/src/LangModels/LangLithuanianModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Lithuanian *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-21 00:25:34.775158
+ * On: 2021-03-16 19:26:36.950339
**/
/* Character Mapping Table:
@@ -61,45 +62,45 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Iso_8859_10_CharToOrderMap[] =
+static const unsigned char Iso_8859_4_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 4X */
- 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 6X */
- 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 4X */
+ 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 6X */
+ 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 29, 50, 60, 47, 61, 62,SYM, 56, 55, 21, 63, 22,SYM, 28, 64, /* AX */
- SYM, 29, 50, 65, 47, 66, 67,SYM, 56, 55, 21, 68, 22, 69, 28, 70, /* BX */
- 41, 39, 71, 53, 38, 43, 72, 30, 24, 36, 31, 73, 17, 40, 74, 46, /* CX */
- 75, 57, 34, 44, 59, 76, 35, 77, 48, 20, 54, 78, 45, 79, 80, 52, /* DX */
- 41, 39, 81, 53, 38, 43, 82, 30, 24, 36, 31, 83, 17, 40, 84, 46, /* EX */
- 85, 57, 34, 44, 59, 86, 35, 87, 48, 20, 54, 88, 45, 89, 90, 91, /* FX */
+ SYM, 27, 68, 69,SYM, 40, 60,SYM,SYM, 21, 56, 70, 71,SYM, 22,SYM, /* AX */
+ SYM, 27,SYM, 72,SYM, 40, 60,SYM,SYM, 21, 56, 73, 74, 67, 22, 67, /* BX */
+ 45, 35, 75, 37, 41, 49, 54, 30, 24, 36, 31, 76, 17, 43, 77, 50, /* CX */
+ 63, 61, 42, 78, 62, 46, 38,SYM, 55, 20, 52, 79, 51, 44, 26, 59, /* DX */
+ 45, 35, 80, 37, 41, 49, 54, 30, 24, 36, 31, 81, 17, 43, 82, 50, /* EX */
+ 63, 61, 42, 83, 62, 46, 38,SYM, 55, 20, 52, 84, 51, 44, 26,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_4_CharToOrderMap[] =
+static const unsigned char Iso_8859_10_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 4X */
- 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 6X */
- 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 4X */
+ 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 6X */
+ 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 29, 92, 93,SYM, 94, 56,SYM,SYM, 21, 50, 95, 96,SYM, 22,SYM, /* AX */
- SYM, 29,SYM, 97,SYM, 98, 56,SYM,SYM, 21, 50, 99,100,101, 22,102, /* BX */
- 41, 39,103, 53, 38, 43,104, 30, 24, 36, 31,105, 17, 40,106, 47, /* CX */
- 55, 57, 34,107, 59,108, 35,SYM, 48, 20, 54,109, 45,110, 28, 52, /* DX */
- 41, 39,111, 53, 38, 43,112, 30, 24, 36, 31,113, 17, 40,114, 47, /* EX */
- 55, 57, 34,115, 59,116, 35,SYM, 48, 20, 54,117, 45,118, 28,SYM, /* FX */
+ SYM, 27, 56, 85, 50, 40, 86,SYM, 60, 63, 21, 87, 22,SYM, 26, 67, /* AX */
+ SYM, 27, 56, 88, 50, 40, 89,SYM, 60, 63, 21, 90, 22, 91, 26, 67, /* BX */
+ 45, 35, 92, 37, 41, 49, 54, 30, 24, 36, 31, 93, 17, 43, 94, 58, /* CX */
+ 65, 61, 42, 34, 62, 46, 38, 44, 55, 20, 52, 95, 51, 48, 96, 59, /* DX */
+ 45, 35, 97, 37, 41, 49, 54, 30, 24, 36, 31, 98, 17, 43, 99, 58, /* EX */
+ 65, 61, 42, 34, 62, 46, 38, 44, 55, 20, 52,100, 51, 48,101,102, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -109,91 +110,108 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 4X */
- 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 6X */
- 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 4X */
+ 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 6X */
+ 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM,119,SYM,SYM,SYM,SYM,120, /* AX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM,121,SYM,SYM,SYM,SYM,122, /* BX */
- 29, 30, 41, 49, 38, 43, 31, 50, 24, 36,123, 17,124,125, 47, 56, /* CX */
- 21, 51, 57, 44, 34,126, 35,SYM, 20, 42, 58, 28, 45,127, 22, 52, /* DX */
- 29, 30, 41, 49, 38, 43, 31, 50, 24, 36,128, 17,129,130, 47, 56, /* EX */
- 21, 51, 57, 44, 34,131, 35,SYM, 20, 42, 58, 28, 45,132, 22,SYM, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 55,SYM,103,SYM,SYM,SYM,SYM, 54, /* AX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 55,SYM,104,SYM,SYM,SYM,SYM, 54, /* BX */
+ 27, 30, 45, 53, 41, 49, 31, 56, 24, 36,105, 17,106,107, 50, 60, /* CX */
+ 21, 57, 61, 34, 42, 46, 38,SYM, 20, 47, 64, 26, 51, 66, 22, 59, /* DX */
+ 27, 30, 45, 53, 41, 49, 31, 56, 24, 36,108, 17,109,110, 50, 60, /* EX */
+ 21, 57, 61, 34, 42, 46, 38,SYM, 20, 47, 64, 26, 51, 66, 22,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 80;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 1, 66, 18, 67, 23, 68, 12, 69, 4, 70, 25, 71, 16, 72, 29,
+ 73, 0, 74, 14, 75, 9, 76, 10, 77, 11, 78, 7, 79, 3, 80, 15,
+ 81, 39, 82, 5, 83, 2, 84, 6, 85, 8, 86, 13, 87, 33, 88, 32,
+ 89, 19, 90, 28, 97, 1, 98, 18, 99, 23, 100, 12, 101, 4,102, 25,
+ 103, 16, 104, 29, 105, 0, 106, 14, 107, 9, 108, 10, 109, 11,110, 7,
+ 111, 3, 112, 15, 113, 39, 114, 5, 115, 2, 116, 6, 117, 8,118, 13,
+ 119, 33, 120, 32, 121, 19, 122, 28, 193, 35, 195, 37, 201, 36,211, 34,
+ 214, 38, 225, 35, 227, 37, 233, 36, 243, 34, 246, 38, 260, 27,261, 27,
+ 268, 24, 269, 24, 278, 17, 279, 17, 280, 31, 281, 31, 302, 30,303, 30,
+ 352, 21, 353, 21, 362, 26, 363, 26, 370, 20, 371, 20, 381, 22,382, 22,
+};
+
/* Model Table:
- * Total sequences: 1016
- * First 512 sequences: 0.9928710196247589
- * Next 512 sequences (512-1024): 0.0071289803752411715
- * Rest: -4.85722573273506e-17
+ * Total sequences: 1138
+ * First 512 sequences: 0.9919219576954762
+ * Next 512 sequences (512-1024): 0.007740222486946524
+ * Rest: 0.00033781981757727893
* Negative sequences: TODO
*/
static const PRUint8 LithuanianLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,0,2,3,2,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,3,3,3,3,0,0,0,0,2,2,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,3,3,2,3,2,3,3,2,3,0,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,2,3,3,3,0,0,0,0,2,3,0,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,2,3,0,0,2,0,2,3,0,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,3,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,2,2,2,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,3,3,3,3,2,0,2,0,2,3,2,3,3,3,3,0,2,2,2,2,0,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,3,3,2,3,0,0,0,0,0,2,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,3,3,0,3,2,2,3,2,3,3,2,3,0,2,2,0,2,0,
- 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,2,3,3,3,3,0,2,0,2,2,0,
- 3,3,3,3,3,2,2,3,3,2,2,3,2,2,2,3,2,3,3,3,3,2,3,2,0,2,0,2,3,3,0,3,0,2,2,2,2,0,
- 3,3,3,3,3,3,2,2,3,3,2,3,2,3,2,2,2,3,2,3,3,2,3,2,0,2,2,2,2,3,2,3,0,2,2,2,2,2,
- 3,3,3,3,3,2,2,2,3,2,3,0,2,0,2,2,0,3,0,3,3,2,0,2,0,0,0,3,2,3,0,3,0,0,0,0,0,0,
- 3,3,2,3,3,2,2,2,3,2,0,0,0,0,0,2,2,3,0,2,3,0,0,0,0,0,0,0,3,3,3,3,0,0,2,2,0,0,
- 3,3,3,3,3,3,2,3,3,3,3,2,2,3,3,2,2,3,0,3,2,3,2,2,2,2,3,0,2,2,2,2,0,0,2,0,2,0,
- 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,3,3,2,2,2,0,0,3,3,3,3,2,2,0,2,2,2,0,0,
- 2,0,3,0,0,3,3,3,2,3,3,3,3,3,3,0,3,0,2,0,0,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,2,0,3,2,2,3,2,2,2,0,0,2,2,3,3,2,3,0,2,2,2,0,0,
- 2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,2,2,2,3,2,0,3,2,0,0,0,0,0,2,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,3,2,3,2,3,2,2,2,2,3,2,0,0,2,2,2,2,0,0,2,0,0,0,
- 3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,2,3,2,2,0,0,0,2,0,0,2,2,2,2,0,0,2,0,0,0,
- 3,3,2,3,3,2,0,2,3,3,3,2,2,2,0,0,2,2,2,2,0,0,0,2,0,2,3,2,3,2,0,0,0,0,0,0,2,2,
- 3,3,0,2,3,0,0,0,2,2,0,0,2,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,
- 3,3,2,3,3,3,0,2,3,2,3,2,0,0,2,0,2,2,2,2,2,0,0,2,0,2,0,0,2,2,0,0,0,0,0,2,0,0,
- 3,3,2,3,3,3,3,3,3,2,2,3,2,0,2,0,0,0,2,2,2,0,0,0,0,2,0,0,2,2,0,0,0,2,2,0,0,0,
- 3,3,2,3,3,2,2,2,3,2,3,3,3,2,0,2,2,2,2,3,3,0,0,2,0,0,2,2,2,2,0,2,0,2,2,0,2,0,
- 2,0,3,0,0,3,3,3,0,3,2,3,3,2,0,2,3,0,2,0,0,2,2,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,
- 0,0,3,0,0,2,0,0,0,2,2,2,0,2,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,3,0,0,3,0,3,0,3,3,2,2,3,2,3,3,2,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,2,0,0,2,2,0,2,2,0,0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,
- 3,3,2,2,3,2,2,0,2,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,
- 2,0,2,0,2,0,2,0,0,2,0,2,2,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,2,0,0,2,2,2,0,2,2,2,2,0,0,0,2,0,0,0,0,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,
- 0,0,2,0,0,2,2,0,0,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,2,0,2,3,2,2,2,2,2,2,2,
+ 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,0,3,3,3,3,3,0,0,3,3,0,0,1,3,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,0,3,3,2,3,3,2,3,2,2,0,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,2,3,0,0,3,3,0,0,2,2,0,0,0,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,0,0,3,2,2,0,2,3,0,0,0,0,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,1,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,3,3,3,3,2,2,2,0,2,3,3,2,3,3,3,0,2,0,2,2,1,2,0,
+ 3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,1,2,0,2,2,0,1,
+ 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,3,2,0,0,3,2,0,0,0,2,0,2,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,1,3,3,3,0,3,1,2,3,3,2,3,2,3,0,2,1,2,1,2,1,0,
+ 3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,2,3,3,3,2,3,3,3,0,2,1,2,2,0,2,0,
+ 3,3,3,3,3,2,3,2,3,2,2,3,2,2,2,3,2,3,3,3,3,2,3,2,1,2,3,3,2,0,0,3,0,2,3,2,2,2,1,0,
+ 3,3,3,3,3,3,2,2,3,3,2,3,2,3,2,2,2,3,2,3,3,1,3,2,0,2,2,3,2,2,2,3,0,2,2,2,2,0,2,2,
+ 3,3,3,3,3,2,2,2,3,2,3,2,2,0,2,2,2,3,0,3,3,2,0,2,0,0,2,3,3,0,2,3,0,0,2,2,2,0,0,0,
+ 3,3,2,3,3,2,2,2,3,2,0,0,0,0,0,2,2,3,0,2,3,1,0,0,0,0,3,3,0,0,3,3,0,0,2,2,2,0,2,0,
+ 3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,3,0,3,2,3,2,2,2,2,2,2,0,3,2,2,0,1,0,1,2,1,0,0,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,3,3,2,2,2,0,0,3,3,3,3,2,2,0,2,0,0,0,2,2,0,
+ 2,0,3,0,0,3,3,3,2,3,3,3,3,3,3,2,3,0,2,0,0,2,3,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,2,3,2,3,0,2,2,2,0,0,3,2,2,3,2,2,2,0,0,3,3,2,2,1,3,0,2,0,1,0,0,1,0,
+ 2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,3,2,2,3,2,2,0,3,0,0,0,0,0,0,2,0,0,0,0,
+ 0,0,2,0,0,0,0,0,0,0,0,0,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,3,2,1,2,2,3,1,2,2,0,0,3,2,0,0,0,0,0,1,0,0,
+ 3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,2,3,2,2,0,0,0,2,2,2,0,0,2,2,0,0,0,0,0,1,0,0,
+ 3,3,2,3,3,2,3,0,3,3,3,2,2,2,0,0,2,2,2,2,0,0,0,2,0,2,3,2,2,3,0,0,0,0,0,2,2,0,0,2,
+ 3,3,0,2,3,0,0,0,2,2,0,0,1,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
+ 3,3,2,3,3,3,2,0,3,2,3,2,0,0,2,0,2,2,2,2,2,0,0,2,0,2,2,1,0,0,0,0,0,0,0,1,0,0,2,0,
+ 1,0,3,0,0,3,3,3,0,3,2,3,3,2,0,2,2,0,2,0,0,3,2,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,3,0,0,2,0,0,0,2,2,2,0,2,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,2,3,3,2,2,2,3,2,3,3,3,2,0,2,2,2,2,3,3,0,0,2,0,0,2,2,2,2,0,2,0,2,0,2,2,0,0,0,
+ 3,3,2,3,3,3,3,3,3,2,2,2,2,2,2,0,0,1,2,3,2,0,0,0,0,2,2,2,0,0,0,0,0,2,0,2,0,0,0,0,
+ 0,2,3,0,2,3,3,2,0,3,3,2,2,3,0,3,3,2,2,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,0,0,2,2,2,0,2,0,0,0,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,
+ 3,3,2,2,3,2,0,2,2,0,2,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,2,0,0,0,2,0,0,0,0,1,0,
+ 1,0,0,0,0,2,2,2,0,3,2,0,2,0,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
+ 2,0,2,0,2,2,0,2,2,0,2,2,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
+ 2,0,2,0,0,2,1,2,0,0,1,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+ 0,0,1,2,0,1,2,2,0,2,2,2,2,2,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,2,0,0,2,2,2,0,2,2,2,2,0,0,0,2,0,0,0,0,0,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
-const SequenceModel Iso_8859_10LithuanianModel =
+const SequenceModel Iso_8859_4LithuanianModel =
{
- Iso_8859_10_CharToOrderMap,
+ Iso_8859_4_CharToOrderMap,
LithuanianLangModel,
- 38,
- (float)0.9928710196247589,
+ 40,
+ (float)0.9919219576954762,
PR_TRUE,
- "ISO-8859-10",
+ "ISO-8859-4",
"lt"
};
-const SequenceModel Iso_8859_4LithuanianModel =
+const SequenceModel Iso_8859_10LithuanianModel =
{
- Iso_8859_4_CharToOrderMap,
+ Iso_8859_10_CharToOrderMap,
LithuanianLangModel,
- 38,
- (float)0.9928710196247589,
+ 40,
+ (float)0.9919219576954762,
PR_TRUE,
- "ISO-8859-4",
+ "ISO-8859-10",
"lt"
};
@@ -201,9 +219,19 @@ const SequenceModel Iso_8859_13LithuanianModel =
{
Iso_8859_13_CharToOrderMap,
LithuanianLangModel,
- 38,
- (float)0.9928710196247589,
+ 40,
+ (float)0.9919219576954762,
PR_TRUE,
"ISO-8859-13",
"lt"
};
+
+const LanguageModel LithuanianModel =
+{
+ "lt",
+ Unicode_CharOrder,
+ 80,
+ LithuanianLangModel,
+ 40,
+ (float)0.9919219576954762,
+};
diff --git a/src/LangModels/LangMalteseModel.cpp b/src/LangModels/LangMalteseModel.cpp
index e253539..e0bdf42 100644
--- a/src/LangModels/LangMalteseModel.cpp
+++ b/src/LangModels/LangMalteseModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Maltese *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-21 02:07:45.509404
+ * On: 2021-03-16 19:33:28.446672
**/
/* Character Mapping Table:
@@ -67,62 +68,75 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 15, 28, 13, 4, 16, 19, 22, 1, 9, 12, 3, 10, 5, 8, /* 4X */
- 14, 27, 6, 11, 2, 7, 26, 18, 25, 30, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 15, 28, 13, 4, 16, 19, 22, 1, 9, 12, 3, 10, 5, 8, /* 6X */
- 14, 27, 6, 11, 2, 7, 26, 18, 25, 30, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 1, 15, 28, 13, 4, 16, 18, 22, 0, 9, 12, 3, 10, 5, 8, /* 4X */
+ 14, 27, 6, 11, 2, 7, 25, 19, 26, 30, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 15, 28, 13, 4, 16, 18, 22, 0, 9, 12, 3, 10, 5, 8, /* 6X */
+ 14, 27, 6, 11, 2, 7, 25, 19, 26, 30, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 17,SYM,SYM,SYM,ILL, 48,SYM,SYM, 49, 50, 51, 52,SYM,ILL, 21, /* AX */
- SYM, 17,SYM,SYM,SYM,SYM, 53,SYM,SYM, 54, 55, 56, 57,SYM,ILL, 21, /* BX */
- 29, 36, 47,ILL, 58, 24, 59, 40, 33, 31, 60, 39, 45, 35, 61, 62, /* CX */
- ILL, 37, 32, 34, 44, 23, 38,SYM, 63, 43, 42, 64, 46, 65, 66, 41, /* DX */
- 29, 36, 47,ILL, 67, 24, 68, 40, 33, 31, 69, 39, 45, 35, 70, 71, /* EX */
- ILL, 37, 32, 34, 44, 23, 38,SYM, 72, 43, 42, 73, 46, 74, 75,SYM, /* FX */
+ SYM, 17,SYM,SYM,SYM,ILL, 49,SYM,SYM, 50, 51, 52, 53,SYM,ILL, 21, /* AX */
+ SYM, 17,SYM,SYM,SYM,SYM, 54,SYM,SYM, 55, 56, 57, 58,SYM,ILL, 21, /* BX */
+ 29, 36, 48,ILL, 41, 24, 59, 40, 33, 31, 60, 39, 46, 35, 61, 62, /* CX */
+ ILL, 38, 32, 34, 43, 23, 37,SYM, 63, 47, 44, 64, 45, 65, 66, 42, /* DX */
+ 29, 36, 48,ILL, 41, 24, 67, 40, 33, 31, 68, 39, 46, 35, 69, 70, /* EX */
+ ILL, 38, 32, 34, 43, 23, 37,SYM, 71, 47, 44, 72, 45, 73, 74,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 62;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 1, 66, 15, 67, 28, 68, 13, 69, 4, 70, 16, 71, 18, 72, 22,
+ 73, 0, 74, 9, 75, 12, 76, 3, 77, 10, 78, 5, 79, 8, 80, 14,
+ 81, 27, 82, 6, 83, 11, 84, 2, 85, 7, 86, 25, 87, 19, 88, 26,
+ 89, 30, 90, 20, 97, 1, 98, 15, 99, 28, 100, 13, 101, 4,102, 16,
+ 103, 18, 104, 22, 105, 0, 106, 9, 107, 12, 108, 3, 109, 10,110, 5,
+ 111, 8, 112, 14, 113, 27, 114, 6, 115, 11, 116, 2, 117, 7,118, 25,
+ 119, 19, 120, 26, 121, 30, 122, 20, 192, 29, 224, 29, 266, 24,267, 24,
+ 288, 23, 289, 23, 294, 17, 295, 17, 379, 21, 380, 21,
+};
+
/* Model Table:
- * Total sequences: 870
- * First 512 sequences: 0.9959115850692665
- * Next 512 sequences (512-1024): 0.004088414930733575
- * Rest: -4.423544863740858e-17
+ * Total sequences: 888
+ * First 512 sequences: 0.9960434044151966
+ * Next 512 sequences (512-1024): 0.0039565955848034195
+ * Rest: 1.5612511283791264e-17
* Negative sequences: TODO
*/
static const PRUint8 MalteseLangModel[] =
{
+ 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,2,
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,2,0,3,0,0,3,3,3,2,3,3,
- 3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,2,3,3,2,0,3,3,0,3,3,3,2,0,2,
+ 3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,3,3,3,2,0,3,3,0,3,3,3,2,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,
- 3,3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,3,3,3,2,3,0,3,
- 3,3,3,3,3,3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,
- 3,3,2,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,0,3,3,2,2,2,2,2,0,0,0,
- 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,2,3,2,2,3,2,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,0,3,2,0,0,3,3,3,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,3,0,0,0,2,0,3,2,0,0,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,3,2,2,0,3,0,0,2,2,0,2,2,2,
- 3,3,2,3,3,2,3,3,3,3,2,3,2,2,3,0,0,0,2,3,0,0,3,0,2,0,2,0,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,3,3,3,0,3,2,0,0,2,0,3,3,0,2,
- 3,3,3,3,3,3,3,3,3,3,0,3,2,2,0,2,3,0,0,2,0,0,2,0,0,0,0,2,2,0,2,
- 3,3,3,3,3,2,3,3,3,3,3,3,2,3,0,3,2,3,2,0,0,2,3,2,0,2,0,3,0,0,0,
- 3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,2,2,3,2,0,2,2,3,2,3,2,2,0,0,2,
- 3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,2,0,3,3,3,2,3,3,0,0,0,3,0,2,2,3,
- 3,3,2,2,3,2,2,3,2,3,2,0,0,0,2,0,0,0,2,2,3,0,0,0,0,0,2,2,0,0,0,
- 3,3,2,3,3,2,0,3,3,3,3,0,0,3,0,2,2,0,2,3,0,3,0,0,0,0,3,0,0,0,0,
- 3,3,3,2,3,2,3,3,3,0,3,2,2,2,2,2,0,0,2,0,2,0,2,0,0,0,0,2,0,0,2,
- 3,3,2,2,3,3,3,3,3,3,2,0,0,3,0,2,0,2,2,3,2,2,0,3,0,0,2,0,0,2,0,
+ 3,3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,3,3,3,2,3,0,3,
+ 3,2,3,3,3,3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,
+ 3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,3,3,2,2,2,2,3,2,0,0,
+ 3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,0,2,3,2,2,2,3,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,3,2,0,3,0,3,3,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,0,3,0,0,2,0,2,0,2,0,0,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,2,3,2,0,3,2,0,2,2,0,0,2,2,
+ 3,3,2,3,3,3,3,3,3,3,2,3,2,2,3,0,2,0,2,0,0,0,3,2,2,2,0,0,2,0,0,
+ 3,3,3,3,3,3,3,3,3,3,0,3,2,3,0,3,2,3,3,3,0,3,2,0,0,0,2,3,3,0,2,
+ 3,3,3,3,3,3,3,3,3,3,0,3,2,2,0,2,3,0,2,0,0,0,2,0,0,0,0,3,2,0,2,
+ 3,3,3,3,3,2,3,3,3,3,3,3,2,3,0,3,2,3,0,2,0,3,3,2,0,0,2,3,0,0,0,
+ 3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,3,0,3,3,3,2,3,3,0,0,3,0,0,2,2,2,
+ 3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,2,2,2,3,0,2,2,3,2,2,2,2,0,0,2,
+ 3,3,2,2,3,2,2,3,2,3,2,0,0,0,2,0,0,0,2,2,3,0,0,0,0,2,0,2,0,0,0,
+ 3,3,2,3,3,2,0,3,3,3,3,0,0,3,0,2,2,0,3,2,0,2,2,0,0,3,0,0,0,0,0,
+ 3,3,3,2,3,2,3,3,3,0,2,2,2,2,2,2,0,0,0,2,2,0,2,0,0,2,0,2,0,0,2,
+ 3,3,2,3,3,3,3,3,3,3,2,0,0,3,0,2,0,2,3,2,2,2,0,3,0,2,0,0,0,2,0,
3,3,2,2,3,0,2,2,0,3,0,0,2,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,
- 3,3,3,3,3,2,3,3,3,3,3,0,2,2,0,3,2,0,2,0,0,0,3,0,0,3,2,0,2,0,0,
- 3,3,0,2,3,2,3,3,3,3,0,2,0,3,2,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,
- 3,3,3,2,3,0,3,3,3,3,2,3,2,3,0,3,3,0,3,3,0,0,2,2,2,2,0,3,0,2,0,
- 3,3,3,3,3,0,2,2,3,2,0,3,3,3,0,2,3,0,0,0,2,0,3,0,0,0,0,2,2,0,2,
- 0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,2,0,2,0,2,
+ 3,3,0,2,3,2,3,3,3,3,0,2,0,3,2,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,2,
+ 3,3,3,3,3,2,3,3,3,3,3,0,2,2,0,2,2,0,2,2,0,0,3,0,0,2,3,0,2,0,0,
+ 3,3,3,2,3,0,3,3,3,3,2,3,2,3,0,3,3,0,3,3,0,0,2,2,2,0,2,3,0,0,0,
+ 3,3,3,3,3,0,2,3,3,2,0,3,3,3,2,2,2,0,0,0,2,0,3,0,0,0,0,2,2,0,2,
+ 2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,0,0,2,2,0,0,0,0,0,2,0,0,2,0,2,
};
@@ -131,8 +145,18 @@ const SequenceModel Iso_8859_3MalteseModel =
Iso_8859_3_CharToOrderMap,
MalteseLangModel,
31,
- (float)0.9959115850692665,
+ (float)0.9960434044151966,
PR_TRUE,
"ISO-8859-3",
"mt"
};
+
+const LanguageModel MalteseModel =
+{
+ "mt",
+ Unicode_CharOrder,
+ 62,
+ MalteseLangModel,
+ 31,
+ (float)0.9960434044151966,
+};
diff --git a/src/LangModels/LangPolishModel.cpp b/src/LangModels/LangPolishModel.cpp
index 38791de..690738f 100644
--- a/src/LangModels/LangPolishModel.cpp
+++ b/src/LangModels/LangPolishModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Polish *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-21 17:21:04.405363
+ * On: 2021-03-16 19:54:55.178474
**/
/* Character Mapping Table:
@@ -61,190 +62,217 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Ibm852_CharToOrderMap[] =
+static const unsigned char Iso_8859_2_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 47, 39, 34, 54, 40, 78, 30, 47, 19, 58, 49, 49, 77, 32, 40, 30, /* 8X */
- 34, 79, 80, 55, 38, 74, 74, 28, 28, 38, 39, 76, 76, 19,SYM, 44, /* 9X */
- 35, 37, 24, 51, 25, 25, 45, 45, 23, 23,SYM, 32, 44, 56,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 35, 54, 46, 56,SYM,SYM,SYM,SYM, 27, 27,SYM, /* BX */
- SYM,SYM,SYM,SYM,SYM,SYM, 53, 53,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
- 70, 70, 69, 58, 69, 81, 37, 77, 46,SYM,SYM,SYM,SYM, 65, 82,SYM, /* DX */
- 24, 57, 55, 29, 29, 83, 41, 41, 84, 51, 85, 86, 60, 60, 65,SYM, /* EX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 87, 50, 50,SYM,SYM, /* FX */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ SYM, 24,SYM, 19,SYM, 68, 29,SYM,SYM, 40, 67, 74, 32,SYM, 43, 28, /* AX */
+ SYM, 24,SYM, 19,SYM, 68, 29,SYM,SYM, 40, 67, 74, 32,SYM, 43, 28, /* BX */
+ 86, 35, 61, 53, 41, 87, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* CX */
+ 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78, 54, /* DX */
+ 88, 35, 61, 53, 41, 89, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* EX */
+ 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_16_CharToOrderMap[] =
+static const unsigned char Iso_8859_13_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 25, 25, 19,SYM,SYM, 41,SYM, 41,SYM, 62,SYM, 32,SYM, 32, 27, /* AX */
- SYM,SYM, 44, 19, 45,SYM,SYM,SYM, 45, 44, 62,SYM, 75, 75, 88, 27, /* BX */
- 61, 35, 54, 53, 40, 30, 89, 47, 43, 34, 64, 58, 90, 37, 77, 91, /* CX */
- 70, 29, 66, 24, 55, 49, 38, 28, 92, 68, 51, 93, 39, 23, 72, 57, /* DX */
- 61, 35, 54, 53, 40, 30, 94, 47, 43, 34, 64, 58, 95, 37, 77, 96, /* EX */
- 70, 29, 66, 24, 55, 49, 38, 28, 97, 68, 51, 98, 39, 23, 72, 99, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 90,SYM,SYM,SYM,SYM, 76, /* AX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 91,SYM,SYM,SYM,SYM, 76, /* BX */
+ 24, 92, 69, 31, 41, 55, 23, 65, 42, 34, 32, 63, 93, 94, 72, 95, /* CX */
+ 40, 27, 73, 25, 44, 66, 38,SYM, 96, 19, 29, 56, 39, 28, 43, 54, /* DX */
+ 24, 97, 69, 31, 41, 55, 23, 65, 42, 34, 32, 63, 98, 99, 72,100, /* EX */
+ 40, 27, 73, 25, 44, 66, 38,SYM,101, 19, 29, 56, 39, 28, 43,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_2_CharToOrderMap[] =
+static const unsigned char Iso_8859_16_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 25,SYM, 19,SYM, 74, 28,SYM,SYM, 41, 56, 76, 32,SYM, 45, 27, /* AX */
- SYM, 25,SYM, 19,SYM, 74, 28,SYM,SYM, 41, 56, 76, 32,SYM, 45, 27, /* BX */
- 100, 35, 54, 53, 40,101, 30, 47, 44, 34, 23, 58, 46, 37, 77, 69, /* CX */
- 70, 29,102, 24, 55, 49, 38,SYM, 50,103, 51,104, 39, 60, 65, 57, /* DX */
- 105, 35, 54, 53, 40,106, 30, 47, 44, 34, 23, 58, 46, 37, 77, 69, /* EX */
- 70, 29,107, 24, 55, 49, 38,SYM, 50,108, 51,109, 39, 60, 65,SYM, /* FX */
+ SYM, 24, 24, 19,SYM,SYM, 40,SYM, 40,SYM, 62,SYM, 32,SYM, 32, 28, /* AX */
+ SYM,SYM, 42, 19, 43,SYM,SYM,SYM, 43, 42, 62,SYM, 84, 84,102, 28, /* BX */
+ 70, 35, 61, 53, 41, 31, 76, 45, 46, 34, 60, 52, 83, 36, 80, 71, /* CX */
+ 58, 27, 64, 25, 59, 57, 38, 29, 79, 85, 49,103, 39, 23, 81, 54, /* DX */
+ 70, 35, 61, 53, 41, 31, 76, 45, 46, 34, 60, 52, 83, 36, 80, 71, /* EX */
+ 58, 27, 64, 25, 59, 57, 38, 29, 79, 85, 49,104, 39, 23, 81,105, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
+static const unsigned char Windows_1250_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 40, 63, 63, 34, 25, 38, 39, 35, 25, 44, 40, 44, 30, 30, 34, 32, /* 8X */
- 32, 69, 37, 69,110,111, 71, 24, 71, 55, 38, 67, 51, 46, 46, 39, /* 9X */
- SYM,SYM, 23,SYM,SYM,SYM,SYM, 57,SYM,SYM,SYM, 23,SYM,SYM,112,113, /* AX */
- 114, 73,SYM,SYM, 73,115,SYM,SYM, 19,116,117, 74, 74,118,119,120, /* BX */
- 121, 29,SYM,SYM, 29,122,SYM,SYM,SYM,SYM,SYM,123, 49, 67, 49, 42, /* CX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 42,124,125, 50,SYM,SYM, 50,126, /* DX */
- 127, 41,SYM,SYM, 41, 28, 28, 35, 76, 76, 37, 45, 45, 59, 24, 55, /* EX */
- 59,128, 51,129,130,131,132,133, 60, 60,134, 27, 19, 27,135,SYM, /* FX */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 40,SYM, 29, 74, 43, 32, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 40,SYM, 29, 74, 43, 32, /* 9X */
+ SYM,SYM,SYM, 19,SYM, 24,SYM,SYM,SYM,SYM, 67,SYM,SYM,SYM,SYM, 28, /* AX */
+ SYM,SYM,SYM, 19,SYM,SYM,SYM,SYM,SYM, 24, 67,SYM, 68,SYM, 68, 28, /* BX */
+ 106, 35, 61, 53, 41,107, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* CX */
+ 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78, 54, /* DX */
+ 108, 35, 61, 53, 41,109, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* EX */
+ 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_13_CharToOrderMap[] =
+static const unsigned char Ibm852_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM,136,SYM,SYM,SYM,SYM,137, /* AX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM,138,SYM,SYM,SYM,SYM,139, /* BX */
- 25,140, 63, 30, 40, 52, 23,141, 44, 34, 32, 71,142,143, 73,144, /* CX */
- 41, 29,145, 24, 42, 67, 38,SYM,146, 19, 28, 59, 39, 27, 45, 57, /* DX */
- 25,147, 63, 30, 40, 52, 23,148, 44, 34, 32, 71,149,150, 73,151, /* EX */
- 41, 29,152, 24, 42, 67, 38,SYM,153, 19, 28, 59, 39, 27, 45,SYM, /* FX */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 45, 39, 34, 61, 41, 75, 31, 45, 19, 52, 57, 57, 80, 32, 41, 31, /* 8X */
+ 34,110,111, 59, 38, 68, 68, 29, 29, 38, 39, 74, 74, 19,SYM, 42, /* 9X */
+ 35, 36, 25, 49, 24, 24, 43, 43, 23, 23,SYM, 32, 42, 67,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 35, 61, 48, 67,SYM,SYM,SYM,SYM, 28, 28,SYM, /* BX */
+ SYM,SYM,SYM,SYM,SYM,SYM, 53, 53,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
+ 58, 58, 77, 52, 77, 82, 36, 80, 48,SYM,SYM,SYM,SYM, 78, 75,SYM, /* DX */
+ 25, 54, 59, 27, 27, 82, 40, 40,112, 49,113, 79, 51, 51, 78,SYM, /* EX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 79, 50, 50,SYM,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Windows_1250_CharToOrderMap[] =
+static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */
- 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 41,SYM, 28, 76, 45, 32, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 41,SYM, 28, 76, 45, 32, /* 9X */
- SYM,SYM,SYM, 19,SYM, 25,SYM,SYM,SYM,SYM, 56,SYM,SYM,SYM,SYM, 27, /* AX */
- SYM,SYM,SYM, 19,SYM,SYM,SYM,SYM,SYM, 25, 56,SYM, 74,SYM, 74, 27, /* BX */
- 154, 35, 54, 53, 40,155, 30, 47, 44, 34, 23, 58, 46, 37, 77, 69, /* CX */
- 70, 29,156, 24, 55, 49, 38,SYM, 50,157, 51,158, 39, 60, 65, 57, /* DX */
- 159, 35, 54, 53, 40,160, 30, 47, 44, 34, 23, 58, 46, 37, 77, 69, /* EX */
- 70, 29,161, 24, 55, 49, 38,SYM, 50,162, 51,163, 39, 60, 65,SYM, /* FX */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */
+ 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 41, 69, 69, 34, 24, 38, 39, 35, 24, 42, 41, 42, 31, 31, 34, 32, /* 8X */
+ 32, 77, 36, 77, 65, 65, 63, 25, 63, 59, 38, 66, 49, 48, 48, 39, /* 9X */
+ SYM,SYM, 23,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM, 23,SYM,SYM,114,115, /* AX */
+ 116, 72,SYM,SYM, 72,117,SYM,SYM, 19,118,119, 68, 68,120,121, 73, /* BX */
+ 73, 27,SYM,SYM, 27, 82,SYM,SYM,SYM,SYM,SYM, 82, 57, 66, 57, 44, /* CX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 44,122,123, 50,SYM,SYM, 50,124, /* DX */
+ 125, 40,SYM,SYM, 40, 29, 29, 35, 74, 74, 36, 43, 43, 56, 25, 59, /* EX */
+ 56, 75, 49, 75, 79, 79,126,127, 51, 51,128, 28, 19, 28,129,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 76;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 21, 67, 11, 68, 15, 69, 3, 70, 26, 71, 20, 72, 22,
+ 73, 1, 74, 18, 75, 6, 76, 14, 77, 16, 78, 5, 79, 2, 80, 13,
+ 81, 37, 82, 4, 83, 7, 84, 10, 85, 17, 86, 30, 87, 9, 88, 33,
+ 89, 12, 90, 8, 97, 0, 98, 21, 99, 11, 100, 15, 101, 3,102, 26,
+ 103, 20, 104, 22, 105, 1, 106, 18, 107, 6, 108, 14, 109, 16,110, 5,
+ 111, 2, 112, 13, 113, 37, 114, 4, 115, 7, 116, 10, 117, 17,118, 30,
+ 119, 9, 120, 33, 121, 12, 122, 8, 193, 35, 201, 34, 205, 36,211, 25,
+ 225, 35, 233, 34, 237, 36, 243, 25, 260, 24, 261, 24, 262, 31,263, 31,
+ 280, 23, 281, 23, 321, 19, 322, 19, 323, 27, 324, 27, 346, 29,347, 29,
+ 377, 32, 378, 32, 379, 28, 380, 28,
+};
+
/* Model Table:
- * Total sequences: 1321
- * First 512 sequences: 0.9894531815946438
- * Next 512 sequences (512-1024): 0.010193795364991133
- * Rest: 0.0003530230403650733
+ * Total sequences: 1547
+ * First 512 sequences: 0.9881622113600178
+ * Next 512 sequences (512-1024): 0.011288903649768277
+ * Rest: 0.0005488849902139173
* Negative sequences: TODO
*/
static const PRUint8 PolishLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,3,3,2,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,2,3,2,2,0,0,1,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,3,3,3,3,2,3,2,2,0,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,2,2,2,0,2,2,0,1,2,2,2,
- 3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,2,3,2,1,2,3,2,3,3,3,3,2,0,0,0,2,0,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,3,2,0,0,0,0,2,0,0,2,2,2,
- 3,3,3,3,3,2,3,3,1,3,3,3,2,2,2,3,3,3,2,3,2,2,2,3,3,3,2,3,2,0,0,2,0,1,2,2,0,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,1,2,0,0,0,2,0,0,2,2,2,
- 3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,3,0,3,2,2,2,3,3,3,1,0,2,0,0,0,0,0,1,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,2,2,3,3,3,3,2,1,0,0,0,2,0,0,2,2,0,
- 3,3,3,3,2,3,2,3,3,2,3,2,3,1,2,3,2,3,3,2,1,2,3,2,3,3,0,0,0,0,0,2,0,0,2,0,2,
- 3,2,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,3,3,2,0,0,0,3,3,3,3,3,2,2,0,0,0,0,
- 3,3,3,3,3,3,3,2,2,2,2,3,3,3,2,3,2,3,1,3,2,2,3,2,3,2,2,0,0,0,0,2,0,0,2,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,3,2,2,1,0,0,2,2,0,2,2,1,
- 3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,3,2,0,3,3,2,3,2,3,3,2,0,0,0,2,0,1,2,2,0,
- 3,3,3,3,2,3,3,2,1,2,2,3,3,3,2,2,3,3,2,2,3,2,1,3,3,2,2,1,1,0,0,1,0,0,2,2,0,
- 3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,0,1,0,3,3,2,3,2,2,2,2,2,2,2,
- 3,3,3,3,2,3,3,2,2,3,2,3,0,2,3,2,3,3,2,2,2,2,1,3,3,3,1,1,3,1,0,1,0,0,0,2,0,
- 3,0,3,3,1,3,2,3,2,2,3,2,3,2,2,0,2,3,0,2,2,3,1,3,3,3,0,2,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,2,2,1,2,3,2,2,3,2,3,2,3,2,1,2,3,2,2,2,0,0,0,0,0,0,0,2,2,0,
- 3,3,3,3,3,3,2,2,2,3,2,1,2,2,3,3,2,3,2,3,2,2,3,2,3,2,2,1,0,0,0,2,0,0,2,2,2,
- 3,3,3,3,3,3,2,2,1,2,3,2,3,2,2,3,3,3,2,2,2,1,1,2,2,2,2,0,2,0,0,2,0,0,2,2,0,
- 0,0,0,0,0,0,3,3,3,1,3,3,0,3,3,2,0,0,0,3,3,3,0,0,0,0,0,3,3,0,2,0,2,0,0,0,0,
- 0,0,0,0,3,2,2,2,3,3,2,3,1,2,3,3,2,0,3,3,3,2,0,0,0,0,2,3,1,0,0,1,3,0,0,0,0,
- 0,0,0,0,0,0,2,2,3,2,3,3,0,3,3,0,0,0,0,3,2,3,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,
- 3,3,3,3,3,2,2,2,1,2,2,2,2,1,1,3,2,3,2,2,2,2,2,2,2,2,3,0,0,0,0,1,0,0,2,1,1,
- 3,2,3,3,0,3,2,2,0,2,0,2,3,0,2,3,2,3,0,0,3,1,0,2,2,3,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,3,3,0,2,0,3,0,3,0,2,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,1,3,0,0,0,0,0,0,
- 1,0,0,0,0,0,3,2,0,0,0,3,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,0,1,0,0,0,2,0,2,0,0,0,0,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,2,2,2,2,1,0,0,0,2,2,1,2,0,2,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,2,2,0,
- 0,0,0,0,2,3,2,0,0,2,0,2,0,0,3,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
- 2,3,2,2,0,1,0,1,0,1,1,0,1,2,1,2,1,2,1,0,2,0,2,0,0,0,2,0,0,0,0,2,0,2,0,0,0,
- 2,1,2,2,2,2,2,0,1,0,2,2,1,1,2,2,2,1,1,0,1,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
- 0,1,0,1,2,2,2,2,2,0,2,2,0,2,1,2,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
- 1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,3,3,2,0,0,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,1,2,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,2,2,2,1,0,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,2,3,3,3,3,3,2,2,2,0,1,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,2,0,2,2,2,2,0,1,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,3,2,1,3,2,2,3,3,3,3,0,3,1,2,0,1,2,2,2,2,2,
+ 3,3,3,3,3,2,3,3,1,3,3,3,2,2,3,2,3,3,2,3,2,2,2,3,3,3,2,0,3,1,1,0,0,1,2,2,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,3,2,0,0,0,2,0,0,0,2,2,2,2,
+ 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,0,1,0,2,0,0,0,2,2,1,2,
+ 3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,2,3,2,3,2,2,2,3,3,3,1,0,0,2,0,0,0,0,1,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,3,2,2,3,3,3,3,2,0,1,1,2,0,0,1,2,2,2,0,
+ 3,3,3,3,2,3,3,2,3,2,3,2,3,1,3,2,2,3,3,2,2,2,3,2,3,3,1,0,0,0,1,0,0,0,2,2,2,2,
+ 3,2,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,3,3,2,0,0,0,3,3,3,3,2,3,2,0,1,1,0,0,
+ 3,3,3,3,3,3,2,3,2,2,2,3,3,3,3,2,2,3,1,3,2,2,3,2,2,3,2,0,0,0,1,0,0,0,2,2,1,0,
+ 3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,3,2,0,3,3,2,3,3,2,3,1,2,1,2,0,0,1,2,2,2,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,2,2,3,3,2,0,3,1,2,0,2,0,2,2,2,2,
+ 3,3,3,3,2,3,3,3,1,2,2,3,3,3,2,2,3,3,2,2,2,3,2,3,2,3,2,0,2,1,1,0,0,0,2,2,2,0,
+ 3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,0,0,0,3,3,3,2,2,1,2,2,2,2,2,2,
+ 3,3,3,3,2,3,2,3,2,3,2,3,0,2,2,3,3,3,2,2,2,2,1,3,3,3,1,1,1,3,1,1,0,0,1,2,1,0,
+ 3,1,3,3,2,3,3,2,2,2,3,2,3,2,1,2,2,3,0,2,3,2,1,3,3,3,1,0,2,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,2,2,2,3,2,1,2,2,3,3,2,3,2,3,2,2,3,2,2,3,2,0,1,0,2,0,0,1,2,2,1,1,
+ 3,3,3,3,3,3,2,3,2,2,1,2,3,2,3,2,2,3,2,3,1,2,2,3,2,2,2,0,0,0,0,0,0,0,2,2,1,0,
+ 3,3,3,3,3,3,2,2,2,2,2,2,3,2,3,2,3,3,2,2,1,2,2,2,2,2,2,0,1,1,2,0,0,0,2,2,0,1,
+ 0,0,0,0,0,0,3,3,3,1,3,3,0,3,2,3,0,0,0,3,3,3,0,0,0,0,0,0,3,3,0,2,2,0,0,0,0,0,
+ 0,0,0,0,0,0,2,2,3,2,3,3,0,3,0,3,0,0,0,3,3,2,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,
+ 1,0,0,0,3,2,2,2,3,3,2,3,1,2,3,3,2,0,3,3,2,3,1,0,0,0,1,0,3,1,1,0,3,0,0,0,1,0,
+ 3,3,3,3,3,2,2,2,1,2,2,1,2,1,3,2,2,3,2,2,2,2,1,2,2,1,3,0,0,0,2,0,0,0,2,1,0,1,
+ 0,0,0,0,0,0,2,3,0,0,1,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,3,3,0,3,2,2,0,2,0,2,3,0,3,2,2,3,0,1,1,3,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,3,3,2,1,0,3,0,3,0,2,3,0,3,0,0,0,0,2,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0,
+ 3,3,3,3,2,2,2,2,1,0,1,0,2,1,2,0,0,2,2,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,2,2,2,0,
+ 1,0,1,1,0,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,2,3,0,2,0,2,0,2,1,0,2,3,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 2,3,2,2,0,0,1,2,0,1,2,0,1,2,1,0,1,2,0,0,0,1,2,0,0,0,2,0,0,0,2,0,0,2,1,0,0,0,
+ 2,1,2,2,2,2,1,2,1,0,2,2,0,1,2,2,2,1,1,0,2,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+ 0,1,1,1,2,2,2,2,2,0,2,2,0,1,2,2,1,1,1,0,1,2,1,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
+ 2,1,2,1,2,2,2,2,1,0,1,2,0,0,2,1,1,0,0,0,2,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+ 1,2,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
};
-const SequenceModel Ibm852PolishModel =
+const SequenceModel Iso_8859_2PolishModel =
{
- Ibm852_CharToOrderMap,
+ Iso_8859_2_CharToOrderMap,
PolishLangModel,
- 37,
- (float)0.9894531815946438,
+ 38,
+ (float)0.9881622113600178,
PR_TRUE,
- "IBM852",
+ "ISO-8859-2",
+ "pl"
+};
+
+const SequenceModel Iso_8859_13PolishModel =
+{
+ Iso_8859_13_CharToOrderMap,
+ PolishLangModel,
+ 38,
+ (float)0.9881622113600178,
+ PR_TRUE,
+ "ISO-8859-13",
"pl"
};
@@ -252,53 +280,52 @@ const SequenceModel Iso_8859_16PolishModel =
{
Iso_8859_16_CharToOrderMap,
PolishLangModel,
- 37,
- (float)0.9894531815946438,
+ 38,
+ (float)0.9881622113600178,
PR_TRUE,
"ISO-8859-16",
"pl"
};
-const SequenceModel Iso_8859_2PolishModel =
+const SequenceModel Windows_1250PolishModel =
{
- Iso_8859_2_CharToOrderMap,
+ Windows_1250_CharToOrderMap,
PolishLangModel,
- 37,
- (float)0.9894531815946438,
+ 38,
+ (float)0.9881622113600178,
PR_TRUE,
- "ISO-8859-2",
+ "WINDOWS-1250",
"pl"
};
-const SequenceModel Mac_CentraleuropePolishModel =
+const SequenceModel Ibm852PolishModel =
{
- Mac_Centraleurope_CharToOrderMap,
+ Ibm852_CharToOrderMap,
PolishLangModel,
- 37,
- (float)0.9894531815946438,
+ 38,
+ (float)0.9881622113600178,
PR_TRUE,
- "MAC-CENTRALEUROPE",
+ "IBM852",
"pl"
};
-const SequenceModel Iso_8859_13PolishModel =
+const SequenceModel Mac_CentraleuropePolishModel =
{
- Iso_8859_13_CharToOrderMap,
+ Mac_Centraleurope_CharToOrderMap,
PolishLangModel,
- 37,
- (float)0.9894531815946438,
+ 38,
+ (float)0.9881622113600178,
PR_TRUE,
- "ISO-8859-13",
+ "MAC-CENTRALEUROPE",
"pl"
};
-const SequenceModel Windows_1250PolishModel =
+const LanguageModel PolishModel =
{
- Windows_1250_CharToOrderMap,
+ "pl",
+ Unicode_CharOrder,
+ 76,
PolishLangModel,
- 37,
- (float)0.9894531815946438,
- PR_TRUE,
- "WINDOWS-1250",
- "pl"
+ 38,
+ (float)0.9881622113600178,
};
diff --git a/src/LangModels/LangPortugueseModel.cpp b/src/LangModels/LangPortugueseModel.cpp
index 0b2dd1b..72eae0b 100644
--- a/src/LangModels/LangPortugueseModel.cpp
+++ b/src/LangModels/LangPortugueseModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Portuguese *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-20 23:47:27.348423
+ * On: 2021-03-16 19:59:19.803482
**/
/* Character Mapping Table:
@@ -61,181 +62,206 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Iso_8859_1_CharToOrderMap[] =
+static const unsigned char Iso_8859_15_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 4X */
- 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 6X */
- 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */
+ 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */
+ 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 52, 23, 45, 47, /* CX */
- 48, 53, 46, 27, 37, 30, 38,SYM, 54, 55, 33, 56, 40, 57, 58, 49, /* DX */
- 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 59, 23, 45, 47, /* EX */
- 48, 60, 46, 27, 37, 30, 38,SYM, 61, 62, 33, 63, 40, 64, 65, 50, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM, 53, 54,SYM,SYM, 55,SYM,SYM,SYM, 56, 57, 50,SYM, /* BX */
+ 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 58, 23, 45, 46, /* CX */
+ 47, 59, 49, 27, 37, 30, 40,SYM, 60, 61, 32, 62, 39, 63, 64, 48, /* DX */
+ 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 65, 23, 45, 46, /* EX */
+ 47, 66, 49, 27, 37, 30, 40,SYM, 67, 68, 32, 69, 39, 70, 71, 50, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_9_CharToOrderMap[] =
+static const unsigned char Iso_8859_1_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 4X */
- 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 6X */
- 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */
+ 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */
+ 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 66,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 67, 23, 45, 47, /* CX */
- 68, 69, 46, 27, 37, 30, 38,SYM, 70, 71, 33, 72, 40, 73, 74, 49, /* DX */
- 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 75, 23, 45, 47, /* EX */
- 76, 77, 46, 27, 37, 30, 38,SYM, 78, 79, 33, 80, 40, 81, 82, 50, /* FX */
+ SYM,SYM,SYM,SYM,SYM, 72,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 73, 23, 45, 46, /* CX */
+ 47, 74, 49, 27, 37, 30, 40,SYM, 75, 76, 32, 77, 39, 78, 79, 48, /* DX */
+ 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 80, 23, 45, 46, /* EX */
+ 47, 81, 49, 27, 37, 30, 40,SYM, 82, 83, 32, 84, 39, 85, 86, 50, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_15_CharToOrderMap[] =
+static const unsigned char Windows_1252_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 4X */
- 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 6X */
- 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM, 83,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM, 85, 86,SYM,SYM, 87,SYM,SYM,SYM, 88, 89, 50,SYM, /* BX */
- 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 90, 23, 45, 47, /* CX */
- 48, 91, 46, 27, 37, 30, 38,SYM, 92, 93, 33, 94, 40, 95, 96, 49, /* DX */
- 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 97, 23, 45, 47, /* EX */
- 48, 98, 46, 27, 37, 30, 38,SYM, 99,100, 33,101, 40,102,103, 50, /* FX */
+ SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */
+ 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */
+ 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM, 87,SYM,SYM,SYM,SYM,SYM,SYM, 88,SYM, 89,ILL, 90,ILL, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 91,SYM, 92,ILL, 93, 50, /* 9X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 94,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 95, 23, 45, 46, /* CX */
+ 47, 96, 49, 27, 37, 30, 40,SYM, 97, 98, 32, 99, 39,100,101, 48, /* DX */
+ 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44,102, 23, 45, 46, /* EX */
+ 47,103, 49, 27, 37, 30, 40,SYM,104,105, 32,106, 39,107,108, 50, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Windows_1252_CharToOrderMap[] =
+static const unsigned char Iso_8859_9_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 4X */
- 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 6X */
- 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,104,SYM,SYM,SYM,SYM,SYM,SYM,105,SYM,106,ILL,107,ILL, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,108,SYM,109,ILL,110, 50, /* 9X */
+ SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */
+ 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */
+ 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM,111,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44,112, 23, 45, 47, /* CX */
- 48,113, 46, 27, 37, 30, 38,SYM,114,115, 33,116, 40,117,118, 49, /* DX */
- 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44,119, 23, 45, 47, /* EX */
- 48,120, 46, 27, 37, 30, 38,SYM,121,122, 33,123, 40,124,125, 50, /* FX */
+ SYM,SYM,SYM,SYM,SYM,109,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44,110, 23, 45, 46, /* CX */
+ 111,112, 49, 27, 37, 30, 40,SYM,113,114, 32,115, 39,116,117, 48, /* DX */
+ 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44,118, 23, 45, 46, /* EX */
+ 119,120, 49, 27, 37, 30, 40,SYM,121,122, 32,123, 39,124,125, 50, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 76;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 17, 67, 10, 68, 7, 69, 1, 70, 16, 71, 14, 72, 18,
+ 73, 4, 74, 29, 75, 35, 76, 12, 77, 9, 78, 6, 79, 2, 80, 13,
+ 81, 22, 82, 5, 83, 3, 84, 8, 85, 11, 86, 15, 87, 34, 88, 24,
+ 89, 31, 90, 26, 97, 0, 98, 17, 99, 10, 100, 7, 101, 1,102, 16,
+ 103, 14, 104, 18, 105, 4, 106, 29, 107, 35, 108, 12, 109, 9,110, 6,
+ 111, 2, 112, 13, 113, 22, 114, 5, 115, 3, 116, 8, 117, 11,118, 15,
+ 119, 34, 120, 24, 121, 31, 122, 26, 192, 36, 193, 25, 194, 33,195, 19,
+ 199, 21, 201, 20, 202, 28, 205, 23, 211, 27, 212, 37, 213, 30,218, 32,
+ 224, 36, 225, 25, 226, 33, 227, 19, 231, 21, 233, 20, 234, 28,237, 23,
+ 243, 27, 244, 37, 245, 30, 250, 32,
+};
+
/* Model Table:
- * Total sequences: 891
- * First 512 sequences: 0.9953179582313172
- * Next 512 sequences (512-1024): 0.0046820417686827855
- * Rest: 2.42861286636753e-17
+ * Total sequences: 929
+ * First 512 sequences: 0.9952990712503466
+ * Next 512 sequences (512-1024): 0.004700928749653451
+ * Rest: -7.806255641895632e-18
* Negative sequences: TODO
*/
static const PRUint8 PortugueseLangModel[] =
{
- 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,3,0,3,2,3,0,0,3,2,2,3,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,3,2,3,2,3,2,3,0,2,3,3,2,2,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,3,2,3,2,3,0,2,3,3,0,3,0,0,0,
- 3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,0,3,0,3,2,3,0,2,3,3,2,2,3,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,0,3,3,3,3,2,3,3,2,2,2,3,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,3,3,3,2,2,3,3,0,3,
- 3,3,3,3,3,2,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,0,3,2,3,3,2,0,3,
- 3,3,3,3,3,3,2,3,2,3,2,3,3,2,3,2,2,2,2,3,3,2,0,3,0,3,0,3,2,3,2,3,3,3,0,2,0,2,
- 3,3,3,3,3,3,3,0,3,3,3,3,3,2,2,2,2,2,3,3,3,0,0,3,0,3,2,3,0,3,2,3,2,2,2,3,0,3,
- 3,3,3,3,3,2,3,2,2,3,2,3,2,3,2,0,2,3,0,3,3,2,0,3,0,3,2,3,0,2,2,3,2,3,0,3,0,3,
- 3,3,3,2,3,3,3,2,3,3,3,3,3,2,2,0,2,2,3,3,2,2,3,3,0,3,2,3,0,3,2,3,0,2,3,3,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,0,3,3,0,2,2,0,2,0,0,0,
- 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,2,3,0,3,0,3,2,2,2,3,0,3,
- 3,3,3,3,3,3,2,2,3,0,2,3,3,3,0,0,0,2,3,3,2,2,3,3,0,3,2,3,0,2,2,2,0,3,0,2,0,2,
- 3,3,3,3,3,3,3,2,2,3,2,3,3,2,2,2,0,2,3,3,2,0,0,2,0,3,0,2,0,3,2,3,2,2,0,2,0,0,
- 3,3,3,0,3,3,0,2,0,0,0,3,0,0,0,2,0,0,0,3,2,0,0,3,0,3,0,2,0,3,2,0,0,0,0,2,0,2,
- 3,3,3,2,3,3,0,2,2,2,2,3,3,2,2,0,3,2,0,3,0,0,0,3,0,2,0,3,0,3,0,2,0,2,0,0,0,2,
- 3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,2,3,2,2,3,2,0,0,2,0,2,2,2,3,2,0,2,2,2,0,0,0,0,
- 3,3,3,3,3,3,3,2,3,2,0,3,3,0,0,0,2,2,2,2,3,0,0,2,0,3,0,2,0,0,3,3,2,0,2,0,0,0,
- 2,2,2,3,2,3,3,3,3,3,3,2,3,3,2,2,2,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,0,3,2,0,3,0,3,2,0,2,3,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,2,3,2,3,2,0,3,2,3,2,2,3,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,2,3,2,0,3,2,3,0,0,3,3,0,0,
+ 3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,0,3,2,3,3,0,3,3,2,2,2,3,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,3,2,3,2,2,2,2,3,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,3,3,2,3,3,3,0,2,
+ 3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,2,3,0,3,3,2,2,3,0,3,
+ 3,3,3,3,3,3,2,2,2,3,2,3,2,2,3,2,2,2,2,3,3,0,2,3,0,3,2,2,3,3,2,3,3,2,3,0,0,2,
+ 3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,3,3,3,0,0,3,0,3,2,3,3,2,2,3,2,3,2,2,0,3,
+ 3,3,3,3,3,2,3,2,2,3,2,3,2,3,2,0,2,3,0,3,3,0,2,3,2,3,2,3,2,0,2,3,3,3,2,0,0,3,
+ 3,3,3,2,3,3,3,2,3,3,3,3,3,2,2,0,2,2,3,3,3,3,3,3,0,3,2,3,3,0,2,3,2,3,0,3,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,3,2,3,3,3,3,2,3,3,0,2,0,0,2,2,0,0,
+ 3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,0,3,2,3,3,0,0,3,2,3,2,2,0,3,
+ 3,3,3,3,3,3,2,2,3,2,2,3,3,3,0,0,2,2,3,2,3,3,2,3,0,3,2,3,2,0,2,2,3,2,0,0,0,2,
+ 3,3,3,2,3,3,3,2,2,3,2,3,3,2,2,2,2,2,3,2,3,0,0,2,0,3,0,2,3,0,2,3,2,2,2,0,0,0,
+ 3,3,3,0,3,3,0,2,0,0,0,3,2,0,0,2,0,0,0,2,3,0,0,3,0,3,0,0,2,0,2,0,0,2,0,0,0,2,
+ 3,3,3,2,3,3,0,2,2,2,2,3,3,2,2,0,3,2,0,0,3,0,0,3,0,2,0,3,3,0,0,2,2,2,0,0,0,2,
+ 3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,2,3,2,2,2,3,0,0,2,0,2,2,2,2,3,0,2,2,0,2,0,0,0,
+ 3,3,3,3,3,3,3,2,3,2,0,3,2,0,0,0,2,2,2,3,2,0,0,2,0,3,0,2,0,0,3,3,2,2,2,2,0,0,
0,2,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,2,2,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,3,0,2,0,0,0,0,0,
2,0,0,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,
3,0,3,3,0,3,3,3,3,3,3,0,3,3,3,3,3,3,0,0,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,0,0,0,3,0,3,3,2,3,0,3,2,0,2,2,2,0,0,2,3,2,0,2,2,0,2,0,0,0,0,0,0,2,
+ 3,3,3,2,3,0,0,2,3,0,3,3,2,3,0,3,2,0,2,2,2,0,0,2,3,2,0,2,0,2,2,0,0,0,0,0,0,2,
0,0,0,3,0,3,2,2,3,0,3,2,3,3,3,3,3,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,0,3,2,2,0,0,2,2,3,0,0,0,0,0,2,2,2,2,0,0,0,0,2,2,2,0,2,2,0,2,0,0,2,0,0,
- 0,0,0,3,2,3,3,3,3,3,3,0,3,3,3,2,3,2,0,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,2,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,0,0,0,0,3,0,0,0,0,0,0,0,2,2,0,0,0,
+ 3,3,3,0,3,2,2,0,0,2,2,3,2,2,2,0,0,2,2,2,2,0,0,0,0,2,2,2,2,0,2,2,0,2,2,2,0,0,
+ 0,0,0,3,3,3,3,3,3,3,3,0,3,3,3,2,2,2,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,3,0,0,3,0,2,3,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,2,2,0,2,0,0,0,2,3,0,2,0,0,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,2,0,2,
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,2,3,2,2,3,2,3,2,3,2,2,0,2,2,2,0,0,0,0,0,3,0,2,0,2,0,0,0,2,0,2,0,0,0,
- 3,3,3,2,3,2,2,2,3,2,2,2,2,0,0,2,0,2,3,0,0,0,0,0,0,0,2,0,0,0,0,2,2,0,2,0,0,0,
- 0,0,0,3,0,2,3,3,2,3,2,0,3,2,0,2,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,2,2,0,0,3,2,2,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,2,0,0,0,
- 0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,2,2,0,0,3,2,2,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,3,3,2,3,3,2,3,2,3,2,3,2,2,0,2,2,2,0,0,0,0,0,2,0,2,0,0,2,0,0,0,0,2,2,0,0,
+ 0,0,0,3,0,2,3,3,2,3,3,0,3,2,0,2,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,3,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,2,3,2,2,2,3,2,2,2,2,0,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,2,2,0,0,
+ 3,3,3,2,3,2,2,0,0,3,2,2,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,
+ 0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,2,0,2,3,2,2,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
+const SequenceModel Iso_8859_15PortugueseModel =
+{
+ Iso_8859_15_CharToOrderMap,
+ PortugueseLangModel,
+ 38,
+ (float)0.9952990712503466,
+ PR_TRUE,
+ "ISO-8859-15",
+ "pt"
+};
+
const SequenceModel Iso_8859_1PortugueseModel =
{
Iso_8859_1_CharToOrderMap,
PortugueseLangModel,
38,
- (float)0.9953179582313172,
+ (float)0.9952990712503466,
PR_TRUE,
"ISO-8859-1",
"pt"
};
-const SequenceModel Iso_8859_9PortugueseModel =
+const SequenceModel Windows_1252PortugueseModel =
{
- Iso_8859_9_CharToOrderMap,
+ Windows_1252_CharToOrderMap,
PortugueseLangModel,
38,
- (float)0.9953179582313172,
+ (float)0.9952990712503466,
PR_TRUE,
- "ISO-8859-9",
+ "WINDOWS-1252",
"pt"
};
-const SequenceModel Iso_8859_15PortugueseModel =
+const SequenceModel Iso_8859_9PortugueseModel =
{
- Iso_8859_15_CharToOrderMap,
+ Iso_8859_9_CharToOrderMap,
PortugueseLangModel,
38,
- (float)0.9953179582313172,
+ (float)0.9952990712503466,
PR_TRUE,
- "ISO-8859-15",
+ "ISO-8859-9",
"pt"
};
-const SequenceModel Windows_1252PortugueseModel =
+const LanguageModel PortugueseModel =
{
- Windows_1252_CharToOrderMap,
+ "pt",
+ Unicode_CharOrder,
+ 76,
PortugueseLangModel,
38,
- (float)0.9953179582313172,
- PR_TRUE,
- "WINDOWS-1252",
- "pt"
+ (float)0.9952990712503466,
};
diff --git a/src/LangModels/LangRomanianModel.cpp b/src/LangModels/LangRomanianModel.cpp
index cfb1b8d..430f51d 100644
--- a/src/LangModels/LangRomanianModel.cpp
+++ b/src/LangModels/LangRomanianModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Romanian *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-28 18:58:13.757152
+ * On: 2021-03-16 20:04:01.199893
**/
/* Character Mapping Table:
@@ -61,45 +62,45 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Iso_8859_16_CharToOrderMap[] =
+static const unsigned char Iso_8859_2_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */
- 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */
- 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 60, 61, 46,SYM,SYM, 38,SYM, 38,SYM, 19,SYM, 62,SYM, 63, 64, /* AX */
- SYM,SYM, 41, 46, 40,SYM,SYM,SYM, 40, 41, 19,SYM, 65, 66, 67, 68, /* BX */
- 69, 30, 24, 14, 33, 35, 53, 42, 45, 31, 58, 49, 70, 37, 20, 48, /* CX */
- 43, 52, 59, 34, 71, 44, 36, 56, 50, 72, 47, 73, 39, 74, 18, 57, /* DX */
- 75, 30, 24, 14, 33, 35, 53, 42, 45, 31, 58, 49, 76, 37, 20, 48, /* EX */
- 43, 52, 59, 34, 77, 44, 36, 56, 50, 78, 47, 79, 39, 80, 18, 81, /* FX */
+ SYM, 63,SYM, 45,SYM, 64, 58,SYM,SYM, 34, 65, 66, 67,SYM, 35, 59, /* AX */
+ SYM, 68,SYM, 45,SYM, 69, 58,SYM,SYM, 34, 70, 71, 72,SYM, 35, 59, /* BX */
+ 73, 30, 24, 14, 36, 74, 37, 42, 33, 31, 75, 54, 50, 39, 21, 76, /* CX */
+ 46, 51, 77, 38, 53, 47, 40,SYM, 52, 78, 44, 62, 41, 55, 79, 57, /* DX */
+ 80, 30, 24, 14, 36, 81, 37, 42, 33, 31, 82, 54, 50, 39, 21, 83, /* EX */
+ 46, 51, 84, 38, 53, 47, 40,SYM, 52, 85, 44, 62, 41, 55, 86,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_2_CharToOrderMap[] =
+static const unsigned char Iso_8859_16_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */
- 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */
- 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 82,SYM, 46,SYM, 83, 56,SYM,SYM, 38, 84, 85, 86,SYM, 40, 87, /* AX */
- SYM, 88,SYM, 46,SYM, 89, 56,SYM,SYM, 38, 90, 91, 92,SYM, 40, 93, /* BX */
- 94, 30, 24, 14, 33, 95, 35, 42, 41, 31, 96, 49, 51, 37, 20, 97, /* CX */
- 43, 52, 98, 34, 99, 44, 36,SYM, 55,100, 47, 50, 39, 54,101, 57, /* DX */
- 102, 30, 24, 14, 33,103, 35, 42, 41, 31,104, 49, 51, 37, 20,105, /* EX */
- 43, 52,106, 34,107, 44, 36,SYM, 55,108, 47, 50, 39, 54,109,SYM, /* FX */
+ SYM, 87, 88, 45,SYM,SYM, 34,SYM, 34,SYM, 19,SYM, 89,SYM, 90, 59, /* AX */
+ SYM,SYM, 33, 45, 35,SYM,SYM,SYM, 35, 33, 19,SYM, 91, 92, 93, 59, /* BX */
+ 60, 30, 24, 14, 36, 37, 56, 42, 43, 31, 94, 54, 48, 39, 21, 49, /* CX */
+ 46, 51, 61, 38, 53, 47, 40, 58, 62, 95, 44, 96, 41, 97, 18, 57, /* DX */
+ 60, 30, 24, 14, 36, 37, 56, 42, 43, 31, 98, 54, 48, 39, 21, 49, /* EX */
+ 46, 51, 61, 38, 53, 47, 40, 58, 62, 99, 44,100, 41,101, 18,102, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -110,17 +111,17 @@ static const unsigned char Windows_1250_CharToOrderMap[] =
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */
- 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */
- 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 56,110, 40,111, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 56,112, 40,113, /* 9X */
- SYM,SYM,SYM, 46,SYM,114,SYM,SYM,SYM,SYM,115,SYM,SYM,SYM,SYM,116, /* AX */
- SYM,SYM,SYM, 46,SYM,SYM,SYM,SYM,SYM,117,118,SYM,119,SYM,120,121, /* BX */
- 122, 30, 24, 14, 33,123, 35, 42, 41, 31,124, 49, 51, 37, 20,125, /* CX */
- 43, 52,126, 34,127, 44, 36,SYM, 55,128, 47, 50, 39, 54,129, 57, /* DX */
- 130, 30, 24, 14, 33,131, 35, 42, 41, 31,132, 49, 51, 37, 20,133, /* EX */
- 43, 52,134, 34,135, 44, 36,SYM, 55,136, 47, 50, 39, 54,137,SYM, /* FX */
+ 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 34,SYM, 58,103, 35,104, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 34,SYM, 58,105, 35,106, /* 9X */
+ SYM,SYM,SYM, 45,SYM,107,SYM,SYM,SYM,SYM,108,SYM,SYM,SYM,SYM, 59, /* AX */
+ SYM,SYM,SYM, 45,SYM,SYM,SYM,SYM,SYM,109,110,SYM,111,SYM,112, 59, /* BX */
+ 113, 30, 24, 14, 36,114, 37, 42, 33, 31,115, 54, 50, 39, 21,116, /* CX */
+ 46, 51,117, 38, 53, 47, 40,SYM, 52,118, 44, 62, 41, 55,119, 57, /* DX */
+ 120, 30, 24, 14, 36,121, 37, 42, 33, 31,122, 54, 50, 39, 21,123, /* EX */
+ 46, 51,124, 38, 53, 47, 40,SYM, 52,125, 44, 62, 41, 55,126,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -131,85 +132,99 @@ static const unsigned char Ibm852_CharToOrderMap[] =
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */
- 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */
- 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 42, 39, 31, 24, 33,138, 35, 42, 46, 49, 44, 44, 20,139, 33, 35, /* 8X */
- 31,140,141,142, 36,143,144, 56, 56, 36, 39,145,146, 46,SYM, 41, /* 9X */
- 30, 37, 34, 47,147,148, 40, 40,149,150,SYM,151, 41,152,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 30, 24, 51,153,SYM,SYM,SYM,SYM,154,155,SYM, /* BX */
+ 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 42, 41, 31, 24, 36,127, 37, 42, 45, 54, 47, 47, 21,128, 36, 37, /* 8X */
+ 31,129,130, 53, 40,131,132, 58, 58, 40, 41,133,134, 45,SYM, 33, /* 9X */
+ 30, 39, 38, 44,135,136, 35, 35,137,138,SYM,139, 33,140,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 30, 24, 50,141,SYM,SYM,SYM,SYM, 59, 59,SYM, /* BX */
SYM,SYM,SYM,SYM,SYM,SYM, 14, 14,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
- 43, 43,156, 49,157,158, 37, 20, 51,SYM,SYM,SYM,SYM,159,160,SYM, /* DX */
- 34, 57,161, 52, 52,162, 38, 38,163, 47,164, 50, 54, 54,165,SYM, /* EX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 50, 55, 55,SYM,SYM, /* FX */
+ 46, 46,142, 54,143,144, 39, 21, 50,SYM,SYM,SYM,SYM,145,146,SYM, /* DX */
+ 38, 57, 53, 51, 51,147, 34, 34,148, 44,149, 62, 55, 55,150,SYM, /* EX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 62, 52, 52,SYM,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 66;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 2, 66, 17, 67, 9, 68, 11, 69, 0, 70, 16, 71, 15, 72, 23,
+ 73, 1, 74, 26, 75, 27, 76, 6, 77, 12, 78, 4, 79, 8, 80, 13,
+ 81, 32, 82, 3, 83, 10, 84, 5, 85, 7, 86, 20, 87, 29, 88, 25,
+ 89, 28, 90, 22, 97, 2, 98, 17, 99, 9, 100, 11, 101, 0,102, 16,
+ 103, 15, 104, 23, 105, 1, 106, 26, 107, 27, 108, 6, 109, 12,110, 4,
+ 111, 8, 112, 13, 113, 32, 114, 3, 115, 10, 116, 5, 117, 7,118, 20,
+ 119, 29, 120, 25, 121, 28, 122, 22, 193, 30, 194, 24, 201, 31,206, 21,
+ 225, 30, 226, 24, 233, 31, 238, 21, 258, 14, 259, 14, 536, 19,537, 19,
+ 538, 18, 539, 18,
+};
+
/* Model Table:
- * Total sequences: 981
- * First 512 sequences: 0.997762564143313
- * Next 512 sequences (512-1024): 0.002237435856687006
- * Rest: 3.0357660829594124e-18
+ * Total sequences: 1066
+ * First 512 sequences: 0.9975318123681904
+ * Next 512 sequences (512-1024): 0.002424831763747681
+ * Rest: 4.3355868061878584e-05
* Negative sequences: TODO
*/
static const PRUint8 RomanianLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,2,3,3,3,2,2,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,0,3,3,3,3,3,0,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,3,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,2,3,3,3,3,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,2,2,3,3,3,3,0,2,2,3,3,2,3,0,
- 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,0,3,3,3,2,2,2,0,
- 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,2,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,0,3,2,3,3,3,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,2,0,3,2,3,3,0,3,3,2,2,0,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,2,2,3,3,3,0,2,3,3,3,2,2,2,
- 3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,2,3,0,0,0,3,2,3,3,0,2,2,3,3,3,2,0,
- 3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,2,2,3,3,2,0,0,3,2,2,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,0,2,3,0,2,0,2,3,3,0,2,2,3,0,2,2,0,
- 2,3,0,3,3,3,3,3,0,3,3,3,3,3,0,3,0,3,3,3,0,3,3,0,0,0,2,2,0,0,0,0,0,
- 3,3,3,3,3,2,3,3,3,0,2,3,3,2,3,3,2,3,0,0,2,3,2,3,3,0,2,0,3,2,2,2,0,
- 3,3,3,3,0,3,3,3,3,2,2,2,3,2,3,2,3,0,0,0,0,0,0,2,3,0,0,0,2,0,2,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,2,0,2,2,2,3,0,2,2,3,2,2,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,3,2,3,3,3,2,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,1,3,3,0,3,3,3,3,3,0,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,2,3,2,3,3,3,0,2,2,3,3,2,3,0,
+ 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,3,0,3,3,3,2,2,2,0,
+ 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,3,2,2,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,2,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,2,3,2,2,3,3,2,2,3,3,2,0,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,3,3,3,0,2,3,3,3,2,2,2,
+ 3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,0,0,3,0,2,2,3,0,2,2,3,3,3,2,0,
+ 3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,2,2,2,3,2,2,0,3,2,2,2,0,
+ 3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,0,0,2,3,0,0,2,0,3,3,0,2,2,2,0,2,2,0,
+ 2,3,1,3,3,3,3,3,0,3,3,3,3,3,0,3,1,3,3,3,3,2,3,0,0,0,2,2,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,2,3,0,0,3,1,2,3,3,0,1,2,3,2,1,2,0,
+ 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,3,3,1,0,0,0,0,0,2,3,0,2,1,2,2,2,2,0,
+ 3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,2,2,3,3,2,2,2,2,2,3,0,2,2,3,2,2,2,0,
3,3,3,0,0,0,0,3,2,2,2,0,0,0,3,0,0,0,0,0,2,2,0,0,2,0,0,2,0,0,0,0,0,
- 3,3,3,0,3,3,3,3,3,3,0,2,2,0,3,0,0,0,0,0,0,2,0,0,2,0,0,2,0,0,0,0,0,
- 0,3,0,2,3,0,3,0,0,0,0,0,3,0,0,0,0,0,2,3,0,0,2,2,0,0,0,2,0,0,0,0,0,
- 3,3,3,3,3,2,3,3,3,2,2,3,2,0,3,2,2,2,0,0,0,0,0,0,3,0,2,2,2,0,2,0,0,
- 3,3,3,2,2,2,2,3,3,0,2,3,2,2,3,2,0,3,0,0,0,3,3,2,3,0,0,2,2,0,2,2,0,
- 3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,0,2,3,0,0,0,2,2,0,2,0,2,2,3,2,2,2,0,
- 0,3,0,3,3,3,3,3,0,2,2,2,3,0,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,0,3,0,3,3,3,2,0,0,3,3,0,3,0,0,0,0,3,0,2,2,3,0,0,3,0,0,0,0,
- 3,3,3,2,2,2,3,3,3,0,2,2,2,0,2,0,0,2,0,0,0,2,0,0,2,0,0,2,0,0,2,0,0,
- 3,3,3,3,2,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,2,2,0,3,0,0,0,2,3,2,2,2,0,
- 3,2,3,3,3,2,3,2,3,3,3,3,3,2,0,2,0,2,0,0,0,2,2,2,0,0,2,2,0,2,2,0,0,
- 3,3,3,2,3,2,2,2,3,2,3,2,2,2,0,0,2,2,0,0,0,0,0,3,0,0,0,0,2,3,0,0,0,
- 2,3,0,3,3,2,2,0,0,2,2,2,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,
- 0,3,2,2,2,2,2,0,0,2,2,2,2,2,0,2,0,2,0,0,0,2,2,0,0,0,2,2,0,0,0,0,0,
- 0,0,2,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,3,3,0,3,3,2,3,3,3,0,0,2,2,2,0,0,0,0,0,2,0,0,0,2,0,0,2,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,2,2,3,2,1,3,2,0,2,0,0,0,2,2,2,3,0,2,2,2,0,2,0,0,
+ 0,3,0,2,3,0,3,0,0,2,0,0,3,0,0,2,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,2,2,2,2,3,3,0,2,3,2,2,3,2,0,3,0,0,3,0,3,2,3,0,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,0,2,3,0,0,2,0,2,0,2,0,2,2,3,2,2,2,0,
+ 0,3,0,3,3,3,3,3,0,1,2,0,3,0,0,0,0,0,2,3,0,0,2,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,2,2,3,0,3,3,3,2,2,2,3,3,0,3,2,0,0,3,0,0,0,2,3,0,0,3,2,0,0,0,
+ 3,3,3,2,2,2,3,3,3,2,2,2,2,0,2,0,0,2,0,0,1,0,2,0,2,0,0,2,0,0,2,0,0,
+ 3,3,3,3,2,2,3,3,3,2,3,1,3,2,2,2,2,2,1,2,2,0,2,3,2,0,2,2,3,2,2,2,0,
+ 3,2,3,3,3,2,3,2,3,3,3,3,3,2,0,2,2,2,0,0,2,0,2,2,0,2,2,2,0,2,2,0,0,
+ 3,3,3,3,3,2,3,2,3,2,3,2,2,2,0,0,2,2,0,0,0,0,0,3,0,0,0,2,2,3,0,0,0,
+ 2,3,0,2,3,2,2,0,0,2,2,2,2,2,0,2,0,0,0,0,2,0,2,2,0,0,0,2,2,0,0,0,0,
+ 0,3,2,2,1,2,2,0,2,2,2,2,2,2,0,2,0,2,0,0,2,0,2,0,0,0,2,2,0,0,0,0,0,
+ 0,2,2,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
};
-const SequenceModel Iso_8859_16RomanianModel =
+const SequenceModel Iso_8859_2RomanianModel =
{
- Iso_8859_16_CharToOrderMap,
+ Iso_8859_2_CharToOrderMap,
RomanianLangModel,
33,
- (float)0.997762564143313,
+ (float)0.9975318123681904,
PR_TRUE,
- "ISO-8859-16",
+ "ISO-8859-2",
"ro"
};
-const SequenceModel Iso_8859_2RomanianModel =
+const SequenceModel Iso_8859_16RomanianModel =
{
- Iso_8859_2_CharToOrderMap,
+ Iso_8859_16_CharToOrderMap,
RomanianLangModel,
33,
- (float)0.997762564143313,
+ (float)0.9975318123681904,
PR_TRUE,
- "ISO-8859-2",
+ "ISO-8859-16",
"ro"
};
@@ -218,7 +233,7 @@ const SequenceModel Windows_1250RomanianModel =
Windows_1250_CharToOrderMap,
RomanianLangModel,
33,
- (float)0.997762564143313,
+ (float)0.9975318123681904,
PR_TRUE,
"WINDOWS-1250",
"ro"
@@ -229,8 +244,18 @@ const SequenceModel Ibm852RomanianModel =
Ibm852_CharToOrderMap,
RomanianLangModel,
33,
- (float)0.997762564143313,
+ (float)0.9975318123681904,
PR_TRUE,
"IBM852",
"ro"
};
+
+const LanguageModel RomanianModel =
+{
+ "ro",
+ Unicode_CharOrder,
+ 66,
+ RomanianLangModel,
+ 33,
+ (float)0.9975318123681904,
+};
diff --git a/src/LangModels/LangSlovakModel.cpp b/src/LangModels/LangSlovakModel.cpp
index 480b4b5..ffc3410 100644
--- a/src/LangModels/LangSlovakModel.cpp
+++ b/src/LangModels/LangSlovakModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Slovak *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-21 13:33:10.331339
+ * On: 2021-03-16 20:13:09.022988
**/
/* Character Mapping Table:
@@ -61,212 +62,242 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Ibm852_CharToOrderMap[] =
+static const unsigned char Iso_8859_2_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */
- 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */
- 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 51, 46, 25, 62, 38, 48, 47, 51, 49, 54, 50, 50, 63, 64, 38, 47, /* 8X */
- 25, 42, 42, 32, 43, 33, 33, 65, 66, 43, 46, 31, 31, 49,SYM, 24, /* 9X */
- 21, 23, 35, 27, 67, 68, 26, 26, 69, 70,SYM, 71, 24, 59,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 21, 72, 41, 59,SYM,SYM,SYM,SYM, 61, 61,SYM, /* BX */
- SYM,SYM,SYM,SYM,SYM,SYM, 56, 56,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
- 55, 55, 39, 54, 39, 36, 23, 73, 41,SYM,SYM,SYM,SYM, 74, 48,SYM, /* DX */
- 35, 58, 32, 52, 52, 36, 28, 28, 44, 27, 44, 60, 22, 22, 75,SYM, /* EX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 60, 45, 45,SYM,SYM, /* FX */
+ SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */
+ 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */
+ 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ SYM, 64,SYM, 49,SYM, 33, 65,SYM,SYM, 29, 59, 31, 66,SYM, 26, 61, /* AX */
+ SYM, 67,SYM, 49,SYM, 33, 68,SYM,SYM, 29, 59, 31, 69,SYM, 26, 61, /* BX */
+ 45, 19, 70, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 71, 39, /* CX */
+ 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 72, 60, /* DX */
+ 45, 19, 73, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 74, 39, /* EX */
+ 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 75,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_2_CharToOrderMap[] =
+static const unsigned char Windows_1250_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */
- 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */
- 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 76,SYM, 49,SYM, 33, 77,SYM,SYM, 28, 59, 31, 78,SYM, 26, 61, /* AX */
- SYM, 79,SYM, 49,SYM, 33, 80,SYM,SYM, 28, 59, 31, 81,SYM, 26, 61, /* BX */
- 44, 21, 82, 56, 38, 42, 47, 51, 24, 25, 83, 54, 41, 23, 84, 39, /* CX */
- 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22, 85, 58, /* DX */
- 44, 21, 86, 56, 38, 42, 47, 51, 24, 25, 87, 54, 41, 23, 88, 39, /* EX */
- 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22, 89,SYM, /* FX */
+ SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */
+ 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */
+ 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 76, 31, 26, 77, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 78, 31, 26, 79, /* 9X */
+ SYM,SYM,SYM, 49,SYM, 80,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM, 61, /* AX */
+ SYM,SYM,SYM, 49,SYM,SYM,SYM,SYM,SYM, 81, 59,SYM, 33,SYM, 33, 61, /* BX */
+ 45, 19, 82, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 83, 39, /* CX */
+ 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 84, 60, /* DX */
+ 45, 19, 85, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 86, 39, /* EX */
+ 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 87,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
+static const unsigned char Ibm852_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */
- 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */
- 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 38, 90, 91, 25, 92, 43, 46, 21, 93, 24, 38, 24, 47, 47, 25, 94, /* 8X */
- 95, 39, 23, 39, 96, 97, 98, 35, 99, 32, 43,100, 27, 41, 41, 46, /* 9X */
- SYM,SYM,101,SYM,SYM,SYM,SYM, 58,SYM,SYM,SYM,102,SYM,SYM,103,104, /* AX */
- 105, 57,SYM,SYM, 57,106,SYM,SYM, 49,107,108, 33, 33, 42, 42,109, /* BX */
- 110, 52,SYM,SYM, 52, 36,SYM,SYM,SYM,SYM,SYM, 36, 50,111, 50, 53, /* CX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 53, 44, 44, 45,SYM,SYM, 45,112, /* DX */
- 113, 28,SYM,SYM, 28,114,115, 21, 31, 31, 23, 26, 26,116, 35, 32, /* EX */
- 117, 48, 27, 48, 60, 60,118,119, 22, 22,120, 61, 49, 61,121,SYM, /* FX */
+ SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */
+ 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */
+ 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 50, 46, 25, 88, 38, 48, 47, 50, 49, 51, 62, 62, 89, 90, 38, 47, /* 8X */
+ 25, 41, 41, 32, 43, 33, 33, 91, 92, 43, 46, 31, 31, 49,SYM, 24, /* 9X */
+ 19, 23, 34, 27, 93, 94, 26, 26, 57, 57,SYM, 95, 24, 59,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 19, 96, 42, 59,SYM,SYM,SYM,SYM, 61, 61,SYM, /* BX */
+ SYM,SYM,SYM,SYM,SYM,SYM, 55, 55,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
+ 53, 53, 39, 51, 39, 36, 23, 97, 42,SYM,SYM,SYM,SYM, 98, 48,SYM, /* DX */
+ 34, 60, 32, 54, 54, 36, 29, 29, 45, 27, 45, 56, 22, 22, 99,SYM, /* EX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 56, 44, 44,SYM,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Windows_1250_CharToOrderMap[] =
+static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */
- 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */
- 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 28,SYM,122, 31, 26,123, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 28,SYM,124, 31, 26,125, /* 9X */
- SYM,SYM,SYM, 49,SYM,126,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM, 61, /* AX */
- SYM,SYM,SYM, 49,SYM,SYM,SYM,SYM,SYM,127, 59,SYM, 33,SYM, 33, 61, /* BX */
- 44, 21,128, 56, 38, 42, 47, 51, 24, 25,129, 54, 41, 23,130, 39, /* CX */
- 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22,131, 58, /* DX */
- 44, 21,132, 56, 38, 42, 47, 51, 24, 25,133, 54, 41, 23,134, 39, /* EX */
- 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22,135,SYM, /* FX */
+ SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */
+ 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */
+ 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 38, 63, 63, 25,100, 43, 46, 19,101, 24, 38, 24, 47, 47, 25,102, /* 8X */
+ 103, 39, 23, 39,104,105,106, 34,107, 32, 43,108, 27, 42, 42, 46, /* 9X */
+ SYM,SYM, 57,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM, 57,SYM,SYM,109,110, /* AX */
+ 111, 58,SYM,SYM, 58,112,SYM,SYM, 49,113,114, 33, 33, 41, 41,115, /* BX */
+ 116, 54,SYM,SYM, 54, 36,SYM,SYM,SYM,SYM,SYM, 36, 62,117, 62, 52, /* CX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 52, 45, 45, 44,SYM,SYM, 44,118, /* DX */
+ 119, 29,SYM,SYM, 29,120,121, 19, 31, 31, 23, 26, 26,122, 34, 32, /* EX */
+ 123, 48, 27, 48, 56, 56,124,125, 22, 22,126, 61, 49, 61,127,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 92;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 1, 66, 20, 67, 15, 68, 11, 69, 2, 70, 28, 71, 30, 72, 17,
+ 73, 4, 74, 18, 75, 8, 76, 10, 77, 12, 78, 3, 79, 0, 80, 13,
+ 81, 40, 82, 6, 83, 7, 84, 5, 85, 14, 86, 9, 87, 37, 88, 35,
+ 89, 21, 90, 16, 97, 1, 98, 20, 99, 15, 100, 11, 101, 2,102, 28,
+ 103, 30, 104, 17, 105, 4, 106, 18, 107, 8, 108, 10, 109, 12,110, 3,
+ 111, 0, 112, 13, 113, 40, 114, 6, 115, 7, 116, 5, 117, 14,118, 9,
+ 119, 37, 120, 35, 121, 21, 122, 16, 193, 19, 196, 38, 201, 25,205, 23,
+ 211, 34, 212, 32, 214, 43, 218, 27, 221, 22, 225, 19, 228, 38,233, 25,
+ 237, 23, 243, 34, 244, 32, 246, 43, 250, 27, 253, 22, 268, 24,269, 24,
+ 270, 39, 271, 39, 282, 42, 283, 42, 313, 41, 314, 41, 317, 33,318, 33,
+ 327, 36, 328, 36, 340, 45, 341, 45, 344, 44, 345, 44, 352, 29,353, 29,
+ 356, 31, 357, 31, 381, 26, 382, 26,
+};
+
/* Model Table:
- * Total sequences: 1181
- * First 512 sequences: 0.9733303573968434
- * Next 512 sequences (512-1024): 0.026317344239265295
- * Rest: 0.0003522983638913346
+ * Total sequences: 1198
+ * First 512 sequences: 0.9724967373205526
+ * Next 512 sequences (512-1024): 0.02707798928941092
+ * Rest: 0.00042527339003644096
* Negative sequences: TODO
*/
static const PRUint8 SlovakLangModel[] =
{
- 2,2,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,
- 0,0,3,2,3,1,2,3,3,1,0,3,2,0,3,2,0,1,2,0,0,0,0,
- 2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,
- 0,0,3,0,3,0,3,3,3,3,0,2,3,1,2,2,0,2,2,0,0,0,0,
- 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,
- 0,2,3,0,3,2,3,3,3,2,0,3,3,3,3,2,0,3,2,0,0,1,0,
- 3,3,3,3,3,3,2,3,3,2,2,3,2,2,3,3,2,2,2,3,2,3,
- 3,3,3,3,2,3,3,2,3,0,2,0,0,2,0,2,0,0,2,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,
- 0,3,3,2,3,0,3,3,3,3,0,2,2,3,2,2,0,0,2,0,0,0,0,
- 3,3,3,3,3,2,3,3,3,3,3,1,3,2,3,2,3,3,2,3,2,3,
- 3,3,2,3,0,3,2,2,2,1,0,2,0,3,2,2,2,2,1,2,1,1,2,
- 3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,2,2,2,3,3,3,
- 3,3,3,3,2,2,2,2,3,2,3,0,2,3,2,2,2,0,2,0,0,1,0,
- 3,3,3,2,3,3,3,2,2,3,3,3,3,1,3,3,2,2,2,3,2,3,
- 3,2,2,3,2,3,2,2,2,0,3,2,0,2,2,2,0,0,0,0,0,2,1,
- 3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,
- 2,3,2,2,1,3,0,2,2,3,2,2,0,2,2,2,0,0,2,0,0,0,0,
- 3,3,3,3,3,2,3,2,3,0,3,3,2,3,3,2,3,2,0,3,2,3,
- 3,3,2,3,2,2,3,1,2,0,2,0,0,0,2,0,3,2,0,2,2,1,2,
- 3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,2,2,3,3,3,
- 3,3,2,3,2,3,2,2,3,0,1,0,0,3,2,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,
- 3,3,2,2,2,2,2,2,2,0,3,3,2,2,2,2,0,0,0,2,2,0,0,
- 3,3,3,3,3,3,2,2,3,1,2,2,3,3,3,2,0,0,2,3,3,3,
- 2,3,0,2,2,2,0,0,2,0,3,0,1,2,1,0,3,0,2,0,0,2,2,
- 3,3,3,3,3,3,3,2,2,0,3,2,2,2,3,2,1,2,0,2,2,3,
- 2,3,1,2,0,2,2,0,1,2,3,1,0,2,2,0,2,0,0,2,2,0,0,
- 2,2,2,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,
- 0,3,3,1,3,1,2,2,3,2,0,2,2,0,1,2,0,2,2,0,0,0,0,
- 3,3,3,3,3,2,2,3,2,2,2,2,2,0,3,2,2,3,0,2,0,2,
- 1,3,0,2,0,3,0,1,2,2,0,0,0,2,2,0,0,0,2,0,0,0,0,
- 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,
- 3,2,1,2,1,2,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,2,2,2,3,2,3,1,3,2,0,2,1,3,2,3,
- 3,2,2,2,0,2,2,1,0,0,0,2,0,1,2,2,1,0,0,0,2,1,2,
- 3,3,3,3,3,3,2,2,3,3,2,2,3,2,3,2,2,2,2,0,2,2,
- 0,3,2,0,0,3,3,0,2,1,0,2,0,2,0,0,1,0,0,0,0,0,0,
- 2,2,2,3,2,3,3,3,3,3,3,2,3,3,2,3,3,3,2,0,3,0,
- 0,0,2,0,2,2,3,1,2,3,0,1,0,1,2,1,0,0,0,0,0,0,0,
- 3,3,3,3,3,1,3,2,3,2,3,3,2,0,3,2,2,2,3,3,2,3,
- 2,2,2,2,1,2,2,0,2,0,1,0,0,1,2,0,1,0,0,0,0,1,0,
- 0,0,0,3,0,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,0,
- 0,0,2,0,2,0,3,0,1,0,0,2,0,0,2,0,0,2,0,0,0,0,0,
- 0,0,0,2,0,2,3,2,2,3,2,2,3,2,0,3,3,2,2,0,2,0,
- 0,0,1,0,2,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
- 0,2,0,3,0,3,3,3,3,3,3,2,3,3,0,3,2,2,2,0,2,0,
- 0,0,2,0,2,0,3,0,1,2,0,2,0,0,1,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,2,3,2,0,3,0,2,0,2,2,0,0,0,0,0,2,
- 0,3,0,1,1,1,3,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,
- 1,0,0,2,0,3,3,2,2,2,2,2,3,2,0,3,3,3,0,0,1,0,
- 0,0,2,0,0,0,2,2,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
- 3,3,3,3,3,0,1,2,3,1,1,3,1,0,3,0,0,0,2,0,2,0,
- 0,3,0,1,0,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
- 1,0,0,3,0,3,3,2,3,3,3,3,2,3,0,3,3,2,0,0,2,0,
- 0,0,3,0,2,0,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,0,2,2,2,3,1,2,3,2,0,0,2,0,0,2,1,
- 0,3,2,1,0,1,2,0,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,2,3,0,2,1,2,0,2,0,3,0,0,2,0,3,0,2,
- 0,2,0,2,0,1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,2,3,0,2,2,3,2,2,0,3,2,0,2,0,2,0,2,
- 0,0,0,2,0,2,0,0,2,0,0,0,0,2,0,1,0,0,0,0,0,2,0,
- 3,3,0,0,1,0,2,1,0,0,0,2,0,1,2,0,0,0,0,0,0,1,
+ 2,2,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,0,
+ 0,3,2,3,1,3,2,3,1,0,3,0,2,3,2,0,1,2,0,0,0,1,0,
+ 2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,
+ 0,3,0,3,0,3,3,3,3,0,2,1,3,2,2,0,2,2,0,0,0,2,0,
+ 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,
+ 2,3,0,3,2,3,3,3,2,0,3,3,3,3,2,0,3,2,0,0,1,0,0,
+ 3,3,3,3,3,3,2,3,3,2,2,3,2,2,3,3,2,2,2,3,2,3,3,
+ 3,3,3,2,3,2,3,3,0,2,0,2,0,0,2,0,0,2,0,2,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 3,3,2,3,0,3,3,3,3,0,2,3,2,2,2,0,0,2,0,0,1,2,0,
+ 3,3,3,3,3,2,3,3,3,3,3,1,3,2,3,2,3,3,2,3,3,3,3,
+ 3,2,3,0,3,2,2,2,1,0,2,3,0,2,1,2,2,1,1,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,3,3,3,
+ 3,3,3,2,2,2,2,3,2,2,0,3,2,2,2,2,0,2,0,0,2,0,0,
+ 3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,2,
+ 3,2,2,1,3,2,0,2,3,2,2,1,0,2,2,0,0,2,0,0,1,0,0,
+ 3,3,3,2,3,3,3,2,2,3,3,3,2,2,3,3,2,2,2,3,2,3,3,
+ 2,2,3,2,3,2,2,2,0,3,2,2,0,2,2,0,0,0,0,0,2,2,1,
+ 3,3,3,3,3,2,3,3,2,0,3,3,1,3,3,2,3,2,0,3,2,3,3,
+ 3,2,3,2,2,1,3,2,0,2,0,0,0,2,0,3,2,0,2,2,1,2,2,
+ 3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,2,2,3,2,3,3,
+ 3,2,3,2,3,2,2,3,0,1,0,3,0,2,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,
+ 3,2,2,2,2,2,2,2,0,3,3,2,2,2,2,0,0,0,2,2,0,2,0,
+ 3,3,3,3,3,3,2,3,2,0,2,2,3,3,3,2,0,0,2,3,3,3,2,
+ 3,1,2,2,2,0,0,1,0,3,0,2,1,1,0,3,0,1,0,0,1,0,2,
+ 3,3,3,3,3,3,3,2,2,0,3,2,2,2,3,2,1,2,0,3,2,2,2,
+ 3,1,2,0,2,2,2,0,2,3,1,2,0,2,0,2,0,0,2,2,0,2,0,
+ 2,2,2,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,
+ 2,3,1,3,1,2,3,3,2,0,2,0,2,1,2,0,2,2,0,0,0,0,0,
+ 3,3,3,3,3,2,2,2,3,2,2,2,1,0,3,2,2,3,0,2,0,2,1,
+ 3,0,2,0,3,1,0,2,2,0,0,2,0,2,0,0,0,2,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,3,
+ 2,1,2,1,2,0,2,2,2,0,2,2,0,2,2,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,2,2,2,3,2,3,1,3,2,0,2,1,3,2,3,3,
+ 2,1,2,0,2,1,2,0,0,0,2,1,0,2,2,1,0,0,2,0,2,1,2,
+ 3,3,3,3,3,3,2,3,2,3,2,2,3,2,3,2,2,2,2,2,2,0,0,
+ 3,2,0,0,3,0,3,2,0,0,2,2,0,1,0,1,0,0,0,0,0,0,0,
+ 0,0,0,3,0,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,0,0,
+ 0,2,0,2,0,0,3,1,0,0,2,0,0,2,0,0,2,0,0,0,0,2,0,
+ 3,3,3,3,3,2,3,3,2,2,3,3,2,0,3,2,2,2,3,3,2,3,2,
+ 2,2,2,1,2,0,2,2,0,1,0,1,0,2,0,1,0,0,0,0,1,2,0,
+ 2,2,2,3,2,3,3,3,3,3,3,2,3,3,2,3,3,3,2,0,3,0,0,
+ 0,2,0,2,2,1,3,2,3,0,1,1,0,2,1,0,0,0,0,0,0,0,0,
+ 0,0,0,2,0,2,3,2,2,3,2,2,3,2,0,3,3,2,2,0,2,0,0,
+ 0,2,0,1,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+ 0,2,0,3,0,3,3,3,3,3,3,2,3,3,0,3,2,2,2,0,2,0,0,
+ 0,2,0,2,0,0,3,1,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,2,2,3,0,3,0,2,0,2,2,0,0,0,2,0,0,0,
+ 3,0,1,1,1,0,3,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,
+ 2,0,2,2,0,3,3,2,2,2,2,2,3,2,0,3,3,3,0,0,1,0,0,
+ 0,2,0,0,0,2,2,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,0,1,2,2,1,2,3,0,0,3,0,0,0,1,0,2,0,0,
+ 3,0,2,0,0,0,2,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,
+ 1,0,0,3,0,3,3,3,2,3,3,3,2,3,0,3,3,2,0,0,2,0,0,
+ 0,3,0,2,0,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,2,3,2,3,1,2,1,2,0,2,0,3,0,0,1,0,2,0,3,0,
+ 2,0,2,0,1,2,1,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,0,2,2,2,3,1,2,3,2,0,0,2,0,1,2,0,0,
+ 3,2,1,0,1,0,2,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,
+ 3,3,3,2,3,2,3,2,0,2,3,2,2,1,3,1,0,2,1,2,0,2,0,
+ 0,0,2,0,2,0,0,2,0,0,0,2,0,0,1,0,0,0,0,0,2,0,0,
+ 3,3,0,0,1,0,2,0,1,0,0,2,0,1,2,0,0,0,0,2,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,2,3,3,3,3,3,2,1,0,0,2,3,0,0,0,2,0,
- 0,0,0,0,3,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,0,3,0,0,0,3,2,2,0,0,2,0,3,0,1,1,0,0,2,0,
- 0,0,1,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,2,2,2,3,2,0,1,0,0,2,0,2,3,2,0,0,0,0,2,0,0,
- 0,2,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,3,0,2,3,1,2,1,2,3,3,2,0,2,2,0,0,0,3,0,
- 0,0,0,0,1,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,0,0,0,0,0,0,0,0,0,1,2,0,2,0,0,2,0,0,0,2,
+ 0,0,0,0,0,2,3,3,3,3,3,2,1,0,0,2,2,0,0,0,2,0,0,
+ 0,0,0,3,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,0,3,0,0,0,2,3,2,0,0,2,0,3,0,1,1,0,0,2,0,0,
+ 0,2,0,0,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,3,0,2,3,2,1,1,2,3,3,3,0,1,2,0,0,0,3,0,0,
+ 0,0,0,1,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,2,2,2,3,3,0,0,1,0,2,0,2,2,2,0,0,1,0,0,0,1,0,
+ 1,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
+ 3,3,0,0,0,0,0,0,0,0,0,1,2,0,2,0,0,2,0,2,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,2,2,1,3,2,0,1,2,0,2,0,2,1,0,0,0,2,0,1,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
- 0,0,0,2,0,2,1,1,0,0,0,2,0,0,0,2,2,0,0,0,2,0,
- 0,0,3,0,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
- 2,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
- 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,2,0,0,1,0,0,0,0,2,0,0,0,0,0,0,2,
+ 2,2,2,1,3,2,0,2,1,0,2,0,1,0,0,0,0,2,0,0,1,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+ 0,0,0,2,0,2,1,0,1,0,0,2,0,0,0,2,2,0,0,0,2,0,0,
+ 0,3,0,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+ 2,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+ 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,2,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,2,0,2,2,2,0,0,1,0,1,0,0,1,2,0,2,0,0,0,
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
- 0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
- 0,0,0,2,0,2,2,0,2,0,1,2,1,0,0,0,0,2,0,0,2,0,
- 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,
- 0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+ 0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,
+ 0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+ 0,0,0,2,0,2,2,0,2,0,2,0,1,0,0,1,2,0,2,0,0,0,0,
+ 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,2,0,2,2,2,0,0,2,2,2,0,0,0,0,2,0,0,1,0,0,
+ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,
+ 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,
+ 0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
};
-const SequenceModel Ibm852SlovakModel =
+const SequenceModel Iso_8859_2SlovakModel =
{
- Ibm852_CharToOrderMap,
+ Iso_8859_2_CharToOrderMap,
SlovakLangModel,
- 45,
- (float)0.9733303573968434,
+ 46,
+ (float)0.9724967373205526,
PR_TRUE,
- "IBM852",
+ "ISO-8859-2",
"sk"
};
-const SequenceModel Iso_8859_2SlovakModel =
+const SequenceModel Windows_1250SlovakModel =
{
- Iso_8859_2_CharToOrderMap,
+ Windows_1250_CharToOrderMap,
SlovakLangModel,
- 45,
- (float)0.9733303573968434,
+ 46,
+ (float)0.9724967373205526,
PR_TRUE,
- "ISO-8859-2",
+ "WINDOWS-1250",
+ "sk"
+};
+
+const SequenceModel Ibm852SlovakModel =
+{
+ Ibm852_CharToOrderMap,
+ SlovakLangModel,
+ 46,
+ (float)0.9724967373205526,
+ PR_TRUE,
+ "IBM852",
"sk"
};
@@ -274,20 +305,19 @@ const SequenceModel Mac_CentraleuropeSlovakModel =
{
Mac_Centraleurope_CharToOrderMap,
SlovakLangModel,
- 45,
- (float)0.9733303573968434,
+ 46,
+ (float)0.9724967373205526,
PR_TRUE,
"MAC-CENTRALEUROPE",
"sk"
};
-const SequenceModel Windows_1250SlovakModel =
+const LanguageModel SlovakModel =
{
- Windows_1250_CharToOrderMap,
+ "sk",
+ Unicode_CharOrder,
+ 92,
SlovakLangModel,
- 45,
- (float)0.9733303573968434,
- PR_TRUE,
- "WINDOWS-1250",
- "sk"
+ 46,
+ (float)0.9724967373205526,
};
diff --git a/src/LangModels/LangSloveneModel.cpp b/src/LangModels/LangSloveneModel.cpp
index 160f054..ccb4f7f 100644
--- a/src/LangModels/LangSloveneModel.cpp
+++ b/src/LangModels/LangSloveneModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Slovene *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-28 22:06:46.134717
+ * On: 2021-03-16 20:20:05.416974
**/
/* Character Mapping Table:
@@ -67,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM, 41,SYM, 42,SYM, 43, 44,SYM,SYM, 22, 45, 46, 47,SYM, 23, 48, /* AX */
SYM, 49,SYM, 50,SYM, 51, 52,SYM,SYM, 22, 53, 54, 55,SYM, 23, 56, /* BX */
- 57, 32, 58, 59, 60, 61, 37, 34, 21, 29, 62, 36, 63, 30, 64, 65, /* CX */
- 66, 67, 68, 31, 35, 69, 70,SYM, 71, 72, 39, 73, 74, 40, 75, 76, /* DX */
- 77, 32, 78, 79, 80, 81, 37, 34, 21, 29, 82, 36, 83, 30, 84, 85, /* EX */
- 86, 87, 88, 31, 35, 89, 90,SYM, 91, 92, 39, 93, 94, 40, 95,SYM, /* FX */
+ 57, 33, 58, 59, 60, 61, 62, 31, 21, 29, 63, 37, 64, 30, 65, 66, /* CX */
+ 67, 68, 69, 32, 36, 70, 71,SYM, 72, 73, 39, 74, 75, 40, 76, 77, /* DX */
+ 78, 33, 79, 80, 81, 82, 83, 31, 21, 29, 84, 37, 85, 30, 86, 87, /* EX */
+ 88, 89, 90, 32, 36, 91, 92,SYM, 93, 94, 39, 95, 96, 40, 97,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -88,18 +89,18 @@ static const unsigned char Iso_8859_16_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 96, 97, 98,SYM,SYM, 22,SYM, 22,SYM, 99,SYM,100,SYM,101,102, /* AX */
- SYM,SYM, 21,103, 23,SYM,SYM,SYM, 23, 21,104,SYM,105,106,107,108, /* BX */
- 109, 32,110,111,112, 37,113, 34,114, 29, 33, 36,115, 30,116,117, /* CX */
- 118,119,120, 31, 35,121,122,123,124,125, 39,126,127,128,129,130, /* DX */
- 131, 32,132,133,134, 37,135, 34,136, 29, 33, 36,137, 30,138,139, /* EX */
- 140,141,142, 31, 35,143,144,145,146,147, 39,148,149,150,151,152, /* FX */
+ SYM, 98, 99,100,SYM,SYM, 22,SYM, 22,SYM,101,SYM,102,SYM,103,104, /* AX */
+ SYM,SYM, 21,105, 23,SYM,SYM,SYM, 23, 21,106,SYM,107,108,109,110, /* BX */
+ 111, 33,112,113,114,115,116, 31, 35, 29, 34, 37,117, 30,118,119, /* CX */
+ 120,121,122, 32, 36,123,124,125,126,127, 39,128,129,130,131,132, /* DX */
+ 133, 33,134,135,136,137,138, 31, 35, 29, 34, 37,139, 30,140,141, /* EX */
+ 142,143,144, 32, 36,145,146,147,148,149, 39,150,151,152,153,154, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -109,102 +110,115 @@ static const unsigned char Windows_1250_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,153,154, 23,155, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,156,157, 23,158, /* 9X */
- SYM,SYM,SYM,159,SYM,160,SYM,SYM,SYM,SYM,161,SYM,SYM,SYM,SYM,162, /* AX */
- SYM,SYM,SYM,163,SYM,SYM,SYM,SYM,SYM,164,165,SYM,166,SYM,167,168, /* BX */
- 169, 32,170,171,172,173, 37, 34, 21, 29,174, 36,175, 30,176,177, /* CX */
- 178,179,180, 31, 35,181,182,SYM,183,184, 39,185,186, 40,187,188, /* DX */
- 189, 32,190,191,192,193, 37, 34, 21, 29,194, 36,195, 30,196,197, /* EX */
- 198,199,200, 31, 35,201,202,SYM,203,204, 39,205,206, 40,207,SYM, /* FX */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,155,156, 23,157, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,158,159, 23,160, /* 9X */
+ SYM,SYM,SYM,161,SYM,162,SYM,SYM,SYM,SYM,163,SYM,SYM,SYM,SYM,164, /* AX */
+ SYM,SYM,SYM,165,SYM,SYM,SYM,SYM,SYM,166,167,SYM,168,SYM,169,170, /* BX */
+ 171, 33,172,173,174,175,176, 31, 21, 29,177, 37,178, 30,179,180, /* CX */
+ 181,182,183, 32, 36,184,185,SYM,186,187, 39,188,189, 40,190,191, /* DX */
+ 192, 33,193,194,195,196,197, 31, 21, 29,198, 37,199, 30,200,201, /* EX */
+ 202,203,204, 32, 36,205,206,SYM,207,208, 39,209,210, 40,211,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
+static const unsigned char Ibm852_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 208,209,210, 29,211,212,213, 32,214, 21,215, 21, 37, 37, 29,216, /* 8X */
- 217,218, 30,219, 38, 38,220, 31,221, 35,222,223, 39,224,225,226, /* 9X */
- SYM,SYM,227,SYM,SYM,SYM,SYM,228,SYM,SYM,SYM,229,SYM,SYM,230,231, /* AX */
- 232,233,SYM,SYM,234,235,SYM,SYM,236,237,238,239,240,241,242,243, /* BX */
- 244,245,SYM,SYM,246,247,SYM,SYM,SYM,SYM,SYM,248,249,249,249,249, /* CX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,249,SYM,SYM,249,249, /* DX */
- 249, 22,SYM,SYM, 22,249,249, 32,249,249, 30, 23, 23,249, 31, 35, /* EX */
- 249,249, 39,249,249,249,249,249, 40, 40,249,249,249,249,249,SYM, /* FX */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 31,212, 29,213,214,215,216, 31,217, 37,218,219,220,221,222,223, /* 8X */
+ 29,224,225, 36,226,227,228,229,230,231,232,233,234,235,SYM, 21, /* 9X */
+ 33, 30, 32, 39,236,237, 23, 23,238,239,SYM,240, 21,241,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 33,242,243,244,SYM,SYM,SYM,SYM,245,246,SYM, /* BX */
+ SYM,SYM,SYM,SYM,SYM,SYM,247,248,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
+ 249,249,249, 37,249,249, 30,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */
+ 32,249, 36,249,249,249, 22, 22,249, 39,249,249, 40, 40,249,SYM, /* EX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Ibm852_CharToOrderMap[] =
+static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */
- 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 34,249, 29,249,249,249, 37, 34,249, 36,249,249,249,249,249, 37, /* 8X */
- 29,249,249, 35,249,249,249,249,249,249,249,249,249,249,SYM, 21, /* 9X */
- 32, 30, 31, 39,249,249, 23, 23,249,249,SYM,249, 21,249,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 32,249,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* BX */
- SYM,SYM,SYM,SYM,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
- 249,249,249, 36,249,249, 30,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */
- 31,249, 35,249,249,249, 22, 22,249, 39,249,249, 40, 40,249,SYM, /* EX */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */
+ 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 249,249,249, 29,249,249,249, 33,249, 21,249, 21,249,249, 29,249, /* 8X */
+ 249,249, 30,249, 38, 38,249, 32,249, 36,249,249, 39,249,249,249, /* 9X */
+ SYM,SYM,249,SYM,SYM,SYM,SYM,249,SYM,SYM,SYM,249,SYM,SYM,249,249, /* AX */
+ 249,249,SYM,SYM,249,249,SYM,SYM,249,249,249,249,249,249,249,249, /* BX */
+ 249,249,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,249,249,249,249,249, /* CX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,249,SYM,SYM,249,249, /* DX */
+ 249, 22,SYM,SYM, 22,249,249, 33,249,249, 30, 23, 23,249, 32, 36, /* EX */
+ 249,249, 39,249,249,249,249,249, 40, 40,249,249,249,249,249,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 58;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 18, 67, 19, 68, 13, 69, 1, 70, 24, 71, 17, 72, 20,
+ 73, 2, 74, 9, 75, 12, 76, 8, 77, 14, 78, 4, 79, 3, 80, 11,
+ 81, 28, 82, 5, 83, 6, 84, 7, 85, 16, 86, 10, 87, 25, 88, 27,
+ 89, 26, 90, 15, 97, 0, 98, 18, 99, 19, 100, 13, 101, 1,102, 24,
+ 103, 17, 104, 20, 105, 2, 106, 9, 107, 12, 108, 8, 109, 14,110, 4,
+ 111, 3, 112, 11, 113, 28, 114, 5, 115, 6, 116, 7, 117, 16,118, 10,
+ 119, 25, 120, 27, 121, 26, 122, 15, 268, 21, 269, 21, 352, 22,353, 22,
+ 381, 23, 382, 23,
+};
+
/* Model Table:
- * Total sequences: 727
- * First 512 sequences: 0.9983524317161332
- * Next 512 sequences (512-1024): 0.0016475682838668457
- * Rest: -3.859759734048396e-17
+ * Total sequences: 698
+ * First 512 sequences: 0.998296272473889
+ * Next 512 sequences (512-1024): 0.00170372752611106
+ * Rest: -2.8189256484623115e-17
* Negative sequences: TODO
*/
static const PRUint8 SloveneLangModel[] =
{
- 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
+ 2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,3,3,2,0,0,3,2,3,3,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,0,0,3,2,3,3,0,
- 3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,2,3,2,3,3,3,2,3,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,2,3,2,0,
- 3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,2,0,0,
- 3,3,3,3,3,3,3,3,2,3,0,3,3,3,2,2,3,3,3,3,3,2,2,0,0,0,3,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,3,0,2,3,3,0,3,0,2,0,3,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,2,3,3,2,2,2,0,2,2,3,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,0,2,0,0,0,
- 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,0,0,
- 3,3,3,3,3,3,3,2,0,3,3,3,2,2,2,0,3,2,3,2,3,0,0,0,2,2,2,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,3,0,2,2,0,3,3,2,2,0,3,0,0,
- 3,3,3,3,3,3,3,3,0,3,2,3,3,3,2,2,3,2,2,3,3,0,0,0,2,2,3,2,2,
- 3,3,3,3,3,3,2,3,0,3,3,3,3,2,2,2,3,0,2,0,0,2,0,0,2,0,2,2,0,
- 3,3,3,3,3,3,0,0,3,3,2,2,3,2,0,0,3,0,2,2,0,0,2,0,0,0,0,0,0,
- 3,3,3,3,3,2,0,3,3,3,2,3,3,0,0,0,3,0,0,0,0,3,0,2,0,0,0,0,0,
- 3,3,3,2,3,2,0,2,3,3,2,0,3,0,0,0,3,2,3,2,0,0,0,2,0,0,0,0,0,
- 3,3,3,3,2,3,3,3,0,3,0,0,0,2,2,0,3,2,0,2,2,0,0,0,3,2,2,2,0,
- 3,3,3,3,2,2,2,3,0,0,2,3,0,2,2,0,3,2,3,3,2,0,0,0,2,2,2,2,0,
- 3,3,2,3,3,2,3,3,3,3,0,2,2,2,2,0,2,2,2,3,2,0,0,0,0,2,0,2,0,
- 3,3,3,3,3,0,3,0,0,2,0,0,0,0,2,0,2,2,2,0,2,0,0,0,2,0,2,3,0,
- 0,0,0,0,2,0,0,2,0,2,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,3,3,3,2,2,0,3,3,3,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,0,2,0,3,3,3,3,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,0,3,3,2,3,0,0,
+ 3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,2,3,3,3,2,3,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,3,3,3,3,2,2,0,2,2,0,
+ 3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,0,3,3,2,3,3,2,2,0,0,0,3,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,0,3,2,2,3,3,0,3,0,0,2,3,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,2,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,2,3,3,3,2,2,0,2,3,3,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,0,0,2,2,0,
+ 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,2,3,0,
+ 3,3,3,3,3,3,3,2,3,0,3,3,0,0,2,2,3,3,3,2,3,0,0,0,0,0,2,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,3,0,3,3,0,3,2,2,2,0,3,0,0,
+ 3,3,3,3,2,3,3,3,3,0,2,3,3,3,2,2,3,2,3,3,3,0,0,0,2,2,2,2,2,
+ 3,3,3,3,3,3,2,3,3,0,3,2,3,3,2,3,3,0,2,0,0,2,0,0,2,2,2,0,0,
+ 3,3,3,3,3,3,0,2,3,3,2,2,3,2,0,0,3,0,2,2,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,2,0,3,3,3,2,3,3,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0,
+ 3,3,3,2,3,2,0,0,2,3,2,0,3,0,0,0,2,2,3,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,2,3,3,3,3,2,0,0,0,0,2,0,3,2,0,0,0,0,0,0,3,2,0,0,0,
+ 3,3,3,3,3,0,3,2,2,0,0,0,0,2,0,0,2,2,2,0,2,0,0,0,0,3,2,0,0,
+ 2,3,2,3,3,2,3,2,3,3,0,2,2,2,3,0,2,2,2,3,2,0,0,0,0,2,0,0,0,
+ 3,2,3,3,0,2,2,3,0,0,2,3,0,2,2,0,3,2,3,3,0,0,0,0,0,2,2,3,0,
+ 0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,
};
@@ -213,7 +227,7 @@ const SequenceModel Iso_8859_2SloveneModel =
Iso_8859_2_CharToOrderMap,
SloveneLangModel,
29,
- (float)0.9983524317161332,
+ (float)0.998296272473889,
PR_TRUE,
"ISO-8859-2",
"sl"
@@ -224,7 +238,7 @@ const SequenceModel Iso_8859_16SloveneModel =
Iso_8859_16_CharToOrderMap,
SloveneLangModel,
29,
- (float)0.9983524317161332,
+ (float)0.998296272473889,
PR_TRUE,
"ISO-8859-16",
"sl"
@@ -235,30 +249,40 @@ const SequenceModel Windows_1250SloveneModel =
Windows_1250_CharToOrderMap,
SloveneLangModel,
29,
- (float)0.9983524317161332,
+ (float)0.998296272473889,
PR_TRUE,
"WINDOWS-1250",
"sl"
};
+const SequenceModel Ibm852SloveneModel =
+{
+ Ibm852_CharToOrderMap,
+ SloveneLangModel,
+ 29,
+ (float)0.998296272473889,
+ PR_TRUE,
+ "IBM852",
+ "sl"
+};
+
const SequenceModel Mac_CentraleuropeSloveneModel =
{
Mac_Centraleurope_CharToOrderMap,
SloveneLangModel,
29,
- (float)0.9983524317161332,
+ (float)0.998296272473889,
PR_TRUE,
"MAC-CENTRALEUROPE",
"sl"
};
-const SequenceModel Ibm852SloveneModel =
+const LanguageModel SloveneModel =
{
- Ibm852_CharToOrderMap,
+ "sl",
+ Unicode_CharOrder,
+ 58,
SloveneLangModel,
29,
- (float)0.9983524317161332,
- PR_TRUE,
- "IBM852",
- "sl"
+ (float)0.998296272473889,
};
diff --git a/src/LangModels/LangSwedishModel.cpp b/src/LangModels/LangSwedishModel.cpp
index 3dca8e8..e07efba 100644
--- a/src/LangModels/LangSwedishModel.cpp
+++ b/src/LangModels/LangSwedishModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Swedish *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-09-28 22:29:21.480940
+ * On: 2021-03-16 20:24:13.934277
**/
/* Character Mapping Table:
@@ -61,163 +62,186 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Windows_1252_CharToOrderMap[] =
+static const unsigned char Iso_8859_1_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM, 49,ILL, 50,ILL, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 52,ILL, 53, 54, /* 9X */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 55,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 56, 44, 57, 58, 17, 19, 38, 40, 32, 28, 45, 59, 60, 61, 47, 62, /* CX */
- 63, 64, 65, 66, 35, 67, 21,SYM, 37, 68, 69, 70, 31, 71, 72, 73, /* DX */
- 74, 44, 75, 76, 17, 19, 38, 40, 32, 28, 45, 77, 78, 79, 47, 80, /* EX */
- 81, 82, 83, 84, 35, 85, 21,SYM, 37, 86, 87, 88, 31, 89, 90, 91, /* FX */
+ SYM,SYM,SYM,SYM,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 49, 33, 50, 51, 16, 19, 37, 40, 32, 28, 42, 52, 53, 38, 43, 54, /* CX */
+ 55, 56, 57, 58, 59, 60, 22,SYM, 39, 61, 62, 63, 31, 64, 65, 66, /* DX */
+ 67, 33, 68, 69, 16, 19, 37, 40, 32, 28, 42, 70, 71, 38, 43, 72, /* EX */
+ 73, 74, 75, 76, 77, 78, 22,SYM, 39, 79, 80, 81, 31, 82, 83, 84, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_9_CharToOrderMap[] =
+static const unsigned char Iso_8859_4_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 92,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 93, 44, 94, 95, 17, 19, 38, 40, 32, 28, 45, 96, 97, 98, 47, 99, /* CX */
- 100,101,102,103, 35,104, 21,SYM, 37,105,106,107, 31,108,109,110, /* DX */
- 111, 44,112,113, 17, 19, 38, 40, 32, 28, 45,114,115,116, 47,117, /* EX */
- 118,119,120,121, 35,122, 21,SYM, 37,123,124,125, 31, 42,126,127, /* FX */
+ SYM, 85, 86, 87,SYM, 88, 89,SYM,SYM, 90, 91, 92, 93,SYM, 94,SYM, /* AX */
+ SYM, 95,SYM, 96,SYM, 97, 98,SYM,SYM, 99,100,101,102, 46,103, 46, /* BX */
+ 30, 33,104,105, 16, 19, 37,106,107, 28,108,109, 44, 38, 43, 48, /* CX */
+ 110,111, 35,112,113,114, 22,SYM, 39, 41,115,116, 31,117, 47,118, /* DX */
+ 30, 33,119,120, 16, 19, 37,121,122, 28,123,124, 44, 38, 43, 48, /* EX */
+ 125,126, 35,127,128,129, 22,SYM, 39, 41,130,131, 31,132, 47,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_1_CharToOrderMap[] =
+static const unsigned char Iso_8859_9_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM,128,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 129, 44,130,131, 17, 19, 38, 40, 32, 28, 45,132,133,134, 47,135, /* CX */
- 136,137,138,139, 35,140, 21,SYM, 37,141,142,143, 31,144,145,146, /* DX */
- 147, 44,148,149, 17, 19, 38, 40, 32, 28, 45,150,151,152, 47,153, /* EX */
- 154,155,156,157, 35,158, 21,SYM, 37,159,160,161, 31,162,163,164, /* FX */
+ SYM,SYM,SYM,SYM,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 133, 33,134,135, 16, 19, 37, 40, 32, 28, 42,136,137, 38, 43,138, /* CX */
+ 139,140,141,142,143,144, 22,SYM, 39,145,146,147, 31,148,149,150, /* DX */
+ 151, 33,152,153, 16, 19, 37, 40, 32, 28, 42,154,155, 38, 43,156, /* EX */
+ 157,158,159,160,161,162, 22,SYM, 39,163,164,165, 31, 45,166,167, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_4_CharToOrderMap[] =
+static const unsigned char Iso_8859_15_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,165,166,167,SYM,168,169,SYM,SYM,170,171,172,173,SYM,174,SYM, /* AX */
- SYM,175,SYM,176,SYM,177,178,SYM,SYM,179,180,181,182, 43,183, 43, /* BX */
- 29, 44,184,185, 17, 19, 38,186,187, 28,188,189, 39,190, 47, 41, /* CX */
- 191,192, 33,193, 35,194, 21,SYM, 37, 36,195,196, 31,197, 46,198, /* DX */
- 29, 44,199,200, 17, 19, 38,201,202, 28,203,204, 39,205, 47, 41, /* EX */
- 206,207, 33,208, 35,209, 21,SYM, 37, 36,210,211, 31,212, 46,SYM, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM,168,SYM,169,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,170, 34,SYM,SYM,171,SYM,SYM,SYM,172,173,174,SYM, /* BX */
+ 175, 33,176,177, 16, 19, 37, 40, 32, 28, 42,178,179, 38, 43,180, /* CX */
+ 181,182,183,184,185,186, 22,SYM, 39,187,188,189, 31,190,191,192, /* DX */
+ 193, 33,194,195, 16, 19, 37, 40, 32, 28, 42,196,197, 38, 43,198, /* EX */
+ 199,200,201,202,203,204, 22,SYM, 39,205,206,207, 31,208,209,210, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_15_CharToOrderMap[] =
+static const unsigned char Windows_1252_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */
- 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
- CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM,213,SYM,214,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,215,216,SYM,SYM,217,SYM,SYM,SYM,218,219,220,SYM, /* BX */
- 221, 44,222,223, 17, 19, 38, 40, 32, 28, 45,224,225,226, 47,227, /* CX */
- 228,229,230,231, 35,232, 21,SYM, 37,233,234,235, 31,236,237,238, /* DX */
- 239, 44,240,241, 17, 19, 38, 40, 32, 28, 45,242,243,244, 47,245, /* EX */
- 246,247,248,249, 35,249, 21,SYM, 37,249,249,249, 31,249,249,249, /* FX */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */
+ 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM, 36,SYM,SYM,SYM,SYM,SYM,SYM,211,SYM,212,ILL,213,ILL, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,214,SYM,215,ILL,216,217, /* 9X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 218, 33,219,220, 16, 19, 37, 40, 32, 28, 42,221,222, 38, 43,223, /* CX */
+ 224,225,226,227,228,229, 22,SYM, 39,230,231,232, 31,233,234,235, /* DX */
+ 236, 33,237,238, 16, 19, 37, 40, 32, 28, 42,239,240, 38, 43,241, /* EX */
+ 242,243,244,245,246,247, 22,SYM, 39,248,249,249, 31,249,249,249, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 60;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 21, 67, 20, 68, 9, 69, 1, 70, 13, 71, 12, 72, 17,
+ 73, 6, 74, 23, 75, 11, 76, 7, 77, 10, 78, 3, 79, 8, 80, 18,
+ 81, 29, 82, 2, 83, 5, 84, 4, 85, 15, 86, 14, 87, 26, 88, 25,
+ 89, 24, 90, 27, 97, 0, 98, 21, 99, 20, 100, 9, 101, 1,102, 13,
+ 103, 12, 104, 17, 105, 6, 106, 23, 107, 11, 108, 7, 109, 10,110, 3,
+ 111, 8, 112, 18, 113, 29, 114, 2, 115, 5, 116, 4, 117, 15,118, 14,
+ 119, 26, 120, 25, 121, 24, 122, 27, 196, 16, 197, 19, 201, 28,214, 22,
+ 228, 16, 229, 19, 233, 28, 246, 22,
+};
+
/* Model Table:
- * Total sequences: 748
- * First 512 sequences: 0.997323508584682
- * Next 512 sequences (512-1024): 0.0026764914153179875
- * Rest: 1.7780915628762273e-17
+ * Total sequences: 752
+ * First 512 sequences: 0.996987580875875
+ * Next 512 sequences (512-1024): 0.00301241912412493
+ * Rest: 4.640385298237959e-17
* Negative sequences: TODO
*/
static const PRUint8 SwedishLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,2,3,3,3,3,3,2,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,3,3,3,3,3,3,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,2,2,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,2,2,3,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,2,3,3,2,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,0,2,0,2,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,2,0,2,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,0,3,3,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,2,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,3,3,3,3,0,2,3,2,0,0,0,2,0,0,0,
- 3,3,3,2,3,2,3,3,3,2,0,2,2,2,3,2,3,3,0,3,2,3,0,3,3,0,0,0,2,0,0,
- 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,2,2,2,2,3,2,0,2,3,2,0,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,0,2,0,3,2,3,2,0,3,0,0,0,2,0,
- 2,2,3,3,3,3,0,3,0,3,3,3,3,3,3,3,2,2,0,0,3,0,3,0,0,3,0,0,0,0,0,
- 3,3,3,3,3,2,3,2,3,2,2,2,2,0,0,0,3,3,2,3,2,3,2,3,3,0,0,3,0,2,0,
- 2,3,3,3,3,3,2,3,0,3,3,3,3,3,2,0,0,0,2,0,0,2,3,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,2,3,3,3,2,3,2,2,2,2,0,3,0,3,0,3,2,2,0,3,0,0,2,2,0,2,
- 3,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,0,2,2,0,3,2,2,3,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,2,3,3,2,3,3,3,3,3,3,0,0,2,2,0,0,
- 3,3,0,2,2,3,2,3,3,3,2,0,0,0,2,0,3,3,0,0,0,3,2,0,0,0,0,0,2,0,0,
- 3,2,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,0,2,0,3,0,3,2,0,3,0,2,0,0,0,
- 3,3,0,3,3,0,3,2,3,0,2,2,0,0,2,3,2,0,2,0,0,0,2,0,2,2,0,0,0,0,0,
- 3,3,2,2,2,3,3,2,3,2,2,0,0,0,0,0,2,0,2,0,0,0,0,0,2,0,2,2,0,0,0,
- 3,3,0,2,2,0,2,0,3,0,2,0,0,0,0,0,2,0,2,0,0,0,2,0,2,0,0,2,0,0,0,
- 0,3,2,2,0,2,0,2,2,2,0,0,0,2,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0,
- 0,0,0,2,0,0,2,0,3,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,2,3,3,3,3,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,3,3,3,3,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,2,2,3,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,3,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,2,0,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,0,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,0,2,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,2,0,0,
+ 3,3,3,2,3,2,3,3,3,2,2,0,2,3,2,3,3,0,2,3,2,0,3,3,3,0,0,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,0,3,2,2,0,0,0,2,0,0,
+ 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,2,3,0,3,3,2,2,0,3,2,2,0,0,
+ 2,2,3,3,3,3,2,3,0,3,3,3,3,3,3,2,2,0,3,0,3,3,0,0,0,3,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,2,2,2,2,2,0,3,3,2,0,3,2,2,3,3,3,0,0,3,0,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,2,2,3,0,0,2,2,0,
+ 2,3,3,3,3,3,2,3,0,3,2,3,3,2,3,0,0,2,0,0,0,2,2,2,0,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,2,2,3,2,2,2,3,0,3,0,0,3,2,0,0,3,0,0,2,2,2,
+ 3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,3,3,2,3,3,3,3,3,3,3,0,0,2,2,0,
+ 3,3,3,3,3,3,2,3,2,3,3,3,3,2,3,0,2,2,3,0,3,2,2,3,0,0,0,0,0,0,
+ 3,3,0,2,2,3,2,3,3,3,0,2,0,2,0,3,3,2,0,0,0,2,3,0,0,0,0,0,0,0,
+ 3,2,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,2,3,0,3,3,0,2,0,3,2,2,0,0,
+ 3,3,2,2,3,0,3,3,3,0,2,2,0,2,0,2,0,2,3,0,0,2,0,0,2,2,0,0,0,0,
+ 3,3,2,2,2,2,3,2,3,2,0,2,0,2,0,2,0,3,0,0,0,0,0,0,2,0,2,2,0,0,
+ 3,3,0,2,2,0,2,2,3,0,0,2,0,2,0,2,0,2,0,0,0,2,0,0,2,0,0,2,0,0,
+ 0,3,2,2,0,2,0,2,0,2,0,0,0,0,2,2,0,2,2,0,0,0,2,0,0,0,0,0,0,0,
+ 0,0,0,2,0,0,2,0,0,0,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
-const SequenceModel Windows_1252SwedishModel =
+const SequenceModel Iso_8859_1SwedishModel =
{
- Windows_1252_CharToOrderMap,
+ Iso_8859_1_CharToOrderMap,
SwedishLangModel,
- 31,
- (float)0.997323508584682,
+ 30,
+ (float)0.996987580875875,
PR_TRUE,
- "WINDOWS-1252",
+ "ISO-8859-1",
+ "sv"
+};
+
+const SequenceModel Iso_8859_4SwedishModel =
+{
+ Iso_8859_4_CharToOrderMap,
+ SwedishLangModel,
+ 30,
+ (float)0.996987580875875,
+ PR_TRUE,
+ "ISO-8859-4",
"sv"
};
@@ -225,42 +249,41 @@ const SequenceModel Iso_8859_9SwedishModel =
{
Iso_8859_9_CharToOrderMap,
SwedishLangModel,
- 31,
- (float)0.997323508584682,
+ 30,
+ (float)0.996987580875875,
PR_TRUE,
"ISO-8859-9",
"sv"
};
-const SequenceModel Iso_8859_1SwedishModel =
+const SequenceModel Iso_8859_15SwedishModel =
{
- Iso_8859_1_CharToOrderMap,
+ Iso_8859_15_CharToOrderMap,
SwedishLangModel,
- 31,
- (float)0.997323508584682,
+ 30,
+ (float)0.996987580875875,
PR_TRUE,
- "ISO-8859-1",
+ "ISO-8859-15",
"sv"
};
-const SequenceModel Iso_8859_4SwedishModel =
+const SequenceModel Windows_1252SwedishModel =
{
- Iso_8859_4_CharToOrderMap,
+ Windows_1252_CharToOrderMap,
SwedishLangModel,
- 31,
- (float)0.997323508584682,
+ 30,
+ (float)0.996987580875875,
PR_TRUE,
- "ISO-8859-4",
+ "WINDOWS-1252",
"sv"
};
-const SequenceModel Iso_8859_15SwedishModel =
+const LanguageModel SwedishModel =
{
- Iso_8859_15_CharToOrderMap,
+ "sv",
+ Unicode_CharOrder,
+ 60,
SwedishLangModel,
- 31,
- (float)0.997323508584682,
- PR_TRUE,
- "ISO-8859-15",
- "sv"
+ 30,
+ (float)0.996987580875875,
};
diff --git a/src/LangModels/LangThaiModel.cpp b/src/LangModels/LangThaiModel.cpp
index 9880e09..847745b 100644
--- a/src/LangModels/LangThaiModel.cpp
+++ b/src/LangModels/LangThaiModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Thai *********/
/**
* Generated by BuildLangModel.py
- * On: 2015-12-04 03:05:06.182099
+ * On: 2021-03-16 20:29:56.647545
**/
/* Character Mapping Table:
@@ -61,207 +62,230 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
-static const unsigned char Tis_620_CharToOrderMap[] =
+static const unsigned char Iso_8859_11_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 66, 70, 67, 80, 78, 87, 85, 73, 79, 93, 88, 84, 68, 77, 81, /* 4X */
- 75,101, 74, 61, 71, 86, 96, 90,103,100, 99,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 35, 64, 48, 52, 32, 60, 65, 54, 36, 97, 76, 46, 56, 41, 40, /* 6X */
- 59,104, 43, 45, 44, 55, 72, 82, 94, 57, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 66, 74, 57, 76, 86, 83, 87, 80, 64, 95, 94, 79, 78, 77, 82, /* 4X */
+ 75,104, 72, 56, 71, 93, 81, 89,103, 96, 97,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 37, 73, 51, 54, 38, 68, 67, 49, 39, 99, 84, 48, 58, 42, 44, /* 6X */
+ 65,101, 45, 47, 46, 50, 70, 85,100, 61, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- ILL, 3, 23,105, 15,106, 89, 5, 21, 63, 26, 31,102, 42, 69, 58, /* AX */
- 49, 91, 83, 34, 9, 17, 30, 12, 39, 1, 16, 19, 33, 62, 22, 47, /* BX */
- 38, 7, 10, 2, 50, 11,107, 8, 28, 37, 13, 18, 98, 4, 53, 95, /* CX */
- 14,SYM, 0, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */
- 6, 20, 27, 24, 25,108, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,109, /* EX */
+ SYM, 3, 25,106, 15,107, 88, 6, 22, 55, 23, 29,102, 41, 69, 59, /* AX */
+ 40, 90, 63, 35, 11, 14, 32, 13, 33, 1, 17, 18, 31, 62, 21, 43, /* BX */
+ 34, 7, 9, 2, 53, 10,108, 8, 26, 36, 12, 20, 91, 4, 52, 98, /* CX */
+ 16,SYM, 0, 30,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */
+ 5, 19, 27, 24, 28,105, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,109, /* EX */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,110,111,ILL,ILL,ILL,ILL, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const unsigned char Iso_8859_11_CharToOrderMap[] =
+static const unsigned char Tis_620_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 66, 70, 67, 80, 78, 87, 85, 73, 79, 93, 88, 84, 68, 77, 81, /* 4X */
- 75,101, 74, 61, 71, 86, 96, 90,103,100, 99,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 35, 64, 48, 52, 32, 60, 65, 54, 36, 97, 76, 46, 56, 41, 40, /* 6X */
- 59,104, 43, 45, 44, 55, 72, 82, 94, 57, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 66, 74, 57, 76, 86, 83, 87, 80, 64, 95, 94, 79, 78, 77, 82, /* 4X */
+ 75,104, 72, 56, 71, 93, 81, 89,103, 96, 97,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 37, 73, 51, 54, 38, 68, 67, 49, 39, 99, 84, 48, 58, 42, 44, /* 6X */
+ 65,101, 45, 47, 46, 50, 70, 85,100, 61, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 3, 23,112, 15,113, 89, 5, 21, 63, 26, 31,102, 42, 69, 58, /* AX */
- 49, 91, 83, 34, 9, 17, 30, 12, 39, 1, 16, 19, 33, 62, 22, 47, /* BX */
- 38, 7, 10, 2, 50, 11,114, 8, 28, 37, 13, 18, 98, 4, 53, 95, /* CX */
- 14,SYM, 0, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */
- 6, 20, 27, 24, 25,115, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,116, /* EX */
- NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,117,118,ILL,ILL,ILL,ILL, /* FX */
+ ILL, 3, 25,112, 15,113, 88, 6, 22, 55, 23, 29,102, 41, 69, 59, /* AX */
+ 40, 90, 63, 35, 11, 14, 32, 13, 33, 1, 17, 18, 31, 62, 21, 43, /* BX */
+ 34, 7, 9, 2, 53, 10,114, 8, 26, 36, 12, 20, 91, 4, 52, 98, /* CX */
+ 16,SYM, 0, 30,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */
+ 5, 19, 27, 24, 28,105, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,115, /* EX */
+ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,116,117,ILL,ILL,ILL,ILL, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 64;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 67, 57, 83, 56, 97, 37, 99, 51, 100, 54, 101, 38, 104, 49, 105, 39,
+ 108, 48, 109, 58, 110, 42, 111, 44, 114, 45, 115, 47, 116, 46, 117, 50,
+ 121, 61, 3585, 3, 3586, 25, 3588, 15, 3591, 6, 3592, 22, 3593, 55,3594, 23,
+ 3595, 29, 3597, 41, 3599, 59, 3600, 40, 3602, 63, 3603, 35, 3604, 11,3605, 14,
+ 3606, 32, 3607, 13, 3608, 33, 3609, 1, 3610, 17, 3611, 18, 3612, 31,3613, 62,
+ 3614, 21, 3615, 43, 3616, 34, 3617, 7, 3618, 9, 3619, 2, 3620, 53,3621, 10,
+ 3623, 8, 3624, 26, 3625, 36, 3626, 12, 3627, 20, 3629, 4, 3630, 52,3632, 16,
+ 3634, 0, 3635, 30, 3648, 5, 3649, 19, 3650, 27, 3651, 24, 3652, 28,3654, 60,
+};
+
/* Model Table:
- * Total sequences: 2324
- * First 512 sequences: 0.8815720594354438
- * Next 512 sequences (512-1024): 0.0920860122682917
- * Rest: 0.026341928296264486
+ * Total sequences: 2704
+ * First 512 sequences: 0.8690353564146914
+ * Next 512 sequences (512-1024): 0.09940380137019393
+ * Rest: 0.03156084221511464
* Negative sequences: TODO
*/
static const PRUint8 ThaiLangModel[] =
{
- 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,
- 0,2,3,0,0,3,2,3,0,0,2,0,0,0,0,2,0,1,1,1,0,2,0,0,0,0,1,0,0,0,1,1,
- 3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,
- 0,3,0,0,0,1,3,3,0,0,1,0,0,0,0,2,0,2,1,2,0,1,0,0,0,0,0,0,0,0,2,1,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,3,1,3,2,
- 0,2,3,0,0,2,2,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,2,1,
- 3,3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,
- 0,2,1,0,0,3,2,1,0,0,0,0,0,0,0,1,0,3,3,1,0,1,0,0,0,0,3,0,0,0,1,1,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,3,3,2,2,1,2,2,2,
- 0,2,0,0,0,0,2,2,0,0,1,0,0,0,0,2,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,
- 3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,
- 0,3,0,0,0,1,2,2,0,0,1,0,0,0,0,2,0,1,1,2,0,2,0,0,0,0,0,0,0,0,2,1,
- 0,3,3,3,3,2,0,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,0,0,3,0,3,0,1,3,
- 0,2,0,0,0,2,2,2,0,0,0,0,0,0,0,3,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,3,
- 3,3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,1,0,2,1,
- 0,2,2,0,1,2,2,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,2,2,2,3,3,3,2,2,2,2,2,2,0,2,2,
- 0,1,2,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,3,1,0,1,0,0,0,0,0,0,0,0,1,1,
- 3,3,3,3,3,3,3,2,3,2,3,3,3,3,0,3,2,3,2,2,3,2,2,3,3,3,2,2,1,3,2,1,
- 0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,1,2,2,
- 0,2,0,0,0,0,3,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1,
- 3,3,2,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,2,2,2,2,1,3,2,2,2,2,1,3,1,2,
- 0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,
- 3,3,3,1,2,1,2,1,2,3,3,1,1,2,2,3,2,1,2,1,1,1,2,1,1,1,1,1,3,3,0,1,
- 0,0,0,0,0,1,1,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,3,3,3,3,3,2,3,2,2,2,2,3,3,3,2,2,1,1,1,2,2,1,2,1,3,3,2,
- 0,1,0,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
- 0,3,3,3,3,1,3,3,3,3,3,2,3,3,0,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,2,2,
- 0,2,1,0,0,0,2,2,0,0,1,0,0,0,0,1,0,1,1,0,0,2,0,0,0,0,1,0,0,0,1,1,
- 3,3,3,1,3,2,2,3,3,2,2,3,1,1,2,2,1,2,1,2,1,3,1,1,1,1,1,2,0,3,0,1,
- 0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,
- 3,3,3,3,3,1,3,2,3,3,2,3,3,3,1,3,3,3,3,3,3,2,2,2,3,3,2,2,2,2,2,2,
- 0,2,0,0,0,0,2,1,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,
- 3,3,3,3,3,1,2,1,2,1,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,1,1,2,1,3,3,1,
- 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
- 3,3,3,1,2,1,0,3,3,1,2,3,1,1,1,0,0,3,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,2,3,3,3,1,2,1,2,2,2,3,2,2,2,1,1,2,1,2,2,2,1,1,2,2,1,1,1,0,2,1,
- 0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,3,0,0,0,0,0,
- 0,3,3,3,3,1,0,3,2,2,2,3,3,3,0,3,3,3,3,3,0,1,2,2,0,0,1,0,0,0,3,3,
- 0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,
- 3,3,3,3,3,1,3,2,2,2,1,1,2,2,3,2,1,2,1,1,2,3,3,2,2,2,1,2,0,3,1,2,
- 0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
- 3,1,3,2,3,1,2,2,3,2,3,3,3,2,0,1,3,1,1,1,2,2,1,2,1,1,1,1,1,1,1,0,
- 0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,1,1,3,0,1,1,2,1,2,1,2,1,0,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,1,1,
- 0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,3,0,3,0,0,0,0,0,2,1,0,0,2,0,1,1,3,3,1,0,3,0,0,0,0,3,0,0,0,0,0,
- 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,1,3,2,2,0,0,3,3,3,0,2,3,1,0,2,2,2,2,3,0,1,1,3,0,0,1,0,0,0,1,2,
- 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 3,3,1,2,3,1,2,2,2,1,2,2,2,2,1,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,
- 0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
- 0,3,3,2,3,0,0,2,1,3,2,3,3,1,0,3,2,3,1,2,0,2,2,1,0,0,1,0,1,0,1,2,
- 0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,
- 3,3,2,2,2,0,2,2,2,1,2,1,2,2,0,1,1,2,1,1,2,2,1,2,2,2,1,1,1,0,1,1,
- 0,0,0,0,0,2,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,
- 0,3,3,3,2,2,3,2,2,2,1,3,2,2,0,3,2,2,3,1,3,1,2,2,3,2,1,2,1,0,2,1,
- 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,
- 3,2,1,1,2,1,2,2,2,1,1,2,2,1,1,1,2,1,1,1,2,1,1,1,2,1,1,1,1,0,1,0,
- 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
- 3,3,1,1,3,2,2,1,1,1,1,2,1,0,1,1,1,2,0,1,1,0,0,0,0,1,1,1,0,0,0,1,
- 0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
- 2,0,0,2,2,0,0,0,2,3,0,3,2,3,3,0,2,0,0,0,2,0,1,2,2,1,0,2,2,1,0,0,
- 1,2,0,1,0,1,1,1,1,1,2,3,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,
+ 1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,
+ 2,3,2,3,3,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,1,0,0,1,0,
+ 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
+ 3,3,3,1,1,0,0,0,2,1,0,2,0,0,0,0,0,0,0,0,1,2,0,1,0,0,0,0,1,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,2,2,1,2,
+ 3,1,2,3,3,0,0,0,2,1,0,2,0,0,0,0,0,0,0,0,1,1,0,2,0,0,0,1,0,0,1,1,
+ 3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,2,3,3,3,2,2,3,2,3,3,2,3,3,3,3,2,
+ 2,1,2,1,3,0,0,0,2,1,0,3,0,0,0,0,0,0,0,0,1,3,0,1,1,0,0,2,1,0,1,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,2,1,2,3,2,3,2,
+ 2,2,2,0,1,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,1,0,
+ 0,3,3,3,3,0,2,3,3,3,3,3,3,3,3,3,0,3,3,1,3,3,3,3,0,3,3,0,0,3,0,2,
+ 1,3,2,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,3,0,0,0,1,0,0,1,0,
+ 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,
+ 3,1,2,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,2,0,
+ 3,3,3,3,3,3,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,2,3,3,1,0,2,
+ 2,1,3,2,2,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
+ 3,3,3,3,3,3,3,3,2,3,3,2,2,3,2,2,2,3,2,2,2,2,3,2,2,2,1,3,3,2,0,1,
+ 1,0,1,2,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
+ 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,1,2,
+ 2,2,2,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,2,0,
+ 3,3,2,3,3,3,3,2,2,2,3,3,3,3,2,2,3,2,3,2,2,1,2,3,2,2,1,2,2,2,2,1,
+ 1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,
+ 3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,2,1,2,2,3,2,2,3,2,3,3,1,2,2,2,2,2,
+ 1,1,2,0,1,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0,
+ 3,3,3,3,3,3,3,3,3,1,2,3,2,2,3,1,2,2,2,2,3,1,1,1,2,1,1,3,2,2,3,1,
+ 3,2,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,
+ 3,3,3,1,3,2,1,2,3,3,1,3,2,1,1,3,2,2,1,1,3,2,1,1,1,1,3,1,1,1,3,1,
+ 0,3,1,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,1,2,3,3,3,1,3,2,2,1,3,3,1,2,1,1,2,2,2,2,1,1,2,1,2,2,
+ 1,1,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,
+ 3,3,3,2,3,3,2,3,3,2,3,2,2,2,2,2,2,2,1,2,2,1,1,1,2,1,1,3,1,3,3,1,
+ 1,1,1,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
+ 0,3,3,3,3,3,1,3,3,3,2,3,3,3,3,3,1,3,3,3,3,3,3,3,3,2,2,3,3,2,0,2,
+ 2,3,2,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,1,0,
+ 3,3,3,3,3,3,1,2,3,2,2,3,3,3,3,3,1,3,3,3,2,2,2,2,2,2,1,3,3,2,1,3,
+ 1,1,2,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,2,1,0,1,0,
+ 3,3,3,3,3,2,1,1,2,2,3,2,2,2,2,1,2,1,2,2,1,1,2,1,2,1,1,2,2,1,0,1,
+ 2,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,0,1,0,
+ 0,3,3,3,3,0,1,3,2,2,3,3,3,3,3,3,0,3,3,0,3,3,2,2,0,3,0,0,0,2,0,3,
+ 2,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
+ 3,3,3,2,1,0,1,3,3,2,3,1,1,1,3,1,1,1,2,0,0,1,0,0,0,0,1,0,0,0,0,0,
+ 0,0,2,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,3,1,2,2,1,2,2,3,3,1,2,2,1,1,1,3,1,2,1,1,1,2,2,2,2,1,1,2,1,1,
+ 1,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,3,0,0,0,0,0,0,0,0,1,0,
+ 3,3,3,2,3,2,1,3,1,1,2,2,2,2,2,1,3,2,1,2,1,2,3,1,2,2,1,1,2,1,3,1,
+ 1,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
+ 3,3,1,2,3,2,1,2,2,2,2,1,2,2,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+ 0,3,0,2,0,0,0,0,0,1,0,2,2,0,3,1,0,2,0,0,3,0,3,3,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+ 3,3,1,1,3,1,1,1,2,2,1,1,1,1,3,1,1,2,1,1,1,1,1,1,1,1,0,1,1,1,1,0,
+ 1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,3,2,2,2,1,2,2,2,2,1,2,2,3,1,1,2,1,2,1,1,3,1,2,2,1,1,2,2,0,1,
+ 1,0,1,0,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
+ 0,3,3,2,3,0,1,3,1,2,3,3,1,2,3,3,0,2,3,0,1,3,2,1,0,1,1,0,0,2,0,1,
+ 1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,
+ 0,2,3,2,2,0,1,3,3,0,2,3,1,3,2,2,0,2,3,0,2,2,1,1,0,2,1,0,0,2,0,0,
+ 1,1,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,1,1,2,2,1,2,1,1,2,1,1,1,2,1,1,2,1,1,1,1,0,1,0,0,0,2,1,1,0,1,
+ 0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,3,3,3,2,3,2,2,2,1,3,2,2,2,1,3,0,2,2,3,3,1,2,1,3,2,1,1,2,1,0,1,
+ 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,
+ 1,2,0,1,1,0,1,1,1,2,3,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,1,2,2,1,1,1,1,1,1,1,1,2,2,3,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,1,
- 0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,0,1,2,0,0,0,1,3,0,3,3,2,3,0,2,0,0,0,2,0,1,1,2,2,0,2,1,1,0,0,
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,3,1,0,0,0,3,3,0,2,3,3,2,0,3,0,0,0,2,0,1,1,2,0,0,1,1,0,0,0,
- 3,1,1,2,1,0,1,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,
- 0,1,3,0,0,1,2,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,1,0,0,0,1,0,
- 3,0,2,1,1,0,0,1,0,0,1,0,2,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,1,3,1,2,1,1,2,1,1,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,
- 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,1,1,0,0,0,1,3,0,3,2,2,2,0,2,0,0,0,2,0,1,2,2,1,0,2,3,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,2,2,0,0,0,2,2,0,1,3,2,1,0,2,0,0,0,3,0,1,1,1,1,0,0,1,0,0,0,
- 3,1,1,1,1,0,2,1,1,0,0,1,2,1,0,1,1,1,2,1,1,1,1,1,2,1,2,1,1,0,1,1,
- 0,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
+ 3,2,1,1,2,2,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,
+ 1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,3,1,2,1,1,2,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+ 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,
+ 3,1,1,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,
+ 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
+ 3,1,2,2,1,1,1,1,1,1,1,1,2,1,1,1,3,1,2,1,2,1,1,1,1,1,2,1,1,1,0,0,
+ 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+ 3,1,1,1,1,1,0,1,1,2,1,1,1,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
+ 1,0,2,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,1,1,2,0,0,3,0,1,3,3,2,3,2,1,2,0,0,2,0,0,1,2,0,0,2,0,0,
+ 0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
+ 0,0,0,0,0,2,1,2,0,0,3,1,1,3,2,3,2,1,1,2,0,0,2,0,0,0,2,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,3,3,0,0,0,2,2,0,2,2,2,1,0,2,0,0,0,2,0,1,1,1,2,0,1,1,0,0,0,
+ 0,0,0,0,0,2,2,0,0,0,3,0,3,2,3,3,2,1,1,3,0,0,2,0,0,0,2,0,0,1,0,0,
+ 3,1,1,2,2,2,0,2,1,1,1,1,2,1,1,1,0,3,2,2,1,1,1,1,1,1,1,1,1,1,0,1,
+ 1,2,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,
+ 3,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,1,1,1,2,2,1,1,2,1,1,0,1,1,1,0,0,
+ 1,1,1,0,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,2,3,0,0,0,2,1,0,2,2,2,1,0,1,0,0,0,1,0,3,2,1,2,0,1,1,0,0,0,
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,1,2,0,0,0,2,1,0,1,3,2,1,0,2,0,0,0,1,0,2,1,1,1,0,1,0,0,0,0,
+ 0,0,0,0,0,3,2,2,0,0,1,0,2,1,3,2,1,1,1,2,0,0,2,0,0,1,1,0,0,1,0,0,
+ 2,2,2,1,3,3,0,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,2,2,0,0,0,2,2,0,0,1,1,2,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,0,
- 1,1,3,2,2,0,2,1,1,1,1,2,1,1,0,1,1,2,1,0,1,1,1,1,1,1,1,1,0,0,0,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,2,2,0,0,0,2,0,0,1,2,1,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,
- 3,1,1,1,2,0,1,2,1,0,0,0,1,2,0,1,2,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,
- 0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,3,0,0,0,0,0,2,0,0,1,0,0,1,0,2,2,0,0,1,0,0,0,0,0,0,2,0,1,0,
- 0,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,1,1,1,0,0,3,0,2,3,2,1,2,1,2,2,0,0,2,0,0,0,2,0,0,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
+ 0,0,0,0,0,3,3,3,0,0,2,0,2,1,2,2,1,1,2,2,0,0,2,0,0,1,1,0,0,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,2,2,0,0,0,2,0,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,2,0,0,2,0,0,0,
- 2,1,1,0,2,0,2,1,1,1,1,2,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,2,3,3,0,0,1,0,2,2,1,1,1,2,2,1,0,0,1,0,0,1,1,0,0,2,0,0,
+ 0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,
+ 0,0,0,0,0,1,2,2,0,0,1,1,2,1,3,2,1,2,1,1,0,0,1,0,0,0,1,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,2,2,0,0,0,2,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,
+ 0,0,0,0,0,2,2,2,0,0,1,0,2,0,2,1,2,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,
+ 0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,2,3,2,0,0,1,0,2,2,1,1,1,1,2,1,0,0,1,0,0,0,1,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,0,1,1,0,0,0,0,2,0,2,2,2,2,0,2,0,0,0,2,0,1,0,1,1,0,1,1,1,0,0,
+ 0,0,0,0,0,2,1,1,0,0,2,0,1,2,2,2,2,1,0,1,0,0,1,0,0,0,2,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,2,2,0,0,0,2,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,2,0,1,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,0,1,1,0,0,0,1,1,0,0,1,2,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,
- 1,0,1,2,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,
+ 0,0,0,0,0,2,2,2,0,0,0,0,2,1,2,1,1,2,1,1,0,0,0,0,0,1,0,0,0,1,0,0,
+ 2,1,1,1,2,1,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,1,0,1,0,0,0,1,0,0,0,2,0,2,2,1,0,0,0,0,1,1,0,1,0,0,2,0,0,0,0,0,
+ 0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,2,2,2,0,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,
+ 2,1,1,1,0,0,1,1,1,1,2,1,0,0,0,0,1,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,2,1,0,0,0,2,0,0,2,1,1,2,0,0,0,0,0,0,0,2,1,1,2,0,1,0,0,0,0,
+ 0,0,0,0,0,1,1,1,0,0,1,0,1,1,2,0,1,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,1,2,0,0,0,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,2,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,0,1,1,0,0,0,1,0,0,0,2,0,0,0,2,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,
- 0,1,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,2,1,1,0,0,0,0,3,1,0,0,1,2,1,0,0,0,0,0,1,1,0,0,0,1,0,0,
+ 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,2,2,2,0,0,1,0,2,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,
+ 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,
+ 1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+ 0,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,1,1,0,0,0,0,1,1,1,1,2,0,0,1,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+ 0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,2,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,
+ 1,1,2,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,3,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+ 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
+const SequenceModel Iso_8859_11ThaiModel =
+{
+ Iso_8859_11_CharToOrderMap,
+ ThaiLangModel,
+ 64,
+ (float)0.8690353564146914,
+ PR_FALSE,
+ "ISO-8859-11",
+ "th"
+};
+
const SequenceModel Tis_620ThaiModel =
{
Tis_620_CharToOrderMap,
ThaiLangModel,
64,
- (float)0.8815720594354438,
+ (float)0.8690353564146914,
PR_FALSE,
"TIS-620",
"th"
};
-const SequenceModel Iso_8859_11ThaiModel =
+const LanguageModel ThaiModel =
{
- Iso_8859_11_CharToOrderMap,
+ "th",
+ Unicode_CharOrder,
+ 64,
ThaiLangModel,
64,
- (float)0.8815720594354438,
- PR_FALSE,
- "ISO-8859-11",
- "th"
+ (float)0.8690353564146914,
};
diff --git a/src/LangModels/LangTurkishModel.cpp b/src/LangModels/LangTurkishModel.cpp
index 16c133f..c1b16c1 100644
--- a/src/LangModels/LangTurkishModel.cpp
+++ b/src/LangModels/LangTurkishModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Turkish *********/
/**
* Generated by BuildLangModel.py
- * On: 2015-12-04 02:24:44.730727
+ * On: 2021-03-16 20:34:51.083622
**/
/* Character Mapping Table:
@@ -67,18 +68,18 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 15, 21, 7, 1, 26, 22, 19, 6, 28, 9, 5, 11, 3, 14, /* 4X */
- 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 15, 21, 7, 1, 26, 22, 19, 2, 28, 9, 5, 11, 3, 14, /* 6X */
- 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 15, 22, 9, 1, 27, 21, 19, 6, 28, 7, 5, 11, 3, 14, /* 4X */
+ 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 15, 22, 9, 1, 27, 21, 19, 2, 28, 7, 5, 11, 3, 14, /* 6X */
+ 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM, 48,SYM,SYM,SYM,ILL, 49,SYM,SYM, 2, 17, 25, 50,SYM,ILL, 51, /* AX */
- SYM, 52,SYM,SYM,SYM,SYM, 53,SYM,SYM, 6, 17, 25, 54,SYM,ILL, 55, /* BX */
- 41, 36, 30,ILL, 39, 56, 57, 24, 42, 33, 58, 45, 59, 37, 31, 60, /* CX */
- ILL, 47, 61, 38, 62, 63, 27,SYM, 64, 65, 40, 35, 16, 66, 67, 68, /* DX */
- 41, 36, 30,ILL, 39, 69, 70, 24, 42, 33, 71, 45, 72, 37, 31, 73, /* EX */
- ILL, 47, 74, 38, 75, 76, 27,SYM, 77, 78, 40, 35, 16, 79, 80,SYM, /* FX */
+ SYM, 54,SYM,SYM,SYM,ILL, 55,SYM,SYM, 2, 17, 26, 56,SYM,ILL, 50, /* AX */
+ SYM, 57,SYM,SYM,SYM,SYM, 58,SYM,SYM, 6, 17, 26, 59,SYM,ILL, 50, /* BX */
+ 48, 36, 30,ILL, 39, 60, 61, 24, 40, 34, 62, 44, 63, 37, 31, 64, /* CX */
+ ILL, 41, 52, 38, 46, 51, 25,SYM, 65, 66, 45, 32, 16, 67, 68, 69, /* DX */
+ 48, 36, 30,ILL, 39, 70, 71, 24, 40, 34, 72, 44, 73, 37, 31, 74, /* EX */
+ ILL, 41, 52, 38, 46, 51, 25,SYM, 75, 76, 45, 32, 16, 77, 78,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -88,67 +89,78 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 0, 15, 21, 7, 1, 26, 22, 19, 6, 28, 9, 5, 11, 3, 14, /* 4X */
- 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 0, 15, 21, 7, 1, 26, 22, 19, 2, 28, 9, 5, 11, 3, 14, /* 6X */
- 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 0, 15, 22, 9, 1, 27, 21, 19, 6, 28, 7, 5, 11, 3, 14, /* 4X */
+ 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 0, 15, 22, 9, 1, 27, 21, 19, 2, 28, 7, 5, 11, 3, 14, /* 6X */
+ 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 41, 36, 30, 44, 39, 82, 46, 24, 42, 33, 83, 45, 84, 37, 31, 85, /* CX */
- 25, 47, 86, 38, 87, 88, 27,SYM, 43, 89, 40, 35, 16, 2, 17, 90, /* DX */
- 41, 36, 30, 44, 39, 91, 46, 24, 42, 33, 92, 45, 93, 37, 31, 94, /* EX */
- 25, 47, 95, 38, 96, 97, 27,SYM, 43, 98, 40, 35, 16, 6, 17, 99, /* FX */
+ SYM,SYM,SYM,SYM,SYM, 79,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 48, 36, 30, 47, 39, 42, 49, 24, 40, 34, 80, 44, 81, 37, 31, 82, /* CX */
+ 26, 41, 52, 38, 46, 83, 25,SYM, 43, 84, 45, 32, 16, 2, 17, 85, /* DX */
+ 48, 36, 30, 47, 39, 42, 49, 24, 40, 34, 86, 44, 87, 37, 31, 88, /* EX */
+ 26, 41, 52, 38, 46, 89, 25,SYM, 43, 90, 45, 32, 16, 6, 17, 53, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 66;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 0, 66, 15, 67, 22, 68, 9, 69, 1, 70, 27, 71, 21, 72, 19,
+ 73, 2, 73, 6, 74, 28, 75, 7, 76, 5, 77, 11, 78, 3, 79, 14,
+ 80, 23, 82, 4, 83, 10, 84, 8, 85, 12, 86, 18, 87, 29, 89, 13,
+ 90, 20, 97, 0, 98, 15, 99, 22, 100, 9, 101, 1, 102, 27,103, 21,
+ 104, 19, 105, 2, 106, 28, 107, 7, 108, 5, 109, 11, 110, 3,111, 14,
+ 112, 23, 114, 4, 115, 10, 116, 8, 117, 12, 118, 18, 119, 29,121, 13,
+ 122, 20, 194, 30, 199, 24, 206, 31, 214, 25, 219, 32, 220, 16,226, 30,
+ 231, 24, 238, 31, 246, 25, 251, 32, 252, 16, 286, 26, 287, 26,305, 6,
+ 350, 17, 351, 17,
+};
+
/* Model Table:
- * Total sequences: 935
- * First 512 sequences: 0.991865243864388
- * Next 512 sequences (512-1024): 0.008134756135611957
- * Rest: 2.949029909160572e-17
+ * Total sequences: 1097
+ * First 512 sequences: 0.9923593121944019
+ * Next 512 sequences (512-1024): 0.007545326169453709
+ * Rest: 9.536163614441446e-05
* Negative sequences: TODO
*/
static const PRUint8 TurkishLangModel[] =
{
- 3,2,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,2,0,
- 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,2,0,3,0,2,0,
- 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,2,2,2,0,2,0,2,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,2,2,2,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,2,2,2,2,
- 3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,2,3,0,3,2,2,2,2,3,0,2,2,2,
- 3,2,0,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,3,2,3,3,2,3,2,3,2,0,0,0,0,0,2,0,0,0,
- 3,3,3,2,3,3,3,3,2,2,2,2,3,3,3,2,3,0,2,2,2,2,2,2,0,0,0,3,2,3,2,2,0,0,0,0,
- 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,2,2,3,0,2,3,2,2,3,2,2,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,0,2,3,2,2,3,0,0,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,2,3,3,0,2,3,0,2,2,0,0,2,2,2,
- 3,3,3,2,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,0,3,2,3,2,0,2,2,0,2,3,2,2,2,2,2,
- 3,3,3,3,3,3,0,3,3,3,3,3,2,3,2,3,0,3,3,3,3,3,3,3,3,3,2,0,2,2,0,0,2,2,0,0,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,2,2,3,2,2,0,2,3,0,2,2,0,0,2,0,2,
- 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,0,0,
- 3,3,3,3,3,3,3,3,0,2,2,3,3,3,3,3,3,0,2,2,2,2,0,2,0,0,0,3,2,2,2,0,0,2,0,0,
- 2,2,2,3,3,3,0,3,3,3,3,3,0,3,2,3,0,3,3,3,3,3,2,3,3,3,3,0,2,0,0,0,0,0,0,0,
- 3,3,3,0,2,3,3,2,3,3,2,3,3,2,2,3,3,2,0,2,2,2,2,2,3,0,2,2,0,0,2,2,0,0,0,0,
- 3,3,3,2,2,3,3,3,2,2,0,3,3,3,3,2,3,0,2,2,0,3,3,0,0,0,0,2,0,0,2,2,0,0,0,0,
- 3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,2,2,2,2,0,2,3,0,2,0,0,2,3,2,0,2,0,2,
- 3,3,3,2,3,3,2,2,0,2,3,2,3,3,3,2,2,2,2,2,3,2,2,0,0,0,2,0,0,0,2,2,0,0,0,0,
- 3,3,3,2,3,3,3,2,3,3,2,2,3,2,3,2,3,0,2,3,0,2,0,0,0,0,0,2,0,0,2,0,0,2,2,2,
- 3,3,3,2,3,3,3,2,2,2,2,0,3,2,3,0,3,0,2,3,2,0,2,2,0,0,2,3,2,2,2,0,0,2,0,0,
- 3,3,3,0,3,3,3,2,3,2,3,3,3,2,3,2,2,0,2,3,0,2,2,3,2,0,2,0,0,2,2,0,2,2,0,0,
- 3,3,3,0,2,3,3,2,3,2,0,3,3,2,3,2,3,2,0,0,0,0,2,2,0,0,0,3,0,0,0,0,0,0,0,0,
- 3,3,3,0,3,3,3,3,0,0,0,3,3,0,0,2,3,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,3,3,2,3,2,2,0,3,3,3,2,2,0,0,2,0,2,2,0,2,0,2,2,2,0,2,2,0,0,0,0,
- 0,0,0,3,3,3,0,3,3,3,3,3,0,3,0,2,0,2,3,2,2,0,0,2,3,3,2,0,2,0,0,0,0,0,0,0,
- 3,3,3,0,0,2,2,2,0,2,0,0,3,0,3,0,2,0,0,0,0,2,2,2,0,0,0,2,0,0,2,0,0,0,0,0,
- 3,3,3,2,2,2,0,0,0,2,2,2,2,2,3,2,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,
- 0,0,2,3,3,3,0,3,2,2,2,2,0,2,0,2,0,2,2,3,2,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,
- 0,0,0,2,0,2,0,2,2,0,0,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
- 3,2,2,0,0,0,2,0,2,0,0,0,0,2,2,0,0,0,0,0,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0,
- 2,0,2,2,2,2,0,2,2,0,2,2,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
- 2,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,2,2,0,2,0,0,2,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,2,2,0,0,
+ 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,3,3,3,0,0,0,
+ 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,3,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,2,2,3,1,
+ 3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,2,3,2,2,3,3,2,
+ 2,2,0,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,2,2,3,2,3,3,2,2,3,3,2,2,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,2,2,2,3,3,0,2,2,2,3,2,3,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,0,3,3,2,2,2,2,3,3,0,2,2,2,2,2,0,
+ 3,3,3,2,3,3,3,2,2,3,3,2,3,3,3,2,3,0,2,2,2,2,2,2,0,3,0,2,2,2,2,2,0,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,0,3,3,2,2,3,3,3,3,0,2,1,2,2,2,2,
+ 3,3,3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,2,3,0,2,2,2,2,3,2,
+ 3,3,3,3,3,3,0,3,3,3,3,3,2,3,2,3,0,3,3,3,3,3,3,3,3,0,3,3,2,0,0,0,0,
+ 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,2,2,3,3,2,2,3,0,2,2,2,2,0,2,
+ 2,2,2,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,0,3,3,3,3,0,1,0,
+ 3,3,3,2,3,3,3,2,2,3,2,3,3,3,3,3,3,2,2,2,2,2,2,0,0,3,0,2,2,2,2,2,2,
+ 2,2,2,3,3,3,0,3,3,3,3,3,0,3,1,3,0,3,3,2,3,2,3,3,3,2,3,3,2,0,0,0,0,
+ 3,3,3,2,2,3,3,3,3,2,2,3,3,2,2,3,3,1,2,2,0,3,2,2,3,2,0,2,0,0,2,2,1,
+ 3,3,3,2,3,3,3,2,2,3,2,2,3,3,3,2,2,2,3,2,2,2,3,0,2,0,0,2,2,0,2,2,0,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,2,2,2,2,0,2,2,3,2,0,2,2,2,3,2,0,
+ 3,3,3,2,2,3,3,2,2,3,2,3,3,3,3,2,3,0,2,2,2,3,3,2,1,2,0,1,0,2,2,2,1,
+ 3,3,3,2,3,3,3,2,2,2,2,2,3,2,3,2,3,0,2,3,2,2,1,2,0,3,0,0,0,2,2,0,0,
+ 3,3,3,2,3,3,3,3,2,2,2,2,3,2,3,3,3,0,0,3,2,2,2,1,0,2,0,2,0,0,2,0,0,
+ 3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,2,2,1,0,3,2,2,1,2,3,0,0,2,0,0,2,0,0,
+ 3,3,3,0,2,3,3,2,3,0,2,3,3,2,3,2,3,0,0,2,0,2,2,0,2,2,0,0,0,0,0,0,0,
+ 0,0,0,3,3,3,0,3,3,3,3,3,0,3,0,2,0,2,2,2,3,2,2,2,2,0,3,2,0,2,0,0,0,
+ 3,3,3,2,3,3,3,0,1,3,2,3,3,0,2,2,3,0,2,0,2,2,2,0,0,0,0,1,0,0,0,0,0,
+ 3,3,3,2,3,3,3,3,3,0,2,2,3,3,3,2,2,1,0,2,2,3,2,0,2,2,0,2,1,0,2,2,0,
+ 3,3,3,2,2,2,2,2,0,2,2,0,3,2,3,0,2,0,0,0,0,2,2,2,0,2,0,1,0,0,0,0,0,
+ 3,3,3,2,2,2,0,2,0,2,2,2,2,2,2,0,0,2,0,2,0,0,1,2,0,0,0,2,0,2,0,0,0,
+ 2,2,2,3,2,3,0,2,2,2,2,2,0,2,0,2,0,2,2,2,2,0,0,0,0,0,2,2,0,0,0,2,0,
+ 0,0,0,2,2,2,0,2,1,2,2,2,0,2,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,0,0,2,2,2,0,2,2,2,2,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
};
@@ -156,8 +168,8 @@ const SequenceModel Iso_8859_3TurkishModel =
{
Iso_8859_3_CharToOrderMap,
TurkishLangModel,
- 36,
- (float)0.991865243864388,
+ 33,
+ (float)0.9923593121944019,
PR_FALSE,
"ISO-8859-3",
"tr"
@@ -167,9 +179,19 @@ const SequenceModel Iso_8859_9TurkishModel =
{
Iso_8859_9_CharToOrderMap,
TurkishLangModel,
- 36,
- (float)0.991865243864388,
+ 33,
+ (float)0.9923593121944019,
PR_FALSE,
"ISO-8859-9",
"tr"
};
+
+const LanguageModel TurkishModel =
+{
+ "tr",
+ Unicode_CharOrder,
+ 66,
+ TurkishLangModel,
+ 33,
+ (float)0.9923593121944019,
+};
diff --git a/src/LangModels/LangVietnameseModel.cpp b/src/LangModels/LangVietnameseModel.cpp
index 0569887..ad9129a 100644
--- a/src/LangModels/LangVietnameseModel.cpp
+++ b/src/LangModels/LangVietnameseModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Vietnamese *********/
/**
* Generated by BuildLangModel.py
- * On: 2016-02-13 03:42:06.561440
+ * On: 2021-03-16 20:57:28.726718
**/
/* Character Mapping Table:
@@ -67,162 +68,179 @@ static const unsigned char Windows_1258_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */
- 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */
- 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,ILL, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,102, /* 9X */
+ SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 4X */
+ 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 6X */
+ 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM,105,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 97,ILL,ILL,ILL, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 97,ILL,ILL,104, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM,103,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 12, 15, 25, 51, 97,104, 98, 91, 90, 62, 27,105,SYM, 47,106,107, /* CX */
- 10,108,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,109, 96, 18,SYM, 99, /* DX */
- 12, 15, 25, 51, 97,110, 98, 91, 90, 62, 27,111,SYM, 47,112,113, /* EX */
- 10,114,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,115, 96, 18,116,117, /* FX */
+ SYM,SYM,SYM,SYM,SYM,107,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 11, 15, 27, 44,101,106, 95, 92, 90, 73, 28,108,SYM, 39,103,102, /* CX */
+ 10,100,SYM, 36, 29, 47, 98,SYM, 96, 62, 61,109, 93, 17,SYM, 99, /* DX */
+ 11, 15, 27, 44,101,106, 95, 92, 90, 73, 28,110,SYM, 39,103,102, /* EX */
+ 10,100,SYM, 36, 29, 47, 98,SYM, 96, 62, 61,111, 93, 17,112,104, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Viscii_CharToOrderMap[] =
{
- CTR,CTR, 88,CTR,CTR, 95, 77,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
- CTR,CTR,CTR,CTR, 80,CTR,CTR,CTR,CTR, 79,CTR,CTR,CTR,CTR, 92,CTR, /* 1X */
+ CTR,CTR, 85,CTR,CTR, 91, 77,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
+ CTR,CTR,CTR,CTR, 82,CTR,CTR,CTR,CTR, 84,CTR,CTR,CTR,CTR, 94,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */
- 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */
- 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 30, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* 8X */
- 53, 60, 84, 31, 37, 40, 38, 59, 42, 81, 44, 73, 35, 72, 48, 76, /* 9X */
- 86, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* AX */
- 53, 60, 84, 87, 46, 31, 38, 59, 42, 56, 52, 55, 70, 46, 40, 18, /* BX */
- 12, 15, 25, 61, 34, 51, 88, 95, 90, 62, 27, 85, 50, 47, 64, 76, /* CX */
- 10, 52, 63, 33, 29, 30, 80, 55, 70, 58, 67, 79, 92, 68, 87, 18, /* DX */
- 12, 15, 25, 61, 34, 51, 26, 77, 90, 62, 27, 85, 50, 47, 64, 73, /* EX */
- 10, 56, 63, 33, 29, 86, 81, 44, 48, 58, 67, 72, 35, 68, 37, 26, /* FX */
+ SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 4X */
+ 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 6X */
+ 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 31, 60, 68, 64, 42, 51, 76, 46, 80, 89, 23, 38, 43, 72, 26, 30, /* 8X */
+ 49, 59, 81, 25, 41, 37, 40, 57, 45, 75, 54, 70, 35, 69, 50, 79, /* 9X */
+ 86, 60, 68, 64, 42, 51, 76, 46, 80, 89, 23, 38, 43, 72, 26, 30, /* AX */
+ 49, 59, 81, 87, 47, 25, 40, 57, 45, 48, 55, 58, 65, 47, 37, 17, /* BX */
+ 11, 15, 27, 56, 33, 44, 85, 91, 90, 73, 28, 88, 52, 39, 67, 79, /* CX */
+ 10, 55, 63, 36, 29, 31, 82, 58, 65, 62, 61, 84, 94, 66, 87, 17, /* DX */
+ 11, 15, 27, 56, 33, 44, 34, 77, 90, 73, 28, 88, 52, 39, 67, 70, /* EX */
+ 10, 48, 63, 36, 29, 86, 75, 54, 50, 62, 61, 69, 35, 66, 41, 34, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 108;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 6, 66, 18, 67, 3, 68, 21, 69, 24, 71, 5, 72, 1, 73, 4,
+ 75, 22, 76, 14, 77, 8, 78, 0, 79, 9, 80, 16, 81, 32, 82, 13,
+ 83, 19, 84, 2, 85, 7, 86, 12, 88, 53, 89, 20, 97, 6, 98, 18,
+ 99, 3, 100, 21, 101, 24, 103, 5, 104, 1, 105, 4, 107, 22, 108, 14,
+ 109, 8, 110, 0, 111, 9, 112, 16, 113, 32, 114, 13, 115, 19, 116, 2,
+ 117, 7, 118, 12, 120, 53, 121, 20, 192, 11, 193, 15, 194, 27, 202, 28,
+ 204, 52, 205, 39, 211, 36, 212, 29, 224, 11, 225, 15, 226, 27, 234, 28,
+ 236, 52, 237, 39, 243, 36, 244, 29, 258, 44, 259, 44, 272, 10, 273, 10,
+ 416, 47, 417, 47, 431, 17, 432, 17, 7840, 31, 7841, 31, 7842, 33,7843, 33,
+ 7844, 42, 7845, 42, 7846, 51, 7847, 51, 7852, 46, 7853, 46, 7870, 23,7871, 23,
+ 7872, 38, 7873, 38, 7874, 43, 7875, 43, 7878, 26, 7879, 26, 7882, 45,7883, 45,
+ 7888, 30, 7889, 30, 7890, 49, 7891, 49, 7896, 25, 7897, 25, 7898, 37,7899, 37,
+ 7900, 40, 7901, 40, 7906, 41, 7907, 41, 7908, 50, 7909, 50, 7910, 35,7911, 35,
+ 7918, 34, 7919, 34, 7920, 48, 7921, 48,
+};
+
/* Model Table:
- * Total sequences: 1494
- * First 512 sequences: 0.9321889118082535
- * Next 512 sequences (512-1024): 0.06092051479986333
- * Rest: 0.0068905733918831966
+ * Total sequences: 1890
+ * First 512 sequences: 0.9336493792477815
+ * Next 512 sequences (512-1024): 0.05889427825209051
+ * Rest: 0.007456342500128027
* Negative sequences: TODO
*/
static const PRUint8 VietnameseLangModel[] =
{
- 3,3,3,3,3,3,3,2,2,3,0,2,3,1,1,1,1,2,3,3,2,3,3,3,2,1,2,
- 3,0,3,2,2,2,3,1,0,1,1,2,0,0,1,0,1,0,2,2,1,0,0,0,3,0,0,2,
- 2,1,2,0,3,0,3,3,2,3,0,2,3,0,2,3,0,0,3,1,3,3,1,3,1,3,3,
- 3,3,3,3,3,3,3,3,3,0,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,3,2,0,
- 2,3,2,2,3,1,3,3,1,3,1,3,3,2,2,3,2,0,3,2,2,3,1,3,0,3,0,
- 3,1,3,3,3,3,2,3,2,0,0,2,1,2,2,2,2,0,0,1,3,2,3,2,2,2,2,0,
- 2,3,2,2,3,0,3,3,2,3,0,2,2,1,2,3,1,1,2,2,2,3,1,0,2,2,0,
- 0,0,3,2,3,2,3,3,3,1,1,2,0,0,2,0,3,0,0,2,0,2,2,0,2,3,1,1,
- 3,1,3,3,3,3,3,2,3,3,1,3,2,2,3,3,2,2,0,3,1,3,3,3,2,0,3,
- 3,3,1,0,0,3,1,3,0,2,0,2,3,3,2,0,0,2,3,0,0,0,1,0,1,0,0,2,
- 2,3,2,2,3,1,3,3,1,3,0,3,3,0,2,2,0,1,3,2,2,3,1,1,1,2,3,
- 0,0,3,3,1,2,2,0,1,0,2,2,0,0,1,1,3,3,0,0,0,1,1,2,1,0,3,0,
- 3,2,3,3,3,2,2,3,3,3,0,3,0,2,3,0,2,3,0,3,3,2,3,0,2,0,0,
- 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,
- 3,1,3,2,3,2,3,1,3,2,0,3,1,2,3,2,2,2,0,3,3,3,2,2,2,3,0,
- 2,1,3,1,3,3,0,2,0,0,0,1,0,1,3,0,3,0,0,2,2,0,3,0,2,0,3,1,
- 2,1,0,2,3,0,3,3,2,3,0,0,3,0,2,3,2,2,3,2,2,3,2,0,0,1,0,
- 0,2,3,3,3,2,2,1,0,0,0,2,0,3,3,0,1,2,2,0,0,3,2,2,1,2,1,1,
- 3,2,3,2,3,2,3,3,3,2,0,3,3,2,3,3,2,3,0,3,2,2,3,0,2,0,0,
- 0,0,0,3,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,
- 0,0,0,0,3,0,3,2,0,3,0,1,3,0,0,3,0,1,3,0,0,1,0,3,0,3,0,
- 2,3,3,3,3,3,3,3,2,0,1,3,3,1,3,3,3,3,3,2,2,0,1,2,2,3,3,0,
- 3,2,3,2,3,2,3,3,2,3,0,3,2,2,3,2,1,2,3,3,3,3,3,0,2,1,2,
- 3,1,2,2,3,2,0,2,0,0,2,2,1,0,3,3,2,3,0,1,2,2,2,3,3,1,2,0,
+ 2,3,3,2,3,3,3,2,1,3,1,3,1,2,2,1,1,3,1,3,2,3,2,2,3,3,1,
+ 2,3,3,2,2,1,1,2,0,3,1,2,1,0,1,1,0,3,1,1,2,0,1,0,1,0,1,
+ 2,1,2,1,3,0,3,3,2,3,1,3,1,2,1,3,1,3,1,1,2,2,1,3,3,3,3,
+ 3,3,3,3,3,1,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,
+ 2,3,2,2,3,1,3,3,2,3,0,3,1,3,2,3,1,3,1,2,2,1,1,3,3,3,1,
+ 3,3,3,3,3,0,3,0,1,2,3,1,3,2,0,3,0,2,3,3,1,3,2,2,2,3,1,
+ 2,3,2,2,3,0,3,3,1,3,2,2,1,2,2,3,1,3,1,2,2,1,2,0,3,3,0,
+ 2,0,3,2,2,1,3,1,3,3,1,0,1,1,1,3,0,2,0,2,3,2,0,2,3,0,1,
+ 3,1,3,3,3,2,3,2,3,3,1,2,2,3,3,3,2,1,2,3,1,2,2,3,3,0,3,
+ 1,3,2,2,1,2,3,3,0,1,3,3,0,2,0,2,3,1,0,1,1,0,0,1,1,0,2,
+ 2,3,1,1,3,1,3,3,1,3,1,3,1,2,2,2,1,3,1,2,2,1,1,1,3,2,0,
+ 2,1,3,2,2,0,1,3,1,2,1,0,0,1,2,1,0,2,1,1,1,2,3,1,2,2,1,
+ 3,2,3,3,3,2,1,3,3,3,1,0,2,3,3,0,2,0,2,3,3,3,2,0,2,0,0,
+ 0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,1,2,2,2,2,3,1,2,2,0,1,1,3,2,3,2,0,2,3,3,2,2,2,3,3,1,
+ 3,2,3,3,2,1,3,0,0,0,1,1,1,1,0,3,0,1,0,3,1,0,3,0,3,0,1,
+ 1,1,1,1,3,1,3,3,2,3,0,3,1,2,2,3,2,3,2,2,1,1,1,1,3,3,2,
+ 2,1,3,2,3,0,1,0,1,2,3,2,1,2,0,2,0,1,1,2,1,1,1,3,1,3,1,
+ 3,2,3,2,3,2,3,3,3,2,0,3,2,3,3,3,2,0,2,3,1,3,2,0,2,0,0,
+ 0,0,0,0,3,1,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,
+ 0,2,1,0,3,0,3,2,0,3,0,3,0,1,0,3,0,3,1,0,0,0,1,3,2,3,2,
+ 3,2,3,3,3,0,3,0,2,3,1,3,2,3,2,3,3,2,3,2,3,1,3,1,3,2,0,
3,0,0,0,3,0,0,2,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,0,3,0,3,3,0,2,0,1,3,0,1,1,0,0,2,1,1,3,1,1,0,2,1,
- 2,1,2,1,0,1,0,0,0,0,2,1,0,3,2,3,3,1,3,0,3,2,3,3,3,0,0,0,
- 0,2,2,1,3,2,3,3,2,3,0,0,3,2,3,2,2,2,3,2,2,3,2,1,1,2,1,
- 3,2,2,3,3,2,1,0,0,0,3,2,0,3,2,3,2,1,0,1,2,2,3,0,2,0,0,1,
- 3,0,3,3,3,1,0,2,3,3,0,1,0,0,1,0,3,0,0,1,3,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,3,2,0,3,0,3,2,1,3,0,3,0,0,2,0,2,1,0,2,2,3,1,0,0,0,0,
- 2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
- 2,1,0,2,3,1,3,3,0,3,0,3,3,0,3,3,0,3,1,2,2,3,1,1,1,0,0,
- 2,1,0,2,3,3,2,3,0,0,0,1,0,2,2,3,2,0,1,0,2,1,2,3,0,2,3,0,
- 3,0,1,1,2,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,3,3,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
- 1,3,3,3,3,1,3,3,2,3,0,1,2,0,2,3,2,2,2,3,2,3,2,0,2,2,0,
- 0,0,2,1,0,3,2,2,0,1,1,1,1,1,1,0,0,0,0,2,0,1,0,0,1,2,1,0,
- 2,0,1,2,1,0,2,2,1,2,0,2,0,0,1,1,2,1,0,2,0,2,1,3,1,0,0,
- 3,2,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,
- 3,2,3,2,2,2,3,2,3,3,0,3,0,2,3,1,2,2,0,3,2,3,3,0,2,0,0,
- 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
- 1,1,1,2,3,1,3,3,0,3,0,3,3,1,2,1,0,0,3,2,2,3,2,0,1,3,1,
- 1,0,0,3,1,1,1,0,0,0,0,1,0,0,3,3,2,1,0,1,0,3,2,1,1,2,1,0,
+ 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,1,1,0,3,1,3,2,0,2,0,3,0,1,1,1,1,3,0,1,1,1,1,1,3,1,2,
+ 2,1,2,2,2,0,1,1,0,1,3,3,3,1,2,3,0,3,3,3,1,2,0,3,2,3,0,
+ 3,2,3,2,3,2,3,3,2,3,0,2,2,2,2,3,1,3,2,2,2,2,2,1,3,2,1,
+ 1,3,2,2,2,1,2,2,1,1,0,0,3,2,2,3,1,2,3,2,1,2,2,2,2,3,1,
+ 1,2,2,1,3,2,3,3,2,3,0,3,2,1,3,2,2,3,2,2,2,2,1,0,3,2,3,
+ 2,3,2,2,3,1,0,1,0,1,3,1,2,2,2,2,0,2,3,3,1,3,1,2,3,1,1,
+ 3,0,3,3,3,1,0,2,3,3,0,0,1,1,1,0,3,0,1,1,2,1,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,3,2,1,2,1,3,2,1,3,0,0,0,2,2,1,2,1,1,2,1,1,0,0,3,0,0,
+ 1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,
+ 3,1,1,1,1,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,3,0,0,3,3,0,0,0,0,0,3,0,0,0,0,0,0,
+ 2,1,1,3,3,0,3,3,1,3,0,3,1,2,2,3,0,2,3,2,2,1,1,2,3,3,2,
+ 1,2,1,3,2,0,3,1,0,2,1,2,2,2,0,2,1,1,3,2,1,1,3,1,2,3,1,
+ 1,2,3,3,3,1,3,3,1,3,0,2,1,1,2,3,2,3,2,2,2,1,2,0,3,1,0,
+ 2,1,2,3,1,1,3,1,1,2,2,0,1,1,2,1,0,1,0,1,3,3,1,1,1,1,0,
+ 2,0,1,1,1,1,2,1,2,2,0,0,0,2,2,0,2,0,2,2,1,1,1,3,2,0,3,
+ 0,3,0,0,0,0,0,0,0,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
+ 2,1,1,2,3,2,3,3,1,3,0,3,1,2,1,1,1,3,1,2,1,1,1,0,3,1,1,
+ 3,1,0,1,3,0,1,1,0,1,0,1,1,1,0,3,1,0,3,2,1,3,1,3,2,1,0,
+ 1,3,1,0,3,0,2,2,2,2,0,0,2,2,1,1,0,0,1,2,1,1,1,3,2,0,1,
+ 0,2,2,0,1,0,0,0,1,1,0,0,2,0,0,0,2,1,2,0,1,0,0,1,1,1,0,
3,0,3,2,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,3,1,0,3,1,3,2,0,2,0,2,0,1,2,0,0,1,0,2,2,2,0,3,1,0,0,
- 2,0,1,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,3,0,0,2,0,0,0,1,
+ 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,1,3,2,3,2,3,2,3,3,0,0,2,3,3,0,2,0,2,3,2,2,2,0,2,0,0,
+ 0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+ 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,1,1,0,0,0,3,3,0,0,0,0,0,1,0,1,0,0,0,3,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,2,1,0,0,0,3,3,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,2,2,3,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,
- 0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,3,3,3,0,0,0,2,3,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,1,2,3,0,3,0,2,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,1,1,1,1,0,3,3,0,1,0,0,1,0,0,1,0,0,2,0,0,0,0,0,0,0,
+ 0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+ 3,0,1,1,3,1,0,0,3,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,
+ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,3,3,3,0,0,0,3,3,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,1,0,0,1,0,1,3,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,
+ 0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,3,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,2,3,3,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,2,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,3,0,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,1,3,0,0,3,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,0,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,1,3,3,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,0,1,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,1,2,3,0,3,1,2,0,0,0,0,1,0,0,2,0,1,1,0,0,0,0,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,1,3,3,0,0,1,2,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,
+ 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,1,1,3,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,2,3,0,0,2,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,1,3,1,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,3,1,0,0,0,2,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,2,3,0,1,3,1,2,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,2,3,3,0,0,1,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,3,1,1,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,1,1,0,0,0,0,3,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,1,3,0,0,3,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,3,2,0,0,0,2,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,1,1,3,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,0,0,3,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,1,3,1,0,1,0,2,0,0,0,0,0,0,0,1,0,0,0,2,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,0,1,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,1,1,1,0,0,0,3,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,2,3,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,1,2,1,2,0,3,3,0,1,0,0,0,2,0,3,1,2,2,0,1,3,0,2,0,2,0,
- 2,0,2,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,1,2,0,0,1,1,2,0,2,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,2,1,3,0,2,3,1,2,0,1,2,0,1,2,1,2,0,0,1,1,0,2,3,1,0,
+ 3,1,3,0,1,0,2,0,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,2,
};
@@ -230,8 +248,8 @@ const SequenceModel Windows_1258VietnameseModel =
{
Windows_1258_CharToOrderMap,
VietnameseLangModel,
- 55,
- (float)0.9321889118082535,
+ 54,
+ (float)0.9336493792477815,
PR_FALSE,
"WINDOWS-1258",
"vi"
@@ -241,9 +259,19 @@ const SequenceModel VisciiVietnameseModel =
{
Viscii_CharToOrderMap,
VietnameseLangModel,
- 55,
- (float)0.9321889118082535,
+ 54,
+ (float)0.9336493792477815,
PR_FALSE,
"VISCII",
"vi"
};
+
+const LanguageModel VietnameseModel =
+{
+ "vi",
+ Unicode_CharOrder,
+ 108,
+ VietnameseLangModel,
+ 54,
+ (float)0.9336493792477815,
+};
diff --git a/src/nsLanguageDetector.h b/src/nsLanguageDetector.h
index 6c22d17..5300a4d 100644
--- a/src/nsLanguageDetector.h
+++ b/src/nsLanguageDetector.h
@@ -110,10 +110,30 @@ private:
};
extern const LanguageModel ArabicModel;
+extern const LanguageModel CroatianModel;
+extern const LanguageModel CzechModel;
extern const LanguageModel DanishModel;
+extern const LanguageModel EsperantoModel;
+extern const LanguageModel EstonianModel;
+extern const LanguageModel FinnishModel;
extern const LanguageModel FrenchModel;
extern const LanguageModel GermanModel;
+extern const LanguageModel GreekModel;
+extern const LanguageModel HungarianModel;
+extern const LanguageModel IrishModel;
extern const LanguageModel ItalianModel;
+extern const LanguageModel LatvianModel;
+extern const LanguageModel LithuanianModel;
+extern const LanguageModel MalteseModel;
+extern const LanguageModel PolishModel;
+extern const LanguageModel PortugueseModel;
+extern const LanguageModel RomanianModel;
+extern const LanguageModel SlovakModel;
+extern const LanguageModel SloveneModel;
extern const LanguageModel SpanishModel;
+extern const LanguageModel SwedishModel;
+extern const LanguageModel ThaiModel;
+extern const LanguageModel TurkishModel;
+extern const LanguageModel VietnameseModel;
#endif /* nsLanguageDetector_h__ */
diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp
index 544a8dd..6144d2d 100644
--- a/src/nsMBCSGroupProber.cpp
+++ b/src/nsMBCSGroupProber.cpp
@@ -85,12 +85,34 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
{
if (mProbers[i]->DecodeToUnicode())
{
- langDetectors[i][0] = new nsLanguageDetector(&FrenchModel);
- langDetectors[i][1] = new nsLanguageDetector(&ItalianModel);
- langDetectors[i][2] = new nsLanguageDetector(&DanishModel);
- langDetectors[i][3] = new nsLanguageDetector(&GermanModel);
- langDetectors[i][4] = new nsLanguageDetector(&ArabicModel);
- langDetectors[i][5] = new nsLanguageDetector(&SpanishModel);
+ int j = 0;
+
+ langDetectors[i][j++] = new nsLanguageDetector(&ArabicModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&CroatianModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&CzechModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&DanishModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&EsperantoModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&EstonianModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&FinnishModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&FrenchModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&GermanModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&GreekModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&HungarianModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&IrishModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&ItalianModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&LatvianModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&LithuanianModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&MalteseModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&PolishModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&PortugueseModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&RomanianModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&SlovakModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&SloveneModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&SpanishModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&SwedishModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&ThaiModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&TurkishModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&VietnameseModel);
}
else
{
diff --git a/src/nsMBCSGroupProber.h b/src/nsMBCSGroupProber.h
index ee6669e..2ed028e 100644
--- a/src/nsMBCSGroupProber.h
+++ b/src/nsMBCSGroupProber.h
@@ -48,7 +48,7 @@
#include "nsEUCTWProber.h"
#define NUM_OF_PROBERS 7
-#define NUM_OF_LANGUAGES 6
+#define NUM_OF_LANGUAGES 26
class nsMBCSGroupProber: public nsCharSetProber {
public: