diff options
author | Jehan <jehan@girinstud.io> | 2021-03-17 02:07:17 +0100 |
---|---|---|
committer | Jehan <jehan@girinstud.io> | 2022-12-14 00:23:13 +0100 |
commit | eb8308d50a09821cb0b90aa0bca5a068d231d873 (patch) | |
tree | 8b77e5afbadcba9bf62d5ea637f1566f644430bb | |
parent | 5257fc1abf3171dad97a429830f56ff9971e91ff (diff) |
src, script: regenerate all existing language models.
Now making sure that we have a generic language model working with UTF-8
for all 26 supported models which had single-byte encoding support until
now.
43 files changed, 5426 insertions, 4708 deletions
diff --git a/script/BuildLangModelLogs/LangCroatianModel.log b/script/BuildLangModelLogs/LangCroatianModel.log index a79f123..542a251 100644 --- a/script/BuildLangModelLogs/LangCroatianModel.log +++ b/script/BuildLangModelLogs/LangCroatianModel.log @@ -1,157 +1,157 @@ = Logs of language model for Croatian (hr) = - Generated by BuildLangModel.py -- Started: 2016-09-25 23:41:35.999066 -- Maximum depth: 5 +- Started: 2021-03-16 19:09:36.740256 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Fizika čvrstog stanja (revision 4776646) -Agregatno stanje (revision 4663090) +Fizika čvrstog stanja (revision 5777686) +Agregatno stanje (revision 5764830) Alnico (revision 3915185) -Aluminij (revision 4772363) -Amorfna tvar (revision 4659679) -Antimon (revision 4420072) -Antoine Henri Becquerel (revision 4634966) -Apsolutna nula (revision 4706683) -Arsen (revision 4540773) -Arthur Holly Compton (revision 4736068) -Atom (revision 4778162) -Atomska jezgra (revision 4540956) +Aluminij (revision 5755266) +Amorfna tvar (revision 5392804) +Antimon (revision 5435171) +Antoine Henri Becquerel (revision 5556977) +Apsolutna nula (revision 5482633) +Arsen (revision 5752189) +Arthur Holly Compton (revision 5313150) +Atom (revision 5730600) +Atomska jezgra (revision 5731544) Bell Labs (revision 4769518) -Bor (element) (revision 4602837) -Brian Josephson (revision 4403761) -Cink (revision 4537854) -Coulombov zakon (revision 4710338) -Dijamant (revision 4625335) -Dimenzija (revision 4669110) -Dinastija Han (revision 4541686) -Dislokacija (revision 4668021) -EV (revision 4538157) -Eksponencijalna funkcija (revision 4160157) -Električna struja (revision 4280621) -Električna vodljivost (revision 4460160) -Električni izolator (revision 4649046) -Električni luk (revision 4646980) -Električni naboj (revision 4727496) -Električni otpor (revision 4593314) -Električni vodič (revision 4333008) -Električno polje (revision 4705679) -Elektrolit (revision 4486319) -Elektromagnetsko zračenje (revision 4537368) -Elektron (revision 4630705) -Elektronika (revision 4090016) -Elektronska konfiguracija (revision 4420620) -Elektronski mikroskop (revision 4413214) -Elektrotehnika (revision 4596912) -Energetika (revision 4586277) -Energija (revision 4719089) +Bor (element) (revision 5549612) +Brian Josephson (revision 5446101) +Cink (revision 5556719) +Comptonov učinak (revision 5313303) +Coulombov zakon (revision 5436283) +Dijamant (revision 5775412) +Dimenzija (revision 5379791) +Dinastija Han (revision 5772176) +Dislokacija (revision 5431109) +EV (revision 5430610) +Eksponencijalna funkcija (revision 5523460) +Električna struja (revision 5653050) +Električna vodljivost (revision 5376333) +Električni izolator (revision 5258197) +Električni luk (revision 5437134) +Električni naboj (revision 5774260) +Električni otpor (revision 4904596) +Električni vodič (revision 5334900) +Električno polje (revision 5247154) +Elektrolit (revision 4858367) +Elektromagnetsko zračenje (revision 5760956) +Elektron (revision 5774256) +Elektronika (revision 5556766) +Elektronska konfiguracija (revision 4949752) +Elektronski mikroskop (revision 5439229) +Elektrotehnika (revision 5254565) +Energetika (revision 4908587) +Energija (revision 5767106) Fermi-Diracova statistika (revision 3934172) -Feromagnetizam (revision 4760511) -Fizika (revision 4769955) -Fizika kondenzirane tvari (revision 4769955) -Fizikalna veličina (revision 4621676) -Fosfor (revision 4602427) -Fotodioda (revision 3939069) -Fotoelektrični učinak (revision 4704417) -Foton (revision 4537522) -Fotonaponski sustavi (revision 4418887) -Francuski jezik (revision 4771366) -Galij (revision 4537855) -Genitiv (revision 4625199) -Germanij (revision 4537856) -Helij (revision 4747001) +Feromagnetizam (revision 5392729) +Fizika (revision 5777684) +Fizika kondenzirane tvari (revision 5455580) +Fizikalna veličina (revision 5497656) +Fosfor (revision 5556869) +Fotodioda (revision 5235215) +Fotoelektrični učinak (revision 5632628) +Foton (revision 5635311) +Fotonaponski sustavi (revision 5430012) +Francuski jezik (revision 5771033) +Galij (revision 5437600) +Genitiv (revision 5767472) +Germanij (revision 5437677) +Helij (revision 5556716) Henri (revision 3922500) -Indij (revision 4537867) -Integrirani krug (revision 4447159) -Ion (revision 4549144) -Ioniziranje (revision 4566703) -Izolator (revision 4649046) -John Bardeen (revision 4403736) -Kadmij (revision 3921860) -Kelvin (revision 4624351) -Keramika (revision 4599177) -Kinetička energija (revision 4719090) -Klasična mehanika (revision 4637127) -Kompas (revision 4702880) -Kondenzacija (revision 4477825) -Kondenzirana tvar (revision 4776646) +Indij (revision 5439698) +Integrirani krug (revision 5500904) +Ion (revision 5750157) +Ioniziranje (revision 5318213) +John Bardeen (revision 5182165) +Kadmij (revision 5440736) +Kelvin (revision 5240179) +Keramika (revision 5655772) +Kinetička energija (revision 5753997) +Klasična mehanika (revision 5656259) +Kompas (revision 5750313) +Kondenzacija (revision 5492249) +Kondenzirana tvar (revision 5455580) Konstrukcija (revision 4680450) -Kovalentna veza (revision 4641419) -Kristal (revision 4720329) -Kristalna rešetka (revision 4479184) +Kovalentna veza (revision 5751506) +Kristal (revision 5455704) +Kristalna rešetka (revision 5562348) Kristalografija (revision 4105956) -Krutine (revision 4625162) -Kubični kristalni sustav (revision 4344344) -Kubični metar (revision 4616551) -Kvantna mehanika (revision 4541215) -Latinski jezik (revision 4760544) -Luminiscencija (revision 4708222) -Magnet (revision 4603344) -Magnetizam (revision 4760040) +Krutine (revision 5196995) +Kubični kristalni sustav (revision 5610803) +Kubični metar (revision 5082862) +Kvantna mehanika (revision 5777687) +Latinski jezik (revision 5663325) +Luminiscencija (revision 5052601) +Magnet (revision 5743549) +Magnetizam (revision 5728489) Magnetska permeabilnost (revision 4675996) -Magnetska vodljivost (revision 4736934) -Magnetski moment (revision 4410235) -Magnetsko polje (revision 4678057) -Materijal (revision 4669230) -Mehanika (revision 4698699) -Metal (revision 4671710) -Metan (revision 4422418) -Metar (revision 4655527) -Mjerna veličina (revision 4621676) -Molekula (revision 4539232) -Molekule (revision 4539232) -Napon (revision 4585417) +Magnetska vodljivost (revision 4899860) +Magnetski moment (revision 5489691) +Magnetsko polje (revision 5671905) +Materijal (revision 5748275) +Mehanika (revision 5777691) +Metal (revision 5505185) +Metan (revision 5611051) +Metar (revision 5325605) +Mjerna veličina (revision 5497656) +Molekula (revision 5773190) +Molekule (revision 5773190) +Napon (revision 5556720) Niskotemperaturna fizika (revision 4657522) -Njemački jezik (revision 4731246) -Optika (revision 4768098) +Njemački jezik (revision 5710175) +Optika (revision 5316843) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-25 23:50:27.589690 +- Wikipedia parsing ended at: 2021-03-16 19:18:55.485669 -49 characters appeared 500582 times. +49 characters appeared 643453 times. First 31 characters: -[ 0] Char a: 10.808019465342342 % -[ 1] Char i: 10.18554402675286 % -[ 2] Char e: 9.571259054460608 % -[ 3] Char o: 8.468143081453189 % -[ 4] Char n: 6.952906816465634 % -[ 5] Char t: 5.369549843981606 % -[ 6] Char r: 5.331993559496746 % -[ 7] Char j: 5.102860270644969 % -[ 8] Char s: 4.717109284792501 % -[ 9] Char k: 4.013927788054705 % -[10] Char l: 3.854713113935379 % -[11] Char u: 3.786792173909569 % -[12] Char m: 3.730058212240951 % -[13] Char v: 3.0989927724129114 % -[14] Char p: 2.67308852495695 % -[15] Char d: 2.6135578186990345 % -[16] Char z: 1.8931963194841206 % -[17] Char g: 1.5665765049482403 % -[18] Char č: 1.161048539500022 % -[19] Char b: 1.1440683044935693 % -[20] Char c: 1.007627122029957 % -[21] Char h: 0.8006680224219008 % -[22] Char f: 0.5159993767254915 % -[23] Char š: 0.422907735395999 % -[24] Char ž: 0.3611795869607777 % -[25] Char ć: 0.34959307366225717 % -[26] Char đ: 0.2195444502598975 % -[27] Char y: 0.11306838839590717 % -[28] Char w: 0.07291512679241363 % -[29] Char x: 0.04534721584076135 % -[30] Char q: 0.02477116636235422 % +[ 0] Char a: 10.677081309746011 % +[ 1] Char i: 9.900023777960474 % +[ 2] Char e: 9.741037806957152 % +[ 3] Char o: 8.583843730622128 % +[ 4] Char n: 6.852404138297591 % +[ 5] Char t: 5.517885533209108 % +[ 6] Char r: 5.292383437484944 % +[ 7] Char j: 5.03952891664193 % +[ 8] Char s: 4.730104607484929 % +[ 9] Char k: 4.032773178460587 % +[10] Char l: 3.9395262746463224 % +[11] Char m: 3.8557594727198414 % +[12] Char u: 3.7656207990327184 % +[13] Char v: 3.0636270248176634 % +[14] Char p: 2.654583940085756 % +[15] Char d: 2.6340696212466175 % +[16] Char z: 1.8657151338170777 % +[17] Char g: 1.5614194043698606 % +[18] Char č: 1.1537750231951673 % +[19] Char b: 1.1304632972416013 % +[20] Char c: 1.081042438220041 % +[21] Char h: 0.7697531909867543 % +[22] Char f: 0.4845730768214617 % +[23] Char š: 0.4174353060751912 % +[24] Char ž: 0.365217039939203 % +[25] Char ć: 0.35123000436706336 % +[26] Char đ: 0.22596833024323454 % +[27] Char y: 0.14857340007739495 % +[28] Char w: 0.06558365568269944 % +[29] Char x: 0.04988709354063157 % +[30] Char q: 0.030149832233278887 % -The first 31 characters have an accumulated ratio of 0.9997702674087363. +The first 31 characters have an accumulated ratio of 0.9998103979622444. -712 sequences found. +725 sequences found. -First 512 (typical positive ratio): 0.9989731099787131 -Next 512 (512-1024): 1.9976747066414694e-06 -Rest: 3.7513395167998453e-17 +First 512 (typical positive ratio): 0.9990568119867879 +Next 512 (512-1024): 0.00365217039939203 +Rest: -4.0440741033709315e-17 -- Processing end: 2016-09-25 23:50:27.987029 +- Processing end: 2021-03-16 19:18:56.030353 diff --git a/script/BuildLangModelLogs/LangCzechModel.log b/script/BuildLangModelLogs/LangCzechModel.log index 7d9c950..7d7cbd3 100644 --- a/script/BuildLangModelLogs/LangCzechModel.log +++ b/script/BuildLangModelLogs/LangCzechModel.log @@ -1,161 +1,158 @@ = Logs of language model for Czech (cs) = - Generated by BuildLangModel.py -- Started: 2016-09-21 03:20:56.824516 -- Maximum depth: 5 +- Started: 2021-03-16 18:42:56.950279 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Sociální fobie (revision 13567590) -Adaptace (revision 13991192) -Agorafobie (revision 13013445) -Alkoholismus (revision 13822064) -Alprazolam (revision 14082425) -Antidepresivum (revision 14113423) -Asertivita (revision 14111958) +Sociální fobie (revision 19562865) +Adaptace (revision 18611473) +Agorafobie (revision 19426793) +Alkoholismus (revision 19586776) +Alprazolam (revision 19373957) +Americká psychiatrická společnost (revision 18200634) +Antidepresivum (revision 19057482) +Asertivita (revision 19469246) Atenolol (revision 12051880) -Automatické negativní myšlenky (revision 13567590) -Benzodiazepin (revision 13947546) -Beta-blokátory (revision 13428762) -Blud (revision 13888988) -Bohatství (revision 13556478) -Bupropion (revision 13686045) -Citaloparam (revision 13567590) -Clonazepan (revision 13567590) -Crohnova nemoc (revision 13745254) -Deprese (psychologie) (revision 13695735) -Diagnostický a statický manuál mentálních poruch (revision 13567590) -Diagnostický a statistický manuál mentálních poruch (revision 13714660) -Diagnóza (medicína) (revision 13052239) -Dichotomické myšlení (revision 13567590) -Digital object identifier (revision 14138049) -Dopamin (revision 13714274) -Dystymie (revision 13567267) -Důkaz kruhem (revision 13190761) -Elektivní mutismus (revision 9940891) -Emoce (revision 14110033) -Escitalopram (revision 12954987) -Evoluce (revision 13951488) -Expozice (psychologie) (revision 14119474) -Extraverze a introverze (revision 13872996) -Fluoxetin (revision 12955006) -Fluvoxamin (revision 12955006) -Gen (revision 13907182) -Generalizovaná úzkostná porucha (revision 14006709) -Halucinaci (revision 12188143) -Hněv (revision 14057864) -Inteligence (revision 14009781) -International Standard Serial Number (revision 12869806) -Interpersonální psychoterapie (revision 13567590) -Iracionalita (revision 4765977) -Ján Praško Pavlov (revision 14086840) -Klinické testování (revision 13530979) -Kognitivní omyl (revision 13107294) -Kognitivní psychologie (revision 11629465) -Kognitivní restrukturalizace (revision 13567360) -Kognitivně behaviorální terapie (revision 13980494) -Komorbidita (revision 11351714) -Lymská borelióza (revision 14068446) -Malé sebevědomí (revision 13567590) -Medical Subject Headings (revision 12239331) -Meditace (revision 13180783) -Mentální černý filtr (revision 13567590) -Mezinárodní klasifikace nemocí (revision 12531067) -Michael Liebowitz (revision 13567590) -Moclobemid (revision 13567590) -Moritova terapie (revision 11960292) -Musturbace (revision 13567590) -Nervozita (revision 13847097) -Noradrenalin (revision 14054165) -Obsedantně kompulzivní porucha (revision 13950365) -Panická ataka (revision 13253537) -Panická porucha (revision 13253537) -Paranoia (revision 14027052) -Paroxetin (revision 12955006) -Pohlavnost (revision 13564689) -Porucha (revision 11039108) -Pravděpodobnost (revision 13596041) -Predestinace (revision 12467403) -Profese (revision 13975485) -Propanolol (revision 12972658) -Psychiatr (revision 12767960) -Psychické trauma (revision 11227535) -Psychoaktivní droga (revision 13939232) -Psychodynamická léčba (revision 13567590) -Psychofarmaka (revision 9928215) -Psycholog (revision 12358728) -Psychoterapie (revision 13874178) -Puberta (revision 12540014) -RIMA (revision 10234728) -Remise (revision 9896748) -Richard Heimberg (revision 13567590) -Rámování myšlenek (revision 13567590) -Schizofrenie (revision 13977456) -Sebevražda (revision 14053884) -Selektivní abstrakce (revision 13567590) -Selektivní inhibitor zpětného vychytávání serotoninu (revision 12955027) -Serotonin (revision 13975104) -Sertralin (revision 12955006) -Skupinová terapie (revision 11964235) -Sociální chování (revision 13507313) -Sociální dovednost (revision 12226347) +Benzodiazepiny (revision 19464603) +Beta-blokátor (revision 19342461) +Blud (revision 18085659) +Bohatství (revision 16529725) +Bupropion (revision 12028550) +Citalopram (revision 17641873) +Clonazepam (revision 19414205) +Crohnova nemoc (revision 19441068) +DSM-IV (revision 18200634) +Deprese (psychologie) (revision 19554049) +Diagnostický a statistický manuál mentálních poruch (revision 18200634) +Diagnóza (medicína) (revision 18672900) +Dichotomické myšlení (revision 19472610) +Digital object identifier (revision 19452419) +Dopamin (revision 19339677) +Dystymie (revision 17683683) +Důkaz kruhem (revision 16799597) +Elektivní mutismus (revision 19334050) +Emoce (revision 19268819) +Escitalopram (revision 19342010) +Fluoxetin (revision 19342014) +Fluvoxamin (revision 19342014) +Gen (revision 18766924) +Generalizovaná úzkostná porucha (revision 19465410) +Halucinace (revision 19181320) +Hněv (revision 19602111) +Inteligence (revision 19472417) +International Standard Book Number (revision 19411852) +International Standard Serial Number (revision 17477154) +Interpersonální psychoterapie (revision 17446502) +Introverze (revision 19273893) +Iracionalita (revision 16731536) +Jana Vyskočilová (revision 19609212) +Ján Praško (revision 18740907) +Ján Praško Pavlov (revision 18740907) +Kognitivní omyl (revision 19618239) +Kognitivní psychologie (revision 16289048) +Kognitivní restrukturalizace (revision 19284546) +Kognitivně behaviorální terapie (revision 19475205) +Komorbidita (revision 17525950) +Lymská borelióza (revision 19051205) +Medical Subject Headings (revision 18009832) +Meditace (revision 18651670) +Mezinárodní klasifikace nemocí (revision 19575331) +Michael Liebowitz (revision 17336961) +Moclobemid (revision 19562865) +Moritova terapie (revision 16391634) +Musturbace (revision 19562865) +NDRI (revision 19412768) +Nervozita (revision 18799061) +Noradrenalin (revision 19376674) +Obsedantně kompulzivní porucha (revision 19461977) +Panická ataka (revision 18158083) +Panická porucha (revision 18158083) +Paranoia (revision 19271797) +Paroxetin (revision 19342014) +Pohlavnost (revision 19553039) +Pravděpodobnost (revision 19370061) +Predestinace (revision 15390515) +Profese (revision 19148432) +Propanolol (revision 19342521) +Psychiatr (revision 18661359) +Psychické trauma (revision 17566056) +Psychoaktivní droga (revision 19150920) +Psychodynamická léčba (revision 19562865) +Psychofarmaka (revision 19341820) +Psycholog (revision 18812730) +Psychoterapie (revision 18403501) +PubMed (revision 17045891) +RIMA (revision 13950874) +Remise (revision 19427721) +Richard Heimberg (revision 19562865) +Schizofrenie (revision 19507435) +Sebevražda (revision 19464374) +Selektivní abstrakce (revision 17523049) +Selektivní inhibitor zpětného vychytávání serotoninu (revision 19342041) +Serotonin (revision 19186450) +Sertralin (revision 19342014) +Skupinová psychoterapie (revision 15430379) +Skupinová terapie (revision 15430379) +Sociální chování (revision 18867179) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-21 03:28:11.731386 +- Wikipedia parsing ended at: 2021-03-16 18:50:25.563305 -47 characters appeared 594800 times. +48 characters appeared 495093 times. First 41 characters: -[ 0] Char o: 8.323806321452588 % -[ 1] Char e: 8.040013449899126 % -[ 2] Char n: 6.895595158036315 % -[ 3] Char a: 6.263113651647613 % -[ 4] Char i: 5.650470746469401 % -[ 5] Char t: 5.40383322125084 % -[ 6] Char s: 4.588937457969065 % -[ 7] Char v: 3.8685272360457295 % -[ 8] Char p: 3.6914929388029587 % -[ 9] Char r: 3.6302958977807664 % -[10] Char l: 3.6017148621385338 % -[11] Char í: 3.5733019502353733 % -[12] Char k: 3.301950235373235 % -[13] Char u: 3.1782111634162744 % -[14] Char c: 3.1383658372562206 % -[15] Char d: 3.120208473436449 % -[16] Char m: 2.758406186953598 % -[17] Char h: 2.2747141896435776 % -[18] Char á: 2.156186953597848 % -[19] Char z: 2.0260591795561536 % -[20] Char y: 1.9894082044384667 % -[21] Char j: 1.8979488903833224 % -[22] Char b: 1.8189307330195021 % -[23] Char ě: 1.277236045729657 % -[24] Char é: 1.2291526563550772 % -[25] Char č: 0.9502353732347008 % -[26] Char ž: 0.9214862138533961 % -[27] Char ř: 0.8955951580363146 % -[28] Char ý: 0.7646267652992602 % -[29] Char š: 0.6605581708137189 % -[30] Char f: 0.6260928043039677 % -[31] Char ů: 0.5016812373907196 % -[32] Char g: 0.47041022192333554 % -[33] Char ú: 0.19502353732347008 % -[34] Char x: 0.13685272360457296 % -[35] Char ň: 0.05447209145931405 % -[36] Char w: 0.04488903833221251 % -[37] Char ó: 0.03429724277067922 % -[38] Char ť: 0.02269670477471419 % -[39] Char ď: 0.012104909213180902 % -[40] Char q: 0.007229320780094149 % +[ 0] Char o: 8.197651754316865 % +[ 1] Char e: 8.02455296277669 % +[ 2] Char n: 6.99807914876599 % +[ 3] Char a: 6.436164518585397 % +[ 4] Char i: 5.469477451711093 % +[ 5] Char t: 5.3903004082061345 % +[ 6] Char s: 4.630443169263149 % +[ 7] Char v: 3.9471372045252107 % +[ 8] Char r: 3.7742403952388743 % +[ 9] Char p: 3.6326508352976106 % +[10] Char l: 3.626389385428596 % +[11] Char k: 3.4845978432334936 % +[12] Char í: 3.306247513093499 % +[13] Char d: 3.2319180436806825 % +[14] Char c: 3.084269016124243 % +[15] Char u: 3.0539716780483666 % +[16] Char m: 2.917835638960761 % +[17] Char h: 2.260989349475755 % +[18] Char z: 2.074559729182194 % +[19] Char á: 2.05597736182899 % +[20] Char y: 2.00184611780009 % +[21] Char j: 1.8560149305282037 % +[22] Char b: 1.743510815139782 % +[23] Char ě: 1.2797595603250298 % +[24] Char é: 1.2238104760115776 % +[25] Char č: 0.9543661493901145 % +[26] Char ž: 0.9283104386448606 % +[27] Char ř: 0.8905397571769345 % +[28] Char ý: 0.7972239559032344 % +[29] Char š: 0.6172577677325269 % +[30] Char g: 0.5201043036358826 % +[31] Char f: 0.5120250134823154 % +[32] Char ů: 0.5093992441824061 % +[33] Char ú: 0.18077411718606404 % +[34] Char x: 0.1575461579945586 % +[35] Char w: 0.07291559363594315 % +[36] Char ň: 0.052313403744347016 % +[37] Char ó: 0.050495563459794425 % +[38] Char ť: 0.027469586522128164 % +[39] Char q: 0.010301094945798063 % +[40] Char ď: 0.010099112691958885 % -The first 41 characters have an accumulated ratio of 0.9999613315400132. +The first 41 characters have an accumulated ratio of 0.9999353656787715. -1025 sequences found. +1037 sequences found. -First 512 (typical positive ratio): 0.9786035192432675 -Next 512 (512-1024): 1.6812373907195695e-06 -Rest: 2.0246480655940202e-06 +First 512 (typical positive ratio): 0.9751874547460189 +Next 512 (512-1024): 0.009283104386448606 +Rest: 3.158667139656693e-05 -- Processing end: 2016-09-21 03:28:12.235582 +- Processing end: 2021-03-16 18:50:26.412061 diff --git a/script/BuildLangModelLogs/LangEsperantoModel.log b/script/BuildLangModelLogs/LangEsperantoModel.log index 5f020cd..05d0464 100644 --- a/script/BuildLangModelLogs/LangEsperantoModel.log +++ b/script/BuildLangModelLogs/LangEsperantoModel.log @@ -1,110 +1,157 @@ = Logs of language model for Esperanto (eo) = - Generated by BuildLangModel.py -- Started: 2015-12-04 01:22:51.466573 -- Maximum depth: 3 -- Max number of pages: 50 +- Started: 2021-03-16 18:50:26.592918 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == -Vikipedio:Ĉefpaĝo (revision 5524911) -10-a de novembro (revision 5792999) -12-a de novembro (revision 5793854) -13-a de novembro (revision 5795088) -18-a de novembro (revision 5796972) -2-a de novembro (revision 5772615) -20-a de novembro (revision 5799664) -2015 (revision 5791963) -22-a de novembro (revision 5799355) -24-a de novembro (revision 5800563) -4-a de decembro (revision 5806422) -4-a de novembro (revision 5789811) -5-a de novembro (revision 5789774) -6-a de novembro (revision 5790336) -7-a de novembro (revision 5791066) -8-a de novembro (revision 5791337) -9-a de novembro (revision 5791916) -A Night at the Opera (Queen) (revision 5184272) -Abdelhamid Abaaoud (revision 5800134) -André Glucksmann (revision 5792591) -Anglio (revision 5693468) -Argentino (revision 5804665) -Atencoj de novembro 2015 en Parizo (revision 5800135) -Aung San Suu Kyi (revision 5791362) -Austin FX4 (revision 5583207) -Azilo (revision 5751210) -Aŭstrio (revision 5804014) -Bahio (revision 5773065) -Bamako (revision 5798202) -Bataclan (revision 5795605) -Bejruto (revision 5774306) -Birmo (revision 5790386) -Blonda (revision 5441229) -Bohemian rhapsody (revision 5654078) -Cayetano Redondo (revision 5591025) -Ciro la 2-a (revision 5774667) -DJ Abdel (revision 5628860) -Daniela Mercury (revision 5764721) -Decembro de 2015 (revision 5626904) -Dilatkoeficiento (revision 5806460) -Eksproprietigo (revision 5586845) -Elektroniko (revision 5788966) -Elle s'appelait Sarah (filmo) (revision 5475154) -Esperanto (revision 5804190) -Federaciero (revision 5696168) -Fondaĵo Vikimedio (revision 5772681) -Francio (revision 5759775) -François Hollande (revision 5627721) +Vikipedio:Ĉefpaĝo (revision 7070684) +1-a de marto (revision 7133709) +10-a de marto (revision 7140053) +1812 (revision 6759865) +1836 (revision 6759900) +1870 (revision 6759944) +2-a de marto (revision 7134407) +2013 (revision 7120546) +2021 (revision 7133381) +20a jarcento (revision 6911173) +4-a de aprilo (revision 7095124) +7-a de februaro (revision 7126938) +7-a de marto (revision 7140031) +9-a de junio (revision 7096958) +Advokato (revision 7015897) +Alĝerio (revision 7136438) +Amazona arbaro (revision 7057380) +Anglio (revision 6910536) +Antikva Egiptio (revision 6715674) +Batao (revision 6348833) +Biero en Germanio (revision 5158902) +Bjalistoko (revision 7095427) +Charles Dickens (revision 7139853) +David Copperfield (romano) (revision 6728487) +Decembro de 2020 (revision 7115650) +Demotika lingvo (revision 6581652) +Duolingo (revision 6996800) +Eduko (revision 7064206) +Ekvatora Gvineo (revision 7111153) +El Greco (revision 7130251) +Emmanuel Macron (revision 7076767) +Esperantisto (revision 6583368) +Esperanto (revision 7125932) +Esperanto kaj Libera Scio (revision 7106401) +Eŭropa Kosma Agentejo (revision 6998003) +Fabriko (revision 6775703) +Februaro de 2021 (revision 7139991) +Fluganta Spagetmonstro (revision 7072467) +Fondaĵo Vikimedio (revision 7097854) +Francaj Armitaj Fortoj (revision 6521662) +Francio (revision 7035760) +Grandduklando Flandrensis (revision 7064691) +Hieroglifoj (revision 6475302) +Honkongo (revision 7022513) +Infanlaboro (revision 7043683) +Internacia Fonetika Alfabeto (revision 6826202) +Irlanda lingvo (revision 7108415) +Januaro de 2021 (revision 7119168) +Kreismo (revision 7029678) +Landport (revision 6722661) +Libera scio (revision 6432924) +Listen to Wikipedia (revision 6980163) +Listo de originalaj romanoj en Esperanto (revision 7134297) +Marto de 2021 (revision 7140759) +Metroo de Parizo (revision 7129616) +Monda Komerca Organizaĵo (revision 7135765) +Mutzig (revision 7085274) +Namacu (revision 6342288) +Ngozi Okonjo-Iweala (revision 7138302) +Niĝerio (revision 7135950) +Novelo (revision 7099911) +Oktobrofesto (revision 6860497) +Oseta Vikipedio (revision 7061966) +Portsmouth (revision 6756801) +Rolulo (revision 7078410) +Romano (revision 7102617) +San-Marino (revision 7075794) +Sismo (revision 6757493) +Slovaka Vikipedio (revision 6973132) +Strasburgo (revision 7139993) +Svahila Vikipedio (revision 6655220) +Telegram (aplikaĵo) (revision 6982939) +Teodoro Obiang Nguema Mbasogo (revision 6521358) +Verkisto (revision 6694998) +Vikio (revision 6761946) +Vikipedio (revision 7075981) +Vikipedio en Esperanto (revision 7075983) +Ĉeĥa Vikipedio (revision 5571847) +Ĉinio (revision 7133172) +Ĵurnalisto (revision 7129724) +-771 (revision 6917193) +-86 (revision 7120146) +1058 (revision 6758857) +11-a de marto (revision 7140194) +1101 (revision 6758901) +1105 (revision 6758905) +1131 (revision 6758935) +1157 (revision 6758962) +12-a de marto (revision 7141381) +1290 (revision 6759097) +13-a de marto (revision 7142227) +1389 (revision 6759315) +14-a de marto (revision 7142231) +1420 (revision 6759383) +1445 (revision 6759438) +1456 (revision 6759463) +1457 (revision 6759465) +1459 (revision 6759469) == End of Parsed pages == -- Wikipedia parsing ended at: 2015-12-04 01:27:38.176708 +- Wikipedia parsing ended at: 2021-03-16 18:54:42.162702 -56 characters appeared 342524 times. +55 characters appeared 738091 times. -First 35 characters: -[ 0] Char a: 12.557952143499435 % -[ 1] Char o: 9.84719318938235 % -[ 2] Char e: 9.10242785906973 % -[ 3] Char i: 8.362333734278474 % -[ 4] Char n: 7.6359612757062285 % -[ 5] Char r: 6.630192336887342 % -[ 6] Char t: 5.70821314710794 % -[ 7] Char l: 5.610409781504361 % -[ 8] Char s: 5.004320865107262 % -[ 9] Char k: 3.8855671427403626 % -[10] Char d: 3.7194473963868226 % -[11] Char j: 3.28531723324497 % -[12] Char u: 2.8465158645817517 % -[13] Char m: 2.787833845219605 % -[14] Char p: 2.6582078920017285 % -[15] Char g: 1.6825098387266293 % -[16] Char v: 1.4048650605505015 % -[17] Char c: 1.3823848839789328 % -[18] Char b: 1.1406499982482978 % -[19] Char f: 1.077296773364786 % -[20] Char z: 0.7342551178895493 % -[21] Char h: 0.6735294461118053 % -[22] Char ĝ: 0.53572888323154 % -[23] Char ŭ: 0.4268314045147202 % -[24] Char ĉ: 0.33545094650301877 % -[25] Char y: 0.17079095187490512 % -[26] Char ŝ: 0.15327393116978666 % -[27] Char w: 0.1442234704721421 % -[28] Char ĵ: 0.1039343228503696 % -[29] Char á: 0.0814541462788009 % -[30] Char ó: 0.05430276418586727 % -[31] Char é: 0.053718863495696656 % -[32] Char q: 0.04350060141771087 % -[33] Char x: 0.040873048311943105 % -[34] Char ĥ: 0.03824549520617533 % +First 32 characters: +[ 0] Char a: 12.443858548607151 % +[ 1] Char o: 9.828462886012701 % +[ 2] Char e: 9.238969178597218 % +[ 3] Char i: 8.570894374812863 % +[ 4] Char n: 7.557604685601098 % +[ 5] Char r: 6.426172382538196 % +[ 6] Char t: 5.784923539238386 % +[ 7] Char l: 5.684935868341437 % +[ 8] Char s: 5.134326255163659 % +[ 9] Char k: 4.062778166919797 % +[10] Char d: 3.544278415534128 % +[11] Char j: 3.39619369427347 % +[12] Char u: 2.807783864049284 % +[13] Char m: 2.731370522062998 % +[14] Char p: 2.685847680028614 % +[15] Char g: 1.6155189536249595 % +[16] Char v: 1.417033942969092 % +[17] Char c: 1.328968921176386 % +[18] Char b: 1.1882003709569686 % +[19] Char f: 1.1564969631115947 % +[20] Char h: 0.6592683016050866 % +[21] Char z: 0.6408423893530744 % +[22] Char ĝ: 0.5576548149211953 % +[23] Char ŭ: 0.44980903438735875 % +[24] Char ĉ: 0.3391180762263732 % +[25] Char w: 0.15404604581277917 % +[26] Char y: 0.13819434189009214 % +[27] Char ŝ: 0.12938783971082157 % +[28] Char ĵ: 0.1166522827131072 % +[29] Char á: 0.04579381133220701 % +[30] Char é: 0.039155063535526106 % +[31] Char ĥ: 0.031025984600814804 % -The first 35 characters have an accumulated ratio of 0.9991971365510156. +The first 32 characters have an accumulated ratio of 0.9990556719970846. -989 sequences found. +1066 sequences found. -First 512 (typical positive ratio): 0.9942980632768038 -Next 512 (512-1024): 0.0015327393116978665 -Rest: -5.0306980803327406e-17 +First 512 (typical positive ratio): 0.995442680189542 +Next 512 (512-1024): 0.004498090343873587 +Rest: 6.983124116715766e-05 -- Processing end: 2015-12-04 01:27:38.307198 +- Processing end: 2021-03-16 18:54:42.252378 diff --git a/script/BuildLangModelLogs/LangEstonianModel.log b/script/BuildLangModelLogs/LangEstonianModel.log index f1095eb..31acf96 100644 --- a/script/BuildLangModelLogs/LangEstonianModel.log +++ b/script/BuildLangModelLogs/LangEstonianModel.log @@ -1,159 +1,160 @@ = Logs of language model for Estonian (et) = - Generated by BuildLangModel.py -- Started: 2016-09-26 23:45:22.351942 -- Maximum depth: 5 +- Started: 2021-03-16 18:58:31.291439 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Harilik pohl (revision 4248853) -A-vitamiin (revision 4330862) -Aasta keskmine sademete hulk (revision 4266801) -Aasta keskmine õhutemperatuur (revision 3902142) -Ahm (revision 4343671) -Ain Raal (revision 4464651) -Alalehed (revision 2892741) -Alamliik (revision 3522810) -Alaska (revision 4216575) -Aleksander Heintalu (revision 4445156) -Aleuudid (revision 4335893) -Ameerika jänes (revision 4325220) -Ameerika valgejänes (revision 4355263) -Anneli Sihvart (revision 4211078) +Harilik pohl (revision 5703478) +A-vitamiin (revision 5556956) +Aasta keskmine sademete hulk (revision 5284375) +Aasta keskmine õhutemperatuur (revision 5542687) +Ahm (revision 5513665) +Ain Raal (revision 5662146) +Alalehed (revision 4983554) +Alamliik (revision 5278935) +Alaska (revision 5844590) +Aleksander Heintalu (revision 5754094) +Aleuudid (revision 4704649) +Ameerika jänes (revision 5843342) +Ameerika valgejänes (revision 5411720) +Anneli Sihvart (revision 3546469) Arbutiin (revision 4451788) -Baribal (revision 4268462) -Bensoehape (revision 3810308) -Binaarne nomenklatuur (revision 3970950) -C-vitamiin (revision 4444353) -Droog (revision 4352968) -E-vitamiin (revision 4336726) -Eesti (revision 4474984) -Eesti Entsüklopeediakirjastus (revision 4012421) -Eesti köök (revision 4314947) -Ellips (revision 4272113) +Baribal (revision 5793838) +Bensoehape (revision 5172889) +Binaarne nomenklatuur (revision 5719069) +C-vitamiin (revision 5487089) +Droog (revision 5051359) +E-vitamiin (revision 5553995) +Eesti (revision 5807277) +Eesti Entsüklopeediakirjastus (revision 5697753) +Eesti köök (revision 5622964) +Ellips (revision 5425749) Emakakael (botaanika) (revision 3521516) -Euraasia (revision 3710768) +Euraasia (revision 5843444) Fenoloogia (revision 3512905) -Folaadid (revision 4266628) -Fosfor (revision 4270122) -Fotosüntees (revision 4380600) -Fruktoos (revision 4285660) -Glükoos (revision 4047315) +Folaadid (revision 5695132) +Fosfor (revision 5817280) +Fotosüntees (revision 5849350) +Fruktoos (revision 5580398) +Glükoos (revision 5398752) Gneiss (revision 4333338) -Graniit (revision 4435351) -Gröönimaa (revision 4331557) -Halljänes (revision 4051603) -Haned (revision 4127680) +Graniit (revision 5788916) +Gröönimaa (revision 5704662) +Halljänes (revision 5844682) +Haned (revision 5655933) Happeline keskkond (revision 2966453) -Heilongjiang (revision 4342364) -Hendrik Relve (revision 4342591) -Hiina (revision 4448121) -Holland (revision 4307885) -Hunt (revision 4427752) -Hõimkond (revision 3489569) +Heilongjiang (revision 5573413) +Hendrik Relve (revision 5776793) +Hiina (revision 5842572) +Holland (revision 5563481) +Hunt (revision 5833431) +Hõimkond (revision 5594301) Hüdrofiilsus (revision 4309797) -Ida-Euroopa (revision 4337624) -Ida-sinilind (revision 4248853) -Ida-vöötorav (revision 3520679) -Igihaljus (revision 3536500) -Ilves (revision 4404632) -Imetaja (revision 4289188) -Indiaanlased (revision 4479868) -Indrek Rohtmets (revision 4218674) -Itaalia (revision 4404119) -Jaapan (revision 4465542) -Jilin (revision 3894473) -Jood (revision 4025060) +Ida-Euroopa (revision 5852084) +Ida-sinilind (revision 3944751) +Ida-vöötorav (revision 5772003) +Igihaljus (revision 5718075) +Ilves (revision 5810469) +Imetaja (revision 5817468) +Immuunsus (revision 5465129) +Indiaanlased (revision 5715264) +Indrek Rohtmets (revision 5460729) +Itaalia (revision 5821960) +Jaapan (revision 5848576) +Jilin (revision 5551781) +Jood (revision 5506157) Juurestik (revision 3341159) -Jääkaru (revision 4372399) -Jõhvikas (revision 4391549) -Kaalium (revision 4486067) -Kaheidulehelised (revision 4031352) +Jääkaru (revision 5798648) +Jõhvikas (revision 5765158) +Kaalium (revision 5506158) +Kaheidulehelised (revision 4551109) Kaheli õiekate (revision 3063362) Kahesuguline õis (revision 3383221) -Kaitsestaatus (revision 3527096) -Kajakas (revision 4456839) -Kalorsus (revision 3843290) -Kaltsium (revision 4339861) -Kanada (revision 4434682) -Kanalised (revision 3616579) +Kaitsestaatus (revision 5622492) +Kajakas (revision 5799897) +Kalorsus (revision 5843070) +Kaltsium (revision 5506160) +Kanada (revision 5846973) +Kanalised (revision 4824603) Kanarbikulaadsed (revision 4318215) -Kanarbikulised (revision 3534760) -Karboksüülhapped (revision 3659011) -Karoteen (revision 4347634) -Kasvuperiood (revision 4231717) -Katteseemnetaimed (revision 4176294) +Kanarbikulised (revision 5479568) +Karboksüülhapped (revision 5328337) +Karoteen (revision 5479578) +Kasvuperiood (revision 5279042) +Katteseemnetaimed (revision 5315975) Kaukasus (revision 4476003) -Kesk-Euroopa (revision 3580746) -Kimalane (revision 4261145) -Kiudained (toit) (revision 3538655) +Kesk-Euroopa (revision 5381871) +Kimalane (revision 5643935) +Kiudained (toit) (revision 5762236) Klass (bioloogia) (revision 3489567) -Kliima (revision 4160781) -Korea (revision 4329396) -Kroom (revision 4030460) +Kliima (revision 5719219) +Korea (revision 5555270) +Kroom (revision 5506123) Kroonlehed (revision 3543291) -Kuusepüü (revision 4028988) -Kvertsetiin (revision 4448461) -Laanemets (revision 4001157) -Laanepüü (revision 4475093) -Laiuskraad (revision 3990366) -Leesikas (revision 4420533) -Lehed (revision 4471821) -Leheroots (revision 3595351) -Liik (bioloogia) (revision 4320981) -Liiv (revision 4399494) -Liivakivi (revision 4330598) -Linnaeus (revision 4276836) -Linnud (revision 4479668) +Kuusepüü (revision 5715613) +Kvertsetiin (revision 5610539) +Laanemets (revision 5751227) +Laanepüü (revision 5747330) +Laiuskraad (revision 4993978) +Leesikas (revision 5842030) +Lehed (revision 5725384) +Leheroots (revision 5532086) +Liik (bioloogia) (revision 5791564) +Liiv (revision 5675176) +Liivakivi (revision 5548801) +Linnaeus (revision 5635181) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-26 23:47:54.476445 +- Wikipedia parsing ended at: 2021-03-16 19:01:52.570995 -55 characters appeared 433559 times. +55 characters appeared 482798 times. -First 33 characters: -[ 0] Char a: 12.486881831538499 % -[ 1] Char i: 10.26503889897338 % -[ 2] Char e: 10.177622884082673 % -[ 3] Char s: 8.710233209320991 % -[ 4] Char t: 6.56634967789851 % -[ 5] Char l: 6.051540851418146 % -[ 6] Char u: 5.423944607308348 % -[ 7] Char n: 5.131020230233947 % -[ 8] Char k: 4.663033174262327 % -[ 9] Char o: 4.526950195936424 % -[10] Char d: 4.167368224393911 % -[11] Char r: 3.6740097656835635 % -[12] Char m: 3.552688330769284 % -[13] Char v: 2.4700213811730354 % -[14] Char p: 1.9229216784797456 % -[15] Char g: 1.865259399528092 % -[16] Char h: 1.8043680329551455 % -[17] Char j: 1.6860450365463524 % -[18] Char ä: 1.0247740215287884 % -[19] Char b: 0.9255949017319443 % -[20] Char õ: 0.9246723052687178 % -[21] Char ü: 0.6536595941959457 % -[22] Char f: 0.37342091849090897 % -[23] Char c: 0.34851081398379463 % -[24] Char ö: 0.24333481717597835 % -[25] Char y: 0.1287022066200909 % -[26] Char x: 0.06781084004714467 % -[27] Char w: 0.04082489349777078 % -[28] Char q: 0.020989069538401926 % -[29] Char š: 0.018913227496142396 % -[30] Char z: 0.017529332801302706 % -[31] Char ō: 0.010379210211297655 % -[32] Char ž: 0.009687262863877812 % +First 34 characters: +[ 0] Char a: 12.61500669016856 % +[ 1] Char i: 10.380117564695794 % +[ 2] Char e: 10.063007717513328 % +[ 3] Char s: 8.719795856652263 % +[ 4] Char t: 6.619538606207979 % +[ 5] Char l: 6.04559256666349 % +[ 6] Char u: 5.504372429049002 % +[ 7] Char n: 5.077278696266347 % +[ 8] Char k: 4.702380705802427 % +[ 9] Char o: 4.470606754791859 % +[10] Char d: 4.163438953765343 % +[11] Char r: 3.6719290469306007 % +[12] Char m: 3.5747869709485123 % +[13] Char v: 2.4621063053285224 % +[14] Char p: 1.8848462503987176 % +[15] Char g: 1.8341003898110597 % +[16] Char h: 1.7551853984482124 % +[17] Char j: 1.7216309926718836 % +[18] Char ä: 1.033972800218725 % +[19] Char õ: 0.9384877319292955 % +[20] Char b: 0.8972696655744226 % +[21] Char ü: 0.6507897712915132 % +[22] Char f: 0.34610748180398426 % +[23] Char c: 0.30426803756436444 % +[24] Char ö: 0.24275162697442823 % +[25] Char y: 0.1056342404069611 % +[26] Char x: 0.05550975770405014 % +[27] Char w: 0.035211413468987034 % +[28] Char z: 0.025476493274620024 % +[29] Char q: 0.019884092311898558 % +[30] Char š: 0.017605706734493517 % +[31] Char é: 0.009527794232784725 % +[32] Char ō: 0.009113542309620172 % +[33] Char ž: 0.00869929038645562 % -The first 33 characters have an accumulated ratio of 0.9995410082595447. +The first 34 characters have an accumulated ratio of 0.9996603134230051. -853 sequences found. +869 sequences found. -First 512 (typical positive ratio): 0.9972721312183132 -Next 512 (512-1024): 9.687262863877811e-05 -Rest: -5.204170427930421e-18 +First 512 (typical positive ratio): 0.9973685549586747 +Next 512 (512-1024): 8.69929038645562e-05 +Rest: -3.122502256758253e-17 -- Processing end: 2016-09-26 23:47:54.561846 +- Processing end: 2021-03-16 19:01:52.649852 diff --git a/script/BuildLangModelLogs/LangFinnishModel.log b/script/BuildLangModelLogs/LangFinnishModel.log index e99e9aa..f7247f3 100644 --- a/script/BuildLangModelLogs/LangFinnishModel.log +++ b/script/BuildLangModelLogs/LangFinnishModel.log @@ -1,156 +1,157 @@ = Logs of language model for Finnish (fi) = - Generated by BuildLangModel.py -- Started: 2016-09-21 18:12:24.181917 -- Maximum depth: 5 +- Started: 2021-03-16 19:01:52.812613 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Yhdistynyt kuningaskunta (revision 15843357) -1. toukokuuta (revision 15910178) -1700-luku (revision 15493702) -1707 (revision 15106709) -1800-luku (revision 15708929) -2014 (revision 15891601) -409 (revision 12809782) -5. marraskuuta (revision 15421719) -927 (revision 12785964) -Aasia (revision 15948161) -Abhasia (revision 15730328) -Adolf Hitler (revision 15951829) -Afrikka (revision 15934209) -Agatha Christie (revision 15760740) -Aikavyöhyke (revision 15800313) -Ajoneuvon kansallisuustunnus (revision 15897445) -Akrotiri ja Dhekelia (revision 14625383) -Alamaat (revision 15913741) -Alan Turing (revision 15904871) -Alankomaat (revision 15936643) -Albania (revision 15767604) -Alec Guinness (revision 15363805) -Alexander Fleming (revision 15023225) -Alfred Hitchcock (revision 15892843) -Alfred Tennyson (revision 15856114) -Allen Jones (revision 12871703) -Andorra (revision 15913862) -Andrew Lloyd Webber (revision 14978349) -Anglit (revision 15902350) -Anguilla (revision 15854041) -Anne Brontë (revision 14287992) -Anthony Eden (revision 14391831) -Antigua ja Barbuda (revision 15196967) -Arabian Lawrence (revision 15736417) -Argentiina (revision 15676474) -Armenia (revision 15634470) -Arthur Conan Doyle (revision 15402837) -Arts and Crafts (revision 15806930) -Aurinko (revision 15934252) -Australia (revision 15934255) -Avara luonto (revision 15815943) -Azerbaidžan (revision 15946891) -BBC (revision 15866026) -BKT (revision 15656549) -Bahama (revision 15516869) -Bangladesh (revision 15883994) -Bank of England (revision 14481173) -Barbados (revision 15839821) -Barbara Hepworth (revision 15106880) -Bath (revision 15869900) -Beatrix Potter (revision 15057380) -Belfast (revision 15715934) -Belgia (revision 15932391) -Belize (revision 15665086) -Ben Nevis (revision 15610196) -Bengalin kieli (revision 15551820) -Benjamin Britten (revision 15081615) -Bermuda (revision 15632621) -Bertrand Russell (revision 14631969) -Bhutan (revision 15377394) -Big Ben (revision 14897401) -Big Brother (revision 14641391) -Birmingham (revision 15855259) -Black Sabbath (revision 15839917) -Bosnia ja Hertsegovina (revision 15934266) -Botswana (revision 15524955) -Bristol (revision 15891889) -Bristolin kanaali (revision 15849713) -Bristolin kansainvälinen lentoasema (revision 14452870) -Britannia (provinssi) (revision 14557442) -Britannian avoin golfturnaus (revision 14293265) -Britannian kuninkaallinen perhe (revision 15522149) -Britannian talous (revision 15470242) -Britannian väestö (revision 15661241) -Brittein saaret (revision 15805422) -Brittiläinen Antarktiksen alue (revision 15836227) -Brittiläinen Intia (revision 15593126) -Brittiläinen Intian valtameren alue (revision 14272903) -Brittiläinen imperiumi (revision 15906600) -Brittiläinen kansainyhteisö (revision 15894379) -Brittiläinen keittiö (revision 13393533) -Brittiläinen kulttuuri (revision 15951407) -Brittiläiset Neitsytsaaret (revision 15910520) -Brittiläiset merentakaiset alueet (revision 15836213) -Brunei (revision 15580824) -Bruttokansantuote (revision 15656549) -Bulgaria (revision 15944101) -Burma (revision 15627218) -Cambridge (revision 14641664) -Cambridgen yliopisto (revision 15493340) -Canterburyn tarinoita (revision 15232140) -Cardiff (revision 15840398) -Caymansaaret (revision 15914575) -Channel 4 (revision 15882475) -Charles Babbage (revision 15203616) -Charles Chaplin (revision 15674652) -Charles Darwin (revision 15894085) -Charles Dickens (revision 15699592) -Charles Dickensin joulutarina (revision 15116247) +Yhdistynyt kuningaskunta (revision 19524940) +2014 (revision 19539649) +Aasia (revision 19400161) +Abhasia (revision 19547259) +Adolf Hitler (revision 19547632) +Advanced Level (revision 18652085) +Afrikka (revision 19621405) +Agatha Christie (revision 19512386) +Aikavyöhyke (revision 19555749) +Ajoneuvon kansallisuustunnus (revision 18193201) +Akrotiri ja Dhekelia (revision 18855449) +Alamaat (revision 19549275) +Alan Turing (revision 19549334) +Alankomaat (revision 19640525) +Albania (revision 19549481) +Alec Guinness (revision 19544530) +Alexander Fleming (revision 19522285) +Alfred Hitchcock (revision 19402710) +Alfred Tennyson (revision 19481118) +Allen Jones (revision 19591974) +Andorra (revision 19511981) +Andrew Lloyd Webber (revision 18151455) +Anglit (revision 19065858) +Anguilla (revision 19591105) +Anne Brontë (revision 19340812) +Anthony Eden (revision 19341665) +Antigua ja Barbuda (revision 18868418) +Arabian Lawrence (revision 19429776) +Argentiina (revision 19507062) +Armenia (revision 19633290) +Arthur Conan Doyle (revision 19393798) +Arts and crafts (revision 19285842) +Artsakhin tasavalta (revision 19508669) +Atlantin valtameri (revision 19417172) +Aurinko (revision 19558951) +Australia (revision 19585414) +Avara luonto (revision 19570513) +Azerbaidžan (revision 19618379) +BBC (revision 19151226) +BKT (revision 19395273) +Bahama (revision 19614968) +Bangladesh (revision 19529050) +Bank of England (revision 17954121) +Barbados (revision 19193877) +Barbara Hepworth (revision 19016157) +Bath (revision 19316232) +Beatrix Potter (revision 19590080) +Belfast (revision 19638226) +Belgia (revision 19623003) +Belize (revision 18839172) +Ben Nevis (revision 19287404) +Bengalin kieli (revision 19361714) +Benjamin Britten (revision 19284581) +Bermuda (revision 19508737) +Bertrand Russell (revision 19418381) +Bhutan (revision 19609977) +Big Ben (revision 19521754) +Big Brother (revision 19638747) +Birmingham (revision 19638219) +Birminghamin kansainvälinen lentoasema (revision 19638219) +Black Sabbath (revision 19637531) +Bodiamin linna (revision 19288333) +Boris Johnson (revision 18896646) +Bosnia ja Hertsegovina (revision 19317622) +Botswana (revision 19174485) +Brexit (revision 19428746) +Bristol (revision 19316368) +Bristolin kansainvälinen lentoasema (revision 19316368) +Britannia (provinssi) (revision 19620168) +Britannia (täsmennyssivu) (revision 19524940) +Britannian alahuone (revision 19561351) +Britannian avoin golfturnaus (revision 18803777) +Britannian kuninkaallinen perhe (revision 18877640) +Britannian merentakaiset alueet (revision 18985200) +Britannian talous (revision 19363886) +Britannian väestö (revision 19334304) +Britannian ylähuone (revision 19561348) +Britteinsaaret (revision 19149527) +Brittiläinen Antarktiksen alue (revision 19065469) +Brittiläinen Intia (revision 19532682) +Brittiläinen Intian valtameren alue (revision 19386472) +Brittiläinen imperiumi (revision 18932562) +Brittiläinen keittiö (revision 18379105) +Brittiläinen kulttuuri (revision 19490255) +Brittiläiset Neitsytsaaret (revision 19078289) +Brittiläiset merentakaiset alueet (revision 18985200) +Brunei (revision 19566565) +Bruttokansantuote (revision 19395273) +Bulgaria (revision 19361771) +Burma (revision 19618164) +Cambridge (revision 19030154) +Cambridgen yliopisto (revision 18847878) +Canterburyn tarinoita (revision 19505844) +Cardiff (revision 18124102) +Caymansaaret (revision 19078996) +Ceylonin dominio (revision 18848736) +Channel 4 (revision 19210598) +Charles Babbage (revision 19265262) +Charles Chaplin (revision 19446083) +Charles Darwin (revision 19338522) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-21 18:15:05.189221 +- Wikipedia parsing ended at: 2021-03-16 19:06:31.128554 -61 characters appeared 940364 times. +65 characters appeared 1138649 times. First 30 characters: -[ 0] Char a: 12.508773198463574 % -[ 1] Char i: 10.969475649854738 % -[ 2] Char n: 8.815841525196626 % -[ 3] Char t: 8.80169806585535 % -[ 4] Char e: 7.8206949649284745 % -[ 5] Char s: 7.595782058862313 % -[ 6] Char l: 5.963541777439374 % -[ 7] Char o: 5.439808414613916 % -[ 8] Char u: 5.0102938861972595 % -[ 9] Char k: 4.589712068943515 % -[10] Char r: 3.1231523112326713 % -[11] Char ä: 3.041800834570443 % -[12] Char m: 3.0392486313810396 % -[13] Char v: 2.156292669647073 % -[14] Char h: 1.996141919512019 % -[15] Char j: 1.9248929138078446 % -[16] Char p: 1.6324529650220552 % -[17] Char y: 1.6323466232224966 % -[18] Char d: 1.1981530556252684 % -[19] Char b: 0.6835650875618378 % -[20] Char g: 0.5793501239945382 % -[21] Char c: 0.5056552569005194 % -[22] Char ö: 0.38931732818355447 % -[23] Char f: 0.215023118707224 % -[24] Char w: 0.2106631049253268 % -[25] Char z: 0.06593191572625068 % -[26] Char x: 0.024458613898447838 % -[27] Char š: 0.010421496356729947 % -[28] Char ž: 0.007869293167326695 % -[29] Char q: 0.007762951367768225 % +[ 0] Char a: 12.546096294819561 % +[ 1] Char i: 10.975375203420896 % +[ 2] Char n: 8.908627680698793 % +[ 3] Char t: 8.82282424171101 % +[ 4] Char e: 7.780448584243256 % +[ 5] Char s: 7.584865924442036 % +[ 6] Char l: 5.942217487566405 % +[ 7] Char o: 5.487731513398773 % +[ 8] Char u: 5.063017663915746 % +[ 9] Char k: 4.558296718303885 % +[10] Char r: 3.1709508373519846 % +[11] Char m: 3.0275352632813095 % +[12] Char ä: 2.9864339230087587 % +[13] Char v: 2.178195387691905 % +[14] Char j: 1.9307969356667418 % +[15] Char h: 1.9113001460502754 % +[16] Char p: 1.6500256005142937 % +[17] Char y: 1.593203875821258 % +[18] Char d: 1.2042341406350858 % +[19] Char b: 0.6837049872260899 % +[20] Char g: 0.5634747845912129 % +[21] Char c: 0.4858389196319498 % +[22] Char ö: 0.38018739752109737 % +[23] Char f: 0.21982191175682764 % +[24] Char w: 0.19382619226820558 % +[25] Char z: 0.0598955428758116 % +[26] Char x: 0.02564442598201904 % +[27] Char ž: 0.009484924678281015 % +[28] Char š: 0.009309277924979516 % +[29] Char q: 0.007201516885361511 % -The first 30 characters have an accumulated ratio of 0.9996012182516557. +The first 30 characters have an accumulated ratio of 0.9996056730388382. -919 sequences found. +940 sequences found. -First 512 (typical positive ratio): 0.9985378147555799 -Next 512 (512-1024): 1.0634179955846884e-06 -Rest: 3.881443777498106e-17 +First 512 (typical positive ratio): 0.9985812031154878 +Next 512 (512-1024): 9.484924678281016e-05 +Rest: 2.7321894746634712e-17 -- Processing end: 2016-09-21 18:15:05.307164 +- Processing end: 2021-03-16 19:06:31.204594 diff --git a/script/BuildLangModelLogs/LangGreekModel.log b/script/BuildLangModelLogs/LangGreekModel.log index a61b2ec..ee210e2 100644 --- a/script/BuildLangModelLogs/LangGreekModel.log +++ b/script/BuildLangModelLogs/LangGreekModel.log @@ -1,272 +1,174 @@ = Logs of language model for Greek (el) = - Generated by BuildLangModel.py -- Started: 2016-05-25 15:16:42.898905 -- Maximum depth: 5 -- Max number of pages: 200 +- Started: 2021-03-16 18:54:42.415198 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == -Πύλη:Κύρια (revision 5511929) -14 Σεπτεμβρίου (revision 5808678) -16 Σεπτεμβρίου (revision 5810117) -1771 (revision 4940722) -1829 (revision 5863423) -1921 (revision 5819621) -1948 (revision 5785943) -1965 (revision 5846907) -1970 (revision 5816968) -1973 (revision 5423504) -25 Μαΐου (revision 5865973) -Eurovision (revision 5865484) -Scorpions (revision 5586116) -Wiki (revision 5859059) -Wikimedia (revision 5771416) -Αγία Πετρούπολη (revision 5782933) -Αγγλική γλώσσα (revision 5851128) -Αλεξάντρ Πούσκιν (revision 5790131) -Βέλος ΙΙ (Αντιτορπιλικό) (revision 5178914) -Βραζιλία (revision 5857981) -Γαλλική γλώσσα (revision 5851119) -Γαλλική εισβολή στην Ρωσία (revision 5858523) -Γενικές εκλογές στη Δομινικανή Δημοκρατία 2016 (revision 5848770) -Γηραιότερο πρόσωπο στον κόσμο (revision 5852034) -Διαγωνισμός Τραγουδιού Eurovision 2016 (revision 5863783) -Δικτατορία των Συνταγματαρχών (revision 5864405) -Δομινικανή Δημοκρατία (revision 5848627) -Εγκυκλοπαίδεια (revision 5566281) -Ελεύθερο περιεχόμενο (revision 5824058) -Ελλάδα (revision 5863759) -Ελληνική γλώσσα (revision 5790854) -Ιππικό (revision 5376587) -Ιταλία (revision 5781867) -Κίεβο (revision 5794613) -Κατάληψη του Παρισιού (1814) (revision 5729368) -Κλάους Μάιν (revision 5668218) -Μάχη της Λειψίας (revision 5729316) -Μάχη της Σαλτάνοφκα (revision 5865460) -Μάχη του Μποροντίνο (revision 5670322) -Μαξ Βερστάπεν (revision 5864745) -Μπλουζ (revision 5846428) -Νίκος Καχτίτσης (revision 5723615) -Νικολάι Νικολάεβιτς Ραέφσκι (revision 5865460) -Ντίλμα Ρούσεφ (revision 5843412) -Ομοσπονδιακό Σοβιέτ της Ρωσικής Αυτοκρατορίας (revision 5865460) -Ουκρανία (revision 5847651) -Πάτρα (revision 5800331) -Ποδόσφαιρο (revision 5864952) -Πριμέρα Ντιβιζιόν (revision 5846965) -Ρωσική Αυτοκρατορία (revision 5858419) -Ρωσική γλώσσα (revision 5818960) -Ρώσοι (revision 5376764) -Σουζάνα Μούσατ Τζόουνς (revision 5848866) -Στοκχόλμη (revision 5670508) -Στρατηγός (revision 5464718) -Τζακ Στάινμπεργκερ (revision 5820361) -Τζαμάλα (revision 5863755) -ΦΚ Μπαρτσελόνα (revision 5862032) -Φόρμουλα Ένα (revision 5809160) -10 Σεπτεμβρίου (revision 5841838) -11 Σεπτεμβρίου (revision 5796866) -12 Σεπτεμβρίου (revision 5795991) -1321 (revision 5811404) -13 Σεπτεμβρίου (revision 5830505) -1435 (revision 5600729) -1498 (revision 5831868) -1523 (revision 5863396) -1527 (revision 5579042) -1580 (revision 5742938) -15 Σεπτεμβρίου (revision 5817369) -1712 (revision 5699806) -1741 (revision 5817896) -1752 (revision 5666171) -1760 (revision 5490201) -1769 (revision 5336004) -17 Σεπτεμβρίου (revision 5843911) -1812 (revision 5703237) -1814 (revision 5751122) -1851 (revision 5854460) -1878 (revision 5863501) -1889 (revision 5795061) -1890 (revision 5705460) -1898 (revision 5863504) -18 Σεπτεμβρίου (revision 5661544) -1901 (revision 5865687) -1902 (revision 5779111) -1905 (revision 5862599) -1910 (revision 5794858) -1916 (revision 5800363) -1917 (revision 5865701) -1925 (revision 5854774) -1927 (revision 5839595) -1928 (revision 5814308) -1933 (revision 5854834) -1936 (revision 5854290) -1937 (revision 5794891) -1943 (revision 5807315) -1944 (revision 5865804) -1950 (revision 5807377) -1956 (revision 5795994) -1960 (revision 5795065) -1963 (revision 5863751) -1966 (revision 5707508) -1969 (revision 5668647) -1980 (revision 5832053) -1981 (revision 5817635) -1982 (revision 5788879) -1983 (revision 5812702) -1984 (revision 5749754) -1989 (revision 5846909) -1994 (revision 5863999) -1999 (revision 5795003) -19 Σεπτεμβρίου (revision 5850863) -1 Σεπτεμβρίου (revision 5630491) -2000 (revision 5779037) -2001 (revision 5779042) -2005 (revision 5779066) -2006 (revision 5808681) -2009 (revision 5827105) -2011 (revision 5808660) -2016 (revision 5801621) -20 Σεπτεμβρίου (revision 5808561) -21 Σεπτεμβρίου (revision 5751207) -22 Σεπτεμβρίου (revision 5807133) -23 Σεπτεμβρίου (revision 5800012) -24 Σεπτεμβρίου (revision 5662618) -258 (revision 4952368) -25 Σεπτεμβρίου (revision 5817621) -26 Σεπτεμβρίου (revision 5817637) -27 Σεπτεμβρίου (revision 5817648) -28 Σεπτεμβρίου (revision 5817677) -29 Σεπτεμβρίου (revision 5703562) -2 Σεπτεμβρίου (revision 5701639) -30 Σεπτεμβρίου (revision 5838312) -326 (revision 5818811) -3 Σεπτεμβρίου (revision 5816313) -407 (revision 4952524) -4 Σεπτεμβρίου (revision 5816970) -5 Σεπτεμβρίου (revision 5817185) -628 (revision 5398024) -680 (revision 5365010) -685 (revision 5819296) -6 Σεπτεμβρίου (revision 5765157) -775 (revision 5373211) -786 (revision 5398031) -7 Σεπτεμβρίου (revision 5749649) -81 (revision 5397958) -891 (revision 4952139) -8 Σεπτεμβρίου (revision 5788878) -9 Σεπτεμβρίου (revision 5817240) -CIA (revision 5857678) -Miyavi (revision 4944860) -Άρμεν Κούπτσιος (revision 5766774) -Έιμι Γουάινχαουζ (revision 5809279) -Έρβιν Θάλμπεργκ (revision 5716376) -Ίων Δραγούμης (revision 5818568) -Αγία Ελένη (revision 5821916) -Αλεξάντερ φον Χούμπολτ (revision 5773636) -Αλμπέρτο Κόρντα (revision 5800055) -Απρίλιος (revision 5766829) -Αυτοκρατορία των Σασσανιδών (revision 5859880) -Αύγουστος (revision 5461793) -Β΄ Παγκόσμιος Πόλεμος (revision 5848530) -Βέρμαχτ (revision 5212228) -Βασίλης Λάσκος (revision 5695445) -Βενεζουέλα (revision 5847962) -Βρετανική Αυτοκρατορία (revision 5606306) -Βόρεια Ελλάδα (revision 5670938) -Γαλλία (revision 5776756) -Γεώργιος Καρατζαφέρης (revision 5803114) -Γιάννης Λάτσης (revision 5692530) -Γιάννος Κρανιδιώτης (revision 5574536) -Γιώργος Παπασιδέρης (μουσικός) (revision 5722203) -Γκέοργκ Φρήντριχ Χαίντελ (revision 5807098) -Γκρέις Κέλι (revision 5807168) -Γρηγοριανό Hμερολόγιο (revision 5793842) -Γρηγοριανό ημερολόγιο (revision 5793842) -Γρηγόρης Λαμπράκης (revision 5752808) -Δάντης Αλιγκέρι (revision 5648882) -Δήμος Βιάννου (revision 4816422) -Δεκέμβριος (revision 5461807) -Δομιτιανός (revision 5735554) -Δράμα (πόλη) (revision 5857326) -Ενιαία Δημοκρατική Αριστερά (revision 5742309) -Ετόρε Σότσας (revision 5785872) -Ζιλ Αντριαμαχαζό (revision 5819706) -Η.Π.Α. (revision 5845171) -Ηράκλειος (revision 5778827) -Θεσσαλονίκη (revision 5844955) -Θεόδωρος Ρούζβελτ (revision 5815087) -Ιανουάριος (revision 5615044) -Ιερουσαλήμ (revision 5824734) -Ιησούς Χριστός (revision 5859687) -Ιούλιος (revision 5712711) -Ιούνιος (revision 5461799) -Ιράκ (revision 5820378) -Ιράν (revision 5861249) -Ισιδώρα Ντάνκαν (revision 5044778) -Ιωάννης ο Χρυσόστομος (revision 5824898) +Πύλη:Κύρια (revision 7950664) +16 Μαρτίου (revision 8737120) +1797 (revision 8019834) +1839 (revision 8019704) +1900 (revision 7952521) +1901 (revision 7905277) +1935 (revision 8290828) +Mars 2020 (revision 8718725) +Perseverance (ρόβερ) (revision 8718754) +The Economist (revision 8341010) +Wiki (revision 8595867) +Wikimedia (revision 8518678) +Άρθουρ Έβανς (revision 8502931) +Άρθρουρ Γουέλσλεϋ, Δούκας του Ουέλλινγκτον (revision 8423158) +Αγγλική γλώσσα (revision 8702613) +Αδόλφος Χίτλερ (revision 8722090) +Αντισφαίριση (revision 8557812) +Αρειανό ελικόπτερο Ingenuity (revision 8718783) +Αυστραλιανό Όπεν (revision 8078988) +Βέρμαχτ (revision 8711795) +Βραβεία Νόμπελ Λογοτεχνίας (revision 8519145) +Γαλλία (revision 8680274) +Γενικός Διευθυντής του Παγκόσμιου Οργανισμού Εμπορίου (revision 8694448) +Γερμανία (revision 8724575) +Εγκυκλοπαίδεια (revision 8687200) +Ελεύθερο περιεχόμενο (revision 8707719) +Ελληνική Βικιπαίδεια (revision 8731090) +Κνωσός (revision 8697910) +Κρήτη (revision 8735869) +Λονδίνο (revision 8666776) +Ναόμι Οσάκα (revision 8736512) +Νγκόζι Οκόντζο-Ιουεάλα (revision 8716446) +Νόβακ Τζόκοβιτς (revision 8735633) +Ουίλιαμ Μπάντινγκ (revision 8298356) +Παγκόσμιος Οργανισμός Εμπορίου (revision 8694448) +Πατριάρχης Σερβίας Πορφύριος (revision 8716966) +Σερβική Ορθόδοξη Εκκλησία (revision 8703081) +Συλί Προυντόμ (revision 8736464) +Συνθήκη των Βερσαλλιών (revision 7991516) +10 Μαρτίου (revision 8726574) +1185 (revision 8532989) +1190 (revision 8729267) +11 Μαρτίου (revision 8730381) +1244 (revision 7906151) +12 Μαρτίου (revision 8730152) +13 Μαρτίου (revision 8544014) +1405 (revision 7906083) +1410 (revision 7906088) +1465 (revision 7905889) +1473 (revision 8687951) +1478 (revision 7905905) +14 Μαρτίου (revision 8096796) +15 Μαρτίου (revision 8734431) +1670 (revision 8120689) +1751 (revision 8019900) +1782 (revision 8019823) +1789 (revision 8019786) +1792 (revision 8019828) +1794 (revision 8019829) +17 Μαρτίου (revision 8233521) +1802 (revision 8019791) +1812 (revision 8019794) +1815 (revision 8728979) +1859 (revision 8019719) +1872 (revision 8019620) +1888 (revision 8678352) +1892 (revision 8019578) +1894 (revision 8019646) +1898 (revision 7905275) +18 Μαρτίου (revision 8666328) +1906 (revision 8019564) +1908 (revision 8110859) +1911 (revision 8234911) +1912 (revision 7905254) +1919 (revision 8188234) +1920 (revision 8689556) +1921 (revision 8019599) +1923 (revision 8640393) +1924 (revision 8019604) +1925 (revision 8424340) +1926 (revision 8019613) +1927 (revision 7905236) +1930 (revision 8019616) +1937 (revision 7905218) +1939 (revision 8731642) +1940 (revision 8503734) +1944 (revision 8556801) +1945 (revision 8699418) +1948 (revision 8707830) +1953 (revision 8660010) +1955 (revision 8733996) +1956 (revision 8637553) +1957 (revision 8582051) +1959 (revision 8621124) +1964 (revision 8701289) +1966 (revision 8596642) +1967 (revision 8657263) +1968 (revision 8640882) +1969 (revision 8709383) +1970 (revision 8645926) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-05-25 15:21:50.071087 +- Wikipedia parsing ended at: 2021-03-16 18:58:31.004638 -63 characters appeared 1875535 times. +62 characters appeared 801479 times. -First 46 characters: -[ 0] Char α: 9.004097497514042 % -[ 1] Char ο: 8.311015256980008 % -[ 2] Char τ: 7.94493304577094 % -[ 3] Char ι: 6.338831320129989 % -[ 4] Char ν: 5.836627948825269 % -[ 5] Char ε: 5.635565318695733 % -[ 6] Char ρ: 4.717907157157825 % -[ 7] Char σ: 4.307197679595422 % -[ 8] Char κ: 4.061294510632965 % -[ 9] Char ς: 3.766551943845356 % -[10] Char η: 3.7565281373048225 % -[11] Char π: 3.4156653968067783 % -[12] Char υ: 3.30956233821283 % -[13] Char μ: 3.1442761665338157 % -[14] Char λ: 3.0899983204792236 % -[15] Char ί: 2.429973314280992 % -[16] Char ό: 2.076100952528212 % -[17] Char ά: 1.922651403466211 % -[18] Char γ: 1.8994047031913561 % -[19] Char έ: 1.6641651582081913 % -[20] Char δ: 1.508582884350332 % -[21] Char ω: 1.2410325587099147 % -[22] Char ή: 1.2077087337746297 % -[23] Char χ: 1.0482342371643292 % -[24] Char ύ: 0.9225101104484854 % -[25] Char β: 0.8928652357860558 % -[26] Char θ: 0.8681256281541001 % -[27] Char φ: 0.806756472153279 % -[28] Char ώ: 0.6969211451665791 % -[29] Char ζ: 0.35515199663029484 % -[30] Char e: 0.35488540603081253 % -[31] Char ξ: 0.314736861748781 % -[32] Char a: 0.2909036621550651 % -[33] Char i: 0.2884510286398281 % -[34] Char o: 0.24137112877125727 % -[35] Char r: 0.23262695710823847 % -[36] Char n: 0.2206303801315358 % -[37] Char t: 0.21668483925919804 % -[38] Char s: 0.2013825388489151 % -[39] Char l: 0.14865091827131993 % -[40] Char d: 0.1359078876160669 % -[41] Char c: 0.12124540464454144 % -[42] Char h: 0.1166600463334462 % -[43] Char u: 0.10381037943840024 % -[44] Char m: 0.09074744006376848 % -[45] Char ψ: 0.08669526295163779 % +First 47 characters: +[ 0] Char α: 8.791371951105393 % +[ 1] Char ο: 8.656870610458913 % +[ 2] Char τ: 7.436002690026814 % +[ 3] Char ι: 6.335661944979219 % +[ 4] Char ν: 5.906455440504367 % +[ 5] Char ε: 5.323907426145913 % +[ 6] Char ρ: 5.098698780629311 % +[ 7] Char ς: 4.129740142910793 % +[ 8] Char κ: 4.033542987402041 % +[ 9] Char σ: 3.9103956560309125 % +[10] Char υ: 3.7128858023728633 % +[11] Char η: 3.4742020689250745 % +[12] Char λ: 3.4385180397739674 % +[13] Char π: 3.329220104332116 % +[14] Char μ: 3.3050148537890576 % +[15] Char ί: 2.7370648513560556 % +[16] Char ό: 2.185958708837038 % +[17] Char γ: 2.095251403966916 % +[18] Char ά: 1.8429678132552443 % +[19] Char έ: 1.6417148796163092 % +[20] Char δ: 1.4553094965682194 % +[21] Char β: 1.2000314418718394 % +[22] Char ω: 1.121801070271336 % +[23] Char ή: 1.0494348573075527 % +[24] Char χ: 0.9217958299593626 % +[25] Char ύ: 0.8777522555176118 % +[26] Char φ: 0.8600350102747546 % +[27] Char θ: 0.7800578680165045 % +[28] Char ώ: 0.617732966178777 % +[29] Char ζ: 0.4195992658572464 % +[30] Char e: 0.30456194111137036 % +[31] Char ξ: 0.28696946520120925 % +[32] Char i: 0.25203405204627943 % +[33] Char a: 0.23631311612656103 % +[34] Char n: 0.21647479222786872 % +[35] Char r: 0.1978841616561382 % +[36] Char o: 0.18915030836740576 % +[37] Char s: 0.17779629909205355 % +[38] Char t: 0.16269920983581604 % +[39] Char l: 0.14585534992183202 % +[40] Char d: 0.11665932607092637 % +[41] Char c: 0.10468147013209328 % +[42] Char h: 0.09257884486056403 % +[43] Char u: 0.08409453023722394 % +[44] Char m: 0.08247252891217362 % +[45] Char ΐ: 0.07161759696760614 % +[46] Char ψ: 0.06774974765402461 % -The first 46 characters have an accumulated ratio of 0.993456267145108. +The first 47 characters have an accumulated ratio of 0.9947858895866266. -1579 sequences found. +1390 sequences found. -First 512 (typical positive ratio): 0.958419074626211 -Next 512 (512-1024): 0.006969211451665791 -Rest: 0.0018920066107342773 +First 512 (typical positive ratio): 0.9624941725288916 +Next 512 (512-1024): 0.00617732966178777 +Rest: 0.0016086054433421051 -- Processing end: 2016-05-25 15:21:50.812982 +- Processing end: 2021-03-16 18:58:31.125842 diff --git a/script/BuildLangModelLogs/LangHungarianModel.log b/script/BuildLangModelLogs/LangHungarianModel.log index f04ad98..1e4ed44 100644 --- a/script/BuildLangModelLogs/LangHungarianModel.log +++ b/script/BuildLangModelLogs/LangHungarianModel.log @@ -1,109 +1,157 @@ = Logs of language model for Hungarian (hu) = - Generated by BuildLangModel.py -- Started: 2015-12-12 18:01:21.560682 -- Maximum depth: 2 -- Max number of pages: 50 +- Started: 2021-03-16 19:18:56.191449 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == -Kezdőlap (revision 12748721) -1722 (revision 16471860) -1780 (revision 16407861) -1800 (revision 15028835) -1831 (revision 16469576) -1848–49-es forradalom és szabadságharc (revision 16955214) -1875 (revision 16798555) -1895 (revision 16649417) -1900 (revision 16961019) -1905 (revision 16601113) -1915 (revision 16792868) -1940 (revision 16936087) -1950 (revision 16820817) -1970 (revision 16093156) -1985 (revision 16463340) -1995 (revision 16945805) -1998 (revision 16542908) -2003 (revision 16943939) -2015 (revision 16960983) -73. Golden Globe-gála (revision 16937296) -Akacuki (revision 16960353) -Akasztottak erdeje (regény) (revision 16918702) -Alan Hodgkinson (revision 16953214) -Alfred Bernhard Nobel (revision 16654409) -Alkotmány (revision 16784843) -André-Marie Ampère (revision 16865419) -Angela Merkel (revision 16960753) -Anne Baxter (revision 15572176) -Az irgalmasság rendkívüli szentéve (revision 16951018) -Az év embereinek listája (revision 16961722) -Bencések (revision 16853524) -Boeing 747–400 (revision 16947261) -Chantal Szent Johanna Franciska (revision 16371923) -December 12. (revision 15637986) -December 13. (revision 16546152) -Dinamó (revision 15949492) -Dionne Warwick (revision 16522754) -Elektrodinamika (revision 14888277) -Elektromosság (revision 16051899) -Enciklopédia (revision 16556513) -Eric Maskin (revision 16907781) -Európai migrációs válság (revision 16922218) -Eötvös Loránd (revision 16960057) -Eötvös Loránd Tudományegyetem (revision 16684410) -Fellner Jakab (revision 16960223) -Feltaláló (revision 13609621) -Ferenc pápa (revision 16928970) -Frank Sinatra (revision 16927399) -François Jean Dominique Arago (revision 16197941) -Gabriella (revision 16906500) +Kezdőlap (revision 21016160) +1621 (revision 19051984) +1771 (revision 21393041) +1821 (revision 23636828) +1831 (revision 22164941) +1848–49-es forradalom és szabadságharc (revision 23685544) +1858 (revision 22166952) +1871 (revision 23533908) +1921 (revision 23662365) +1924 (revision 23246889) +1941 (revision 23564803) +1946 (revision 23682260) +1971 (revision 23593882) +2003 (revision 23647007) +2021 (revision 23686129) +A Nyugat lánya (revision 21595643) +A magyar zászló és címer napja (revision 23134982) +A párizsi Notre-Dame (revision 23521460) +Abja-Paluoja (revision 23589245) +Antoine-Jean Gros (revision 23684575) +Arina Szjarhejevna Szabalenka (revision 23614779) +Aun Szan Szu Kji (revision 23588064) +Barbora Krejčíková (revision 23683559) +Bebe Daniels (revision 23684693) +Berlini Nemzetközi Filmfesztivál (revision 23601008) +Bohémélet (revision 23533579) +Borel–Lebesgue-tétel (revision 20175177) +Brüsszel (revision 23681873) +Claude Debussy (revision 23666304) +Covid19-koronavírus-járvány Magyarországon (revision 23684719) +Covid19-pandémia (revision 23672833) +Császár Angela (revision 23405485) +December 22. (revision 23636644) +EastEnders (revision 23674461) +Eigel Ernő (revision 23678820) +Elise Mertens (revision 23668277) +Első világháború (revision 23681284) +Enciklopédia (revision 23257786) +Fahd szaúdi király (revision 23684688) +Filip Polášek (revision 20343200) +Finnugor Kulturális Főváros (revision 23593480) +Georg Neumark (revision 23419386) +Giacomo Puccini (revision 23685245) +Gianni Schicchi (revision 21500522) +Gonda János (revision 23672147) +Halálozások 2021-ben (revision 23686337) +Heine-tétel (revision 15274788) +Heller Bernát (revision 21796754) +Henrietta (keresztnév) (revision 23599183) +Hmelnickiji terület (revision 21540657) +Ivan Dodig (revision 19700630) +Jankovics József (revision 23686084) +Jean Frydman (revision 23684355) +Jászai Mari-díj (revision 23683756) +Katona Gyula (matematikus) (revision 23651033) +Kew Gardens (revision 23635430) +Klasszikus gitár (revision 23640016) +Kombinatorika (revision 23457078) +Kurucz György (motorversenyző) (revision 23682502) +Landerer Lajos (revision 20960777) +Lucca (revision 22052809) +Lítium (revision 23671148) +Magyar Tudományos Akadémia (revision 23644040) +Magyar Wikipédia (revision 23672081) +Magyar nyelv (revision 21426463) +Magyarország (revision 23674944) +Magyarország címere (revision 23623029) +Magyarország nemzetiségei (revision 23600289) +Magyarország zászlaja (revision 23056847) +Mars (bolygó) (revision 23667637) +Mercury–Atlas–6 (revision 23639047) +Mianmar (revision 23673840) +Michael Jordan (revision 23621635) +Michal Polák (revision 23684810) +Mustárgáz (revision 23682684) +Március 16. (revision 23685754) +NASA (revision 23080317) +Nagy Dénes (filmrendező) (revision 23656475) +Newbery Medal (revision 23594588) +Nobel-békedíj (revision 23517207) +Novak Đoković (revision 23685551) +November 29. (revision 23652299) +Nyílt tartalom (revision 22335123) +Olaszország (revision 23657820) +Országos Rabbiképző – Zsidó Egyetem (revision 23624676) +Perseverance (revision 23666738) +Pillangókisasszony (revision 23430673) +Pánczél Lajos (revision 23532037) +Rajeev Ram (revision 23615665) +Richard Strauss (revision 23674657) +Révész László László (revision 23685649) +Spinosaurus (revision 23680682) +Szomszédok (revision 23682854) +Széchenyi-díj (revision 23683451) +Természetes fény (film) (revision 22147530) +Tiltott Város (revision 23663664) +Tosca (revision 23580069) +Ukrajna (revision 23683387) == End of Parsed pages == -- Wikipedia parsing ended at: 2015-12-12 18:02:46.729734 +- Wikipedia parsing ended at: 2021-03-16 19:23:30.841696 -55 characters appeared 375370 times. +56 characters appeared 1168905 times. First 32 characters: -[ 0] Char e: 9.710685457015744 % -[ 1] Char a: 8.803314063457389 % -[ 2] Char t: 7.322375256413672 % -[ 3] Char s: 6.666222660308496 % -[ 4] Char l: 5.73967019207715 % -[ 5] Char r: 5.4341050163838345 % -[ 6] Char n: 5.39920611663159 % -[ 7] Char i: 4.773689959240216 % -[ 8] Char o: 4.347976663025815 % -[ 9] Char k: 4.289634227562138 % -[10] Char z: 4.244611982843594 % -[11] Char á: 3.7855982097663636 % -[12] Char m: 3.2144284306151265 % -[13] Char g: 3.0727016010869277 % -[14] Char é: 3.0295441830727015 % -[15] Char b: 2.287609558568879 % -[16] Char d: 1.9966965926952074 % -[17] Char v: 1.8832085675466872 % -[18] Char y: 1.8453792258305137 % -[19] Char u: 1.5155713029810587 % -[20] Char h: 1.2960545595012922 % -[21] Char p: 1.288861656498921 % -[22] Char j: 1.2363801049631031 % -[23] Char c: 1.0951860830647095 % -[24] Char f: 1.0256546873751233 % -[25] Char ö: 1.020859418706876 % -[26] Char ó: 0.9955510562911262 % -[27] Char ő: 0.8399712283879905 % -[28] Char í: 0.6340410794682579 % -[29] Char ü: 0.4211844313610571 % -[30] Char ú: 0.3295415190345526 % -[31] Char ű: 0.2056637451048299 % +[ 0] Char e: 9.498462236024313 % +[ 1] Char a: 9.06651952040585 % +[ 2] Char t: 7.768381519456244 % +[ 3] Char s: 6.3276314157266835 % +[ 4] Char l: 5.860613137936787 % +[ 5] Char n: 5.5261120450336 % +[ 6] Char r: 5.029493414777077 % +[ 7] Char i: 4.7589838352988485 % +[ 8] Char k: 4.502162280082642 % +[ 9] Char o: 4.291794457205675 % +[10] Char z: 4.136777582438265 % +[11] Char á: 3.7318687147372973 % +[12] Char é: 3.275287555447192 % +[13] Char m: 3.2307159264439798 % +[14] Char g: 3.089215975635317 % +[15] Char b: 2.120103857884088 % +[16] Char d: 2.0372913110988486 % +[17] Char y: 2.0071776577223983 % +[18] Char v: 1.8980156642327648 % +[19] Char u: 1.421757970066002 % +[20] Char h: 1.3363789187316335 % +[21] Char p: 1.229868979942767 % +[22] Char j: 1.147227533460803 % +[23] Char c: 1.0305371266270569 % +[24] Char ö: 1.0298527254139558 % +[25] Char f: 0.9665456132020994 % +[26] Char ó: 0.9550818928826551 % +[27] Char ő: 0.8821931636873827 % +[28] Char í: 0.6613026721589864 % +[29] Char ü: 0.46162861823672585 % +[30] Char ú: 0.293950321026944 % +[31] Char ű: 0.23611841851989682 % -The first 32 characters have an accumulated ratio of 0.9975117883688093. +The first 32 characters have an accumulated ratio of 0.998090520615448. -1084 sequences found. +1122 sequences found. -First 512 (typical positive ratio): 0.9748272224933486 -Next 512 (512-1024): 5.328076298052588e-06 -Rest: 0.0001889139024889644 +First 512 (typical positive ratio): 0.9736098834669349 +Next 512 (512-1024): 0.0023611841851989683 +Rest: 0.00010464608288375879 -- Processing end: 2015-12-12 18:02:46.902033 +- Processing end: 2021-03-16 19:23:30.943714 diff --git a/script/BuildLangModelLogs/LangIrishModel.log b/script/BuildLangModelLogs/LangIrishModel.log index 7bee9d8..37e867d 100644 --- a/script/BuildLangModelLogs/LangIrishModel.log +++ b/script/BuildLangModelLogs/LangIrishModel.log @@ -1,156 +1,158 @@ = Logs of language model for Irish (ga) = - Generated by BuildLangModel.py -- Started: 2016-09-27 00:31:16.489602 -- Maximum depth: 5 +- Started: 2021-03-16 19:06:31.364099 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Tracy Caldwell Dyson (revision 812158) -14 Lúnasa (revision 716575) -1969 (revision 810361) -California (revision 790976) -Ceimic (revision 759983) -Ceimic fhisiciúil (revision 656896) -NASA (revision 806394) -Rúisis (revision 771746) -SAM (revision 807668) -Spáinnis (revision 812323) -Stáisiún Idirnáisiúnta Spáis (revision 806394) -Tointeálaí spáis (revision 761309) +Tracy Caldwell Dyson (revision 972597) +14 Lúnasa (revision 945830) +1969 (revision 950246) +Arcadia (revision 940778) +California (revision 977165) +Ceimic (revision 996644) +Ceimic fhisiciúil (revision 927461) +Ceimiceoir (revision 927503) +Fisiceoir (revision 880864) +IMDb (revision 941231) +Max Q (revision 910451) +Medal "For Merit in Space Exploration" (revision 972605) +NASA (revision 982342) +Ollscoil California, Davis (revision 972597) +Rúisis (revision 990076) +SAM (revision 976971) +Spáinnis (revision 976986) +Spásaire (revision 948727) +Stáisiún Idirnáisiúnta Spáis (revision 810459) +Stáit Aontaithe Mheiriceá (revision 976971) +Tointeálaí spáis (revision 884452) 10 Lúnasa (revision 649045) -11 Lúnasa (revision 776455) -12 Lúnasa (revision 716531) -13 Lúnasa (revision 716546) +11 Lúnasa (revision 855483) +12 Lúnasa (revision 970783) +13 Lúnasa (revision 843084) 1598 (revision 703178) 15 Lúnasa (revision 776986) -16 Lúnasa (revision 648836) -1740 (revision 791225) +16 Lúnasa (revision 956751) +1740 (revision 868712) 1771 (revision 776762) 17 Lúnasa (revision 777131) -1823 (revision 791774) -1832 (revision 794492) -1898 (revision 805176) +1823 (revision 884394) +1832 (revision 870502) +1898 (revision 881354) 18 Lúnasa (revision 777242) -1911 (revision 801932) -1956 (revision 797081) -1962 (revision 801511) -1966 (revision 807415) +1911 (revision 884923) +1956 (revision 922906) +1962 (revision 948322) +1966 (revision 983105) +1983 (revision 950195) 19 Lúnasa (revision 648524) -1 Lúnasa (revision 647726) -2001 (revision 801012) -2004 (revision 795759) -2016 (revision 812091) -20 Lúnasa (revision 777924) -21 Lúnasa (revision 647805) -22 Lúnasa (revision 778960) +1 Lúnasa (revision 970005) +2001 (revision 953347) +2004 (revision 915512) +20 Lúnasa (revision 863369) +21 Lúnasa (revision 987631) +22 Lúnasa (revision 949242) 23 Lúnasa (revision 778453) -24 Lúnasa (revision 778495) -25 Lúnasa (revision 778551) +24 Lúnasa (revision 855482) +25 Lúnasa (revision 922966) 26 Lúnasa (revision 649051) -27 Lúnasa (revision 778763) -28 Lúnasa (revision 778813) -29 Lúnasa (revision 778959) -2 Lúnasa (revision 774393) +27 Lúnasa (revision 855881) +28 Lúnasa (revision 855201) +29 Lúnasa (revision 937884) +2 Lúnasa (revision 949578) 30 Lúnasa (revision 648308) -31 Lúnasa (revision 649053) -3 Lúnasa (revision 647811) -4 Lúnasa (revision 786284) -5 Lúnasa (revision 776845) -6 Lúnasa (revision 647834) -7 Lúnasa (revision 775859) +31 Lúnasa (revision 874664) +3 Lúnasa (revision 954861) +4 Lúnasa (revision 936315) +5 Lúnasa (revision 946408) +6 Lúnasa (revision 936316) +7 Lúnasa (revision 936317) 8 Lúnasa (revision 648745) -9 Lúnasa (revision 648522) -AK Parti (revision 792248) -An Phacastáin (revision 759339) -An Tuirc (revision 811970) -Aoine (revision 717430) -Bertolt Brecht (revision 800584) -Czesław Miłosz (revision 780306) -Céadaoin (revision 717606) -Dan Boyle (revision 797926) +9 Lúnasa (revision 868992) +AK Parti (revision 980611) +An Phacastáin (revision 975474) +An Tuirc (revision 975987) +Aoidh Uí Néill (revision 945830) +Aoine (revision 871416) +Bertolt Brecht (revision 996168) +Czesław Miłosz (revision 968559) +Céadaoin (revision 841385) +Dan Boyle (revision 981683) Domhnach (revision 717663) -Déardaoin (revision 647860) +Déardaoin (revision 841384) Féilire (revision 648837) -Halle Berry (revision 759955) -Henry Bagenal (revision 716575) -Iúil (revision 647071) +Halle Berry (revision 916135) +Henry Bagenal (revision 936900) +Iúil (revision 931127) Luan (revision 717791) -Lúnasa (revision 810265) -Meán Fómhair (revision 779166) -Pápa Pius VII (revision 758126) -Satharn (revision 784525) -Walter Scott (revision 759029) -Áth Buí (revision 716575) -11 Márta (revision 716519) -17 Márta (revision 798614) -1882 (revision 801198) -1886 (revision 776624) -1890 (revision 801200) -1891 (revision 796677) -1903 (revision 812849) -1922 (revision 801227) -1930í (revision 740221) -1940í (revision 740219) -1950í (revision 740217) -1960í (revision 772724) -1967 (revision 796983) -1968 (revision 810926) -1970 (revision 812852) -1970í (revision 740213) -1971 (revision 809746) -1972 (revision 789490) -1980í (revision 740211) -1990í (revision 740208) -19ú haois (revision 739964) -1 Bealtaine (revision 647679) +Lúnasa (revision 970011) +Meán Fómhair (revision 931128) +Mila Kunis (revision 916248) +Pápa Pius VII (revision 972523) +Satharn (revision 717929) +Walter Scott (revision 973708) +Áth Buí (revision 923034) +10 Bealtaine (revision 974318) +11 Feabhra (revision 885848) +11 Meitheamh (revision 937886) +11 Márta (revision 956107) +11 Nollaig (revision 949777) +13 Eanáir (revision 952269) +14 Eanáir (revision 952327) +15 Meitheamh (revision 770401) +16 Nollaig (revision 922996) +17 Meán Fómhair (revision 974321) +17 Márta (revision 959908) +1882 (revision 894229) +1886 (revision 876620) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-27 00:33:40.157338 +- Wikipedia parsing ended at: 2021-03-16 19:09:36.532359 -44 characters appeared 183561 times. +42 characters appeared 213560 times. First 31 characters: -[ 0] Char a: 15.192769705983297 % -[ 1] Char i: 10.534372769814938 % -[ 2] Char n: 8.106297089250985 % -[ 3] Char h: 7.243368689427493 % -[ 4] Char r: 6.442544985045844 % -[ 5] Char e: 6.198484427520007 % -[ 6] Char s: 5.622654049607488 % -[ 7] Char t: 4.776068990689743 % -[ 8] Char c: 4.543448771797931 % -[ 9] Char l: 4.1953356105054995 % -[10] Char o: 3.9469168287381304 % -[11] Char d: 3.2169142682813887 % -[12] Char g: 2.811054635788648 % -[13] Char m: 2.6269196615838877 % -[14] Char á: 2.2749930540801153 % -[15] Char u: 2.1932763495513754 % -[16] Char b: 2.0478206154902185 % -[17] Char í: 1.6599386579938005 % -[18] Char é: 1.2829522611012143 % -[19] Char f: 1.1494816437042727 % -[20] Char ú: 1.0525111543301682 % -[21] Char p: 0.9059658642086281 % -[22] Char ó: 0.8890777452726886 % -[23] Char v: 0.2522322279787101 % -[24] Char y: 0.23479933101257894 % -[25] Char k: 0.18195586208399386 % -[26] Char w: 0.1688811893593955 % -[27] Char j: 0.09697048937410452 % -[28] Char z: 0.07735848028720697 % -[29] Char x: 0.0343210159020707 % -[30] Char q: 0.010895560603831969 % +[ 0] Char a: 15.363832178310547 % +[ 1] Char i: 10.505712680277206 % +[ 2] Char n: 8.10825997377786 % +[ 3] Char h: 7.447087469563589 % +[ 4] Char r: 6.299868889305113 % +[ 5] Char e: 6.046076044203034 % +[ 6] Char s: 5.528657051882375 % +[ 7] Char t: 4.9690953362052825 % +[ 8] Char c: 4.70593744146844 % +[ 9] Char l: 4.132328151339202 % +[10] Char o: 3.9469001685708935 % +[11] Char d: 3.2154897920958985 % +[12] Char g: 2.7795467315976774 % +[13] Char m: 2.6760629331335455 % +[14] Char á: 2.228413560591871 % +[15] Char u: 2.17550103015546 % +[16] Char b: 2.0130174189923205 % +[17] Char í: 1.7522007866641691 % +[18] Char é: 1.2207342198913653 % +[19] Char f: 1.1186551788724481 % +[20] Char ú: 1.0039333208466004 % +[21] Char ó: 0.8967035025285635 % +[22] Char p: 0.8475369919460574 % +[23] Char y: 0.2289754635699569 % +[24] Char v: 0.22101517138040833 % +[25] Char k: 0.17606293313354562 % +[26] Char w: 0.16295186364487732 % +[27] Char j: 0.09271399138415433 % +[28] Char z: 0.06836486233377037 % +[29] Char x: 0.03511893613036149 % +[30] Char q: 0.01311106948866829 % -The first 31 characters have an accumulated ratio of 0.9997058198636966. +The first 31 characters have an accumulated ratio of 0.9997986514328528. -701 sequences found. +707 sequences found. -First 512 (typical positive ratio): 0.9974076651249096 -Next 512 (512-1024): 5.447780301915984e-06 -Rest: -2.7755575615628914e-17 +First 512 (typical positive ratio): 0.9976732191628278 +Next 512 (512-1024): 0.010039333208466004 +Rest: -3.5561831257524545e-17 -- Processing end: 2016-09-27 00:33:40.258886 +- Processing end: 2021-03-16 19:09:36.580170 diff --git a/script/BuildLangModelLogs/LangLatvianModel.log b/script/BuildLangModelLogs/LangLatvianModel.log index 4dd7a21..3fafa6c 100644 --- a/script/BuildLangModelLogs/LangLatvianModel.log +++ b/script/BuildLangModelLogs/LangLatvianModel.log @@ -1,162 +1,165 @@ = Logs of language model for Latvian (lv) = - Generated by BuildLangModel.py -- Started: 2016-09-21 00:16:33.485953 -- Maximum depth: 5 +- Started: 2021-03-16 19:26:37.227238 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Zigfrīds Anna Meierovics (revision 2546984) -1. Saeima (revision 2511127) -1. Saeimas deputāti (revision 2303859) -1. Saeimas frakcijas (revision 2429725) -1. Saeimas vēlēšanas (revision 2464758) -1887. gads (revision 2583253) -1919. gada Parīzes miera konference (revision 2482078) -1920 (revision 2401222) -1921 (revision 2473337) -1922 (revision 2486819) -1923 (revision 2544643) -1924 (revision 2539361) -1925 (revision 2486795) -22. augusts (revision 2583254) -31. jūlijs (revision 2559648) -5. februāris (revision 2581966) -ASV (revision 2549746) -Agrārā reforma Latvijā (revision 2473423) -Agudas Izrael (Latvija) (revision 2311143) -Aigars Kalvītis (revision 2545858) -Alberts Kviesis (revision 2546934) -Aleksandrs Bočagovs (revision 2329526) -Aleksandrs Dauge (revision 2546805) -Aleksandrs Jaunbērzs (revision 2462254) -Aleksandrs Kerenskis (revision 2461214) -Aleksandrs Millerāns (revision 2309419) -Aleksandrs Neibergs (revision 2491897) -Alfrēds Birznieks (revision 2567317) -Alfrēds Jēkabs Bērziņš (revision 2564068) -Alfrēds Riekstiņš (politiķis) (revision 2586148) -Andrejs Bērziņš (revision 2564283) -Andrejs Kurcijs (revision 2564338) +Zigfrīds Anna Meierovics (revision 3325285) +1. Saeima (revision 3366185) +1. Saeimas deputāti (revision 3368139) +1. Saeimas frakcijas (revision 3366184) +1. Saeimas vēlēšanas (revision 3330484) +1887. gads (revision 2773799) +1919. gada Parīzes miera konference (revision 3359347) +1920 (revision 3362733) +1921 (revision 3340387) +1922 (revision 3337740) +1923 (revision 3347028) +1924 (revision 3347028) +1925 (revision 3347028) +22. augusts (revision 3327223) +31. jūlijs (revision 3347080) +5. februāris (revision 3364814) +Agrārā reforma Latvijā (revision 3328548) +Agudas Izrael (Latvija) (revision 3285729) +Aigars Kalvītis (revision 3169702) +Alberts Kviesis (revision 3379738) +Aleksandrs Bočagovs (revision 3004343) +Aleksandrs Dauge (revision 3062538) +Aleksandrs Jaunbērzs (revision 3373734) +Aleksandrs Kerenskis (revision 2758772) +Aleksandrs Millerāns (revision 3108576) +Aleksandrs Neibergs (deputāts) (revision 3349399) +Alfrēds Birznieks (revision 3300916) +Alfrēds Jēkabs Bērziņš (revision 3351998) +Alfrēds Riekstiņš (politiķis) (revision 3034089) +Amerikas Savienotās Valstis (revision 3355214) +Andrejs Bērziņš (politiķis) (revision 3089135) +Andrejs Kurcijs (revision 3223696) Andrejs Petrevics (revision 2460269) -Andrejs Sīmanis (revision 2547079) -Andrejs Veckalns (revision 2564224) -Andrievs Niedra (revision 2546988) -Andris Bērziņš (politiķis, 1951) (revision 2218488) -Andris Šķēle (revision 2457423) -Angļu valoda (revision 2447598) -Ansis Buševics (revision 2578312) -Ansis Rudevics (revision 2414854) -Antante (revision 2581862) +Andrejs Sīmanis (revision 3210302) +Andrejs Veckalns (revision 3237365) +Andrievs Niedra (revision 3374557) +Andris Bērziņš (politiķis, 1951) (revision 3231604) +Andris Šķēle (revision 3379347) +Angļu valoda (revision 3303218) +Ansis Buševics (revision 2927384) +Ansis Rudevics (revision 2700953) +Antante (revision 3373256) Antons Dzenis (revision 2564295) -Antons Laizāns (revision 2467408) -Antons Rubins (1885) (revision 2465396) -Antons Velkme (revision 2564425) -Ants Pīps (revision 2564383) -Apollo (portāls) (revision 2371202) -Apolonija Laurinoviča (revision 2466232) -Aprīļa pučs (revision 2150686) -Apvienotā Karaliste (revision 2566258) -Aristīds Briāns (revision 2536819) -Arons Nuroks (revision 2337085) -Arturs Alberings (revision 2442531) -Arturs Ozols (inženieris) (revision 2491399) -Artūrs Balfūrs (revision 2309461) -Artūrs Vīgants (revision 2461471) -Artūrs Žers (revision 2564230) -Arveds Bergs (revision 2564118) -Arveds Švābe (revision 2586288) -Arvīds Kalniņš (revision 2545254) -Aspazija (revision 2574081) -Augusts Briedis (revision 2546879) -Augusts Kalniņš (revision 2436647) -Augusts Kirhenšteins (revision 2547109) -Austroungārija (revision 2524307) +Antons Laizāns (revision 3360427) +Antons Rubins (1885) (revision 3351508) +Antons Velkme (revision 3279136) +Ants Pīps (revision 3375003) +Apollo (portāls) (revision 3232284) +Apolonija Laurinoviča (revision 3209013) +Aprīļa pučs (revision 3010427) +Apvienotā Karaliste (revision 3382180) +Aristīds Briāns (revision 2767296) +Arons Nuroks (revision 3062127) +Arturs Alberings (revision 3325257) +Arturs Krišjānis Kariņš (revision 3381504) +Arturs Ozols (inženieris) (revision 3352707) +Artūrs Balfūrs (revision 3177309) +Artūrs Reisners (revision 3300906) +Artūrs Vīgants (revision 3296217) +Artūrs Žers (revision 3296461) +Arveds Bergs (revision 3238379) +Arveds Švābe (revision 3340584) +Arvīds Kalniņš (ķīmiķis) (revision 3382254) +Aspazija (revision 3382469) +Augusts Briedis (revision 3163311) +Augusts Kalniņš (revision 3310251) +Augusts Kirhenšteins (revision 3302758) +Austroungārija (revision 3376635) Autoritatīvā vadība (revision 2385793) -Balfūra nota (revision 2538973) -Baltijas Antante (revision 2541901) -Baltijas pārkrievošana (revision 2570657) -Bermontiāde (revision 2499160) +Balfūra nota (revision 3224093) +Baltijas Antante (revision 3236261) +Baltijas pārkrievošana (revision 3311586) +Bermontiāde (revision 3156269) Bernards Kublinskis (revision 2441386) -Bezpartijiskais nacionālais centrs (revision 2438819) -Beļģija (revision 2579008) -Brestļitovskas miera līgums (revision 2569020) -Brizules muiža (revision 2584564) -Bruno Kalniņš (revision 2566572) -Brīvības piemineklis (revision 2578595) -Bulduru konference (revision 2193449) -Ceire-Cion (revision 2311779) -Celmiņa 1. Ministru kabinets (revision 2112830) -Delfi (portāls) (revision 2544918) -Demokrātiskais Centrs (revision 2113060) -Demokrātu savienība (revision 2179593) -Diena (laikraksts) (revision 2548854) -Donats Bicāns (revision 2479349) -Dubulti (Jūrmala) (revision 2456811) -Durbe (revision 2381790) -Dāvids Komisārs (revision 2574685) -Džovanni Džoliti (revision 2538055) -Ebreju bloks (revision 2311643) -Ebreju nacionāldemokrātu partija (revision 2312288) -Eduards Grantskalns (revision 2565167) -Eduards Jaunzems (revision 2452579) -Eduards Laimiņš (revision 2449521) -Eduards Radziņš (revision 2564393) +Berta Vesmane (revision 3299697) +Bezpartijiskais nacionālais centrs (revision 3286113) +Beļģija (revision 3308106) +Brestļitovskas miera līgums (revision 3348377) +Brizules muiža (revision 3103947) +Bruno Kalniņš (revision 3297011) +Brīvības piemineklis (revision 3343774) +Bulduru konference (revision 3122422) +Bunds (revision 3368404) +Ceire-Cion (revision 3285715) +Celmiņa 1. Ministru kabinets (revision 2925529) +Delfi (portāls) (revision 3363824) +Demokrātiskais Centrs (revision 3286115) +Demokrātu savienība (revision 3339759) +Diena (laikraksts) (revision 3343800) +Donats Bicāns (revision 3311441) +Dubulti (Jūrmala) (revision 3349180) +Durbe (revision 3380441) +Dāvids Komisārs (revision 3082713) +Džovanni Džoliti (revision 3165202) +Ebreji (revision 3340750) +Ebreju bloks (revision 3285659) +Ebreju nacionāldemokrātu partija (revision 3368172) +Eduards Grantskalns (revision 2932497) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-21 00:19:18.361533 +- Wikipedia parsing ended at: 2021-03-16 19:30:28.292124 -55 characters appeared 354745 times. +55 characters appeared 437791 times. -First 39 characters: -[ 0] Char a: 11.905171320244119 % -[ 1] Char i: 9.3977364022044 % -[ 2] Char s: 8.224217395594017 % -[ 3] Char e: 6.367108768270166 % -[ 4] Char r: 5.854064186951191 % -[ 5] Char t: 5.831230884156225 % -[ 6] Char u: 4.939604504644181 % -[ 7] Char n: 4.463769750102186 % -[ 8] Char ā: 3.9498794909019157 % -[ 9] Char l: 3.8030134321836813 % -[10] Char o: 3.6296494665182033 % -[11] Char k: 3.524785409237621 % -[12] Char m: 3.2739009711201 % -[13] Char d: 3.177775585279567 % -[14] Char v: 3.0046935122411873 % -[15] Char p: 2.827101157169234 % -[16] Char j: 2.8166711299665956 % -[17] Char b: 2.0279355593454453 % -[18] Char ī: 1.8855797826607845 % -[19] Char g: 1.6146809680192813 % -[20] Char z: 1.5343415692962552 % -[21] Char ē: 1.4593581304880971 % -[22] Char c: 1.2231321089796898 % -[23] Char š: 0.8876798827326671 % -[24] Char ņ: 0.46596851259355315 % -[25] Char f: 0.4203019070036223 % -[26] Char ļ: 0.34700982395805435 % -[27] Char ū: 0.30162511099522193 % -[28] Char h: 0.20070755049401684 % -[29] Char ž: 0.18774048964749326 % -[30] Char ķ: 0.14207388405756247 % -[31] Char ģ: 0.1268516821942522 % -[32] Char č: 0.08287643236691145 % -[33] Char w: 0.0324176521163089 % -[34] Char y: 0.02734358482853881 % -[35] Char x: 0.015785987117506943 % -[36] Char ö: 0.005074067287770088 % -[37] Char é: 0.003946496779376736 % -[38] Char q: 0.0031008188980817205 % +First 40 characters: +[ 0] Char a: 11.993622527644469 % +[ 1] Char i: 9.41179695334075 % +[ 2] Char s: 8.204599911830075 % +[ 3] Char e: 6.371761868106014 % +[ 4] Char t: 5.8011699646635035 % +[ 5] Char r: 5.772845947038655 % +[ 6] Char u: 4.945053690002764 % +[ 7] Char n: 4.437505567725239 % +[ 8] Char ā: 4.014015820334361 % +[ 9] Char l: 3.6974263975275874 % +[10] Char o: 3.597150238355745 % +[11] Char k: 3.5347917156816835 % +[12] Char m: 3.307971155185922 % +[13] Char d: 3.2337348186691823 % +[14] Char v: 2.977904982057648 % +[15] Char j: 2.8618678775945603 % +[16] Char p: 2.8296607285211435 % +[17] Char b: 2.040242946976982 % +[18] Char ī: 1.874638811670409 % +[19] Char g: 1.6240626234892905 % +[20] Char z: 1.5235580448204737 % +[21] Char ē: 1.5109949724868716 % +[22] Char c: 1.216105401892684 % +[23] Char š: 0.9225863482803439 % +[24] Char ņ: 0.45478321847639624 % +[25] Char f: 0.42691603984549703 % +[26] Char ļ: 0.3277819781585277 % +[27] Char ū: 0.29420431210326387 % +[28] Char h: 0.18616189003428577 % +[29] Char ž: 0.1815935000947941 % +[30] Char ķ: 0.126772820820894 % +[31] Char ģ: 0.11649394345703772 % +[32] Char č: 0.08382995538967224 % +[33] Char y: 0.029466115109721306 % +[34] Char w: 0.029466115109721306 % +[35] Char x: 0.012334652836627522 % +[36] Char é: 0.0050252289334408425 % +[37] Char ö: 0.0034262924546187568 % +[38] Char ü: 0.0027410339636950052 % +[39] Char q: 0.0025126144667204212 % -The first 39 characters have an accumulated ratio of 0.9998590536864506. +The first 40 characters have an accumulated ratio of 0.9998857902515126. -970 sequences found. +982 sequences found. -First 512 (typical positive ratio): 0.9904102202220861 -Next 512 (512-1024): 0.0018774048964749328 -Rest: -1.734723475976807e-17 +First 512 (typical positive ratio): 0.9904642991017133 +Next 512 (512-1024): 0.001815935000947941 +Rest: -5.377642775528102e-17 -- Processing end: 2016-09-21 00:19:18.484318 +- Processing end: 2021-03-16 19:30:28.395006 diff --git a/script/BuildLangModelLogs/LangLithuanianModel.log b/script/BuildLangModelLogs/LangLithuanianModel.log index 9ea0467..5db032a 100644 --- a/script/BuildLangModelLogs/LangLithuanianModel.log +++ b/script/BuildLangModelLogs/LangLithuanianModel.log @@ -1,162 +1,166 @@ = Logs of language model for Lithuanian (lt) = - Generated by BuildLangModel.py -- Started: 2016-09-21 00:23:03.857157 -- Maximum depth: 5 +- Started: 2021-03-16 19:23:31.104161 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Karūna (laivas) (revision 5080379) -1650 (revision 4990868) -1654 (revision 4991037) -1664 (revision 4991048) -1665 (revision 4991050) -1668 (revision 4991052) -1669 (revision 4991053) -1672 (revision 4991056) -1676 (revision 4991060) -1718 (revision 4990914) -1909 (revision 4990667) -1928 (revision 4990262) -1932 (revision 4990613) -1956 (revision 4990635) -1980 (revision 4990655) -Baltijos jūra (revision 5052833) -Burinis laivas (revision 4657401) -Flagmanas (laivas) (revision 5005271) -Grimzlė (revision 4487052) -Kalmaras (Švedija) (revision 4978519) -Karo laivas (revision 4726931) -Karolis XI (revision 4944621) -Karolis XII (revision 4915230) -Kilis (revision 4325533) -Koordinačių sistema (revision 5033980) -Laivo vėliava (revision 4986001) -Liepos 1 d. (revision 4910200) -Nyderlandai (revision 5080140) -Rugpjūčio 10 (revision 4910281) -Varytuvas (revision 4620792) -Vaza (laivas) (revision 5079282) -XVIII a. (revision 4896219) -XVII a. (revision 4768242) -Švedija (revision 5057665) -Švedų kalba (revision 4687559) -1590 (revision 4990983) -1596 (revision 4990989) -1608 (revision 4991000) -1610 (revision 4991002) -1623 m. (revision 4991015) -1634 m. (revision 4991026) -1643 m. (revision 4990870) -1644 m. (revision 4990872) -1645 m. (revision 4990873) -1646 m. (revision 4990874) -1647 m. (revision 4913295) -1648 m. (revision 4990875) -1649 m. (revision 4990876) -1651 m. (revision 4991035) -1652 m. (revision 4991072) -1653 m. (revision 4991036) -1654 m. (revision 4991037) -1655 m. (revision 4991038) -1662 m. (revision 4991046) -1668 m. (revision 4991052) -1677 m. (revision 4991061) -1702 (revision 4990595) -1704 (revision 4990863) -1722 (revision 4990918) -1723 (revision 4990919) -1737 (revision 4990931) -2 tūkstantmetis (revision 4296407) -ATR (revision 5078529) -Abiejų Tautų Respublika (revision 5078529) -Adomas Freitagas (revision 4362991) -Anglų kalba (revision 4911240) -Armėnų kalendorius (revision 4817534) -Bahajų kalendorius (revision 4706296) -Bajorai (revision 5006456) +Karūna (laivas) (revision 5105933) +1650 (revision 5301814) +1654 (revision 5301823) +1664 (revision 5301833) +1665 (revision 5301834) +1668 (revision 5301872) +1669 (revision 5301873) +1672 (revision 5301876) +1676 (revision 5801857) +1718 (revision 5301969) +1909 (revision 6129929) +1928 (revision 6176161) +1932 (revision 6195207) +1956 (revision 6150066) +1980 (revision 6190258) +Baltijos jūra (revision 6193053) +Burinis laivas (revision 6040752) +Flagmanas (laivas) (revision 5987584) +Grimzlė (revision 5989647) +Kalmaras (Švedija) (revision 5604914) +Karo laivas (revision 5994228) +Karolis XI (revision 5480144) +Karolis XII (revision 5880104) +Kilis (revision 5995782) +Koordinačių sistema (revision 6044079) +Laivo vėliava (revision 6208955) +Liepos 1 d. (revision 5779083) +Nyderlandai (revision 6196943) +Olando mūšis (revision 6020430) +Rugpjūčio 10 (revision 5793253) +Varytuvas (revision 6020287) +Vaza (laivas) (revision 6203069) +XVIII a. (revision 6031323) +XVII a. (revision 6025004) +Švedija (revision 6205204) +Švedų kalba (revision 5560532) +1590 (revision 5801846) +1596 (revision 5552466) +1608 (revision 5637570) +1610 (revision 5301721) +1647 m. (revision 5301819) +1648 m. (revision 5301818) +1649 m. (revision 5301820) +1651 m. (revision 5301821) +1652 m. (revision 5301836) +1653 m. (revision 5301822) +1702 (revision 5301912) +1704 (revision 5301925) +1722 (revision 5301973) +1723 (revision 5301974) +1737 (revision 5302020) +2 tūkstantmetis (revision 5976362) +ATR (revision 6212255) +Abiejų Tautų Respublika (revision 6212255) +Adomas Freitagas (revision 6152308) +Armėnų kalendorius (revision 5965695) +Bahajų kalendorius (revision 6168286) +Bajorai (revision 6040220) Berberų kalendorius (revision 4926904) -Birželio 21 (revision 4910142) -Bizantijos kalendorius (revision 4927623) -Budistų kalendorius (revision 4705734) -Dešimtmetis (revision 4296419) -Dominikonai (revision 4921895) -Dominikonų ordinas (revision 4921895) -Džohoro sultonatas (revision 4934526) -Džu Ihai (revision 4991072) -Džu Joulang (revision 4991072) -Emanuelis Vladislovas Tiškevičius Logoiskis (revision 4939239) -Filosofas (revision 5078172) -Gegužės 26 (revision 4910130) -Grafas (titulas) (revision 5008057) -Grigaliaus kalendorius (revision 5000317) -Hebrajų kalendorius (revision 4728592) -Imperatorius Go-Komijas (revision 4907057) -Inocentas X (revision 4905150) +Birželio 21 (revision 6172033) +Bizantijos kalendorius (revision 5300569) +Budistų kalendorius (revision 5979182) +Dešimtmetis (revision 5982040) +Dominikonai (revision 6068818) +Dominikonų ordinas (revision 6068818) +Emanuelis Vladislovas Tiškevičius Logoiskis (revision 5761120) +Filosofas (revision 5836448) +Gegužės 26 (revision 6075204) +Grafas (titulas) (revision 5832187) +Grigaliaus kalendorius (revision 5989624) +Hebrajų kalendorius (revision 5990271) Iraniečių kalendorius (revision 4964854) -Isaac Titsingh (revision 4990745) -Japonija (revision 5035249) -Japonijos imperatorius (revision 4720428) -Japonų kalendorius (revision 4956765) -John Churchill (revision 4903704) -Jonas Kazimieras Vaza (revision 5037754) +Japonų kalendorius (revision 6082601) +John Churchill (revision 5350480) Jurgis Kasakauskis (revision 5047829) Jurgis Kazimieras Ancuta (revision 5059404) -Jurgis Mikalojus Tiškevičius (revision 4939554) +Jurgis Mikalojus Tiškevičius (revision 5481136) +Kalijugos kalendorius (revision 5741238) +Kazimieras Tiškevičius Logoiskis (revision 5481143) +Kinų kalendorius (revision 5995873) +Koptų kalendorius (revision 5996919) +Korėjiečių kalendorius (revision 5996955) +LDK (revision 6130316) +Lapkričio 14 (revision 5943612) +Lelija (herbas) (revision 5999126) +Lietuvių kalba (revision 6201110) +Lietuvos Didžioji Kunigaikštystė (revision 6130316) +Metai (revision 5765072) +Mianmaro kalendorius (revision 5979182) +Mokslų daktaras (revision 6172930) +Motiejus Juozapas Ancuta (revision 4951716) +Musulmonų kalendorius (revision 4705912) +Nekeliamieji metai, prasidedantys šeštadienį (revision 6004293) +Profesorius (revision 6009297) +René Descartes (revision 6201538) +Saka kalendorius (revision 6109866) +Senovės indų kalendoriai (revision 6012785) +Spauda (revision 5345510) +Stanislovas Kristupas Naruševičius (revision 5481106) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-21 00:25:34.773941 +- Wikipedia parsing ended at: 2021-03-16 19:26:36.949228 -60 characters appeared 353051 times. +68 characters appeared 398895 times. -First 38 characters: -[ 0] Char i: 13.032394753165974 % -[ 1] Char a: 11.167225131779828 % -[ 2] Char s: 8.586578143101137 % -[ 3] Char o: 7.018815978428046 % -[ 4] Char e: 5.525830545728521 % -[ 5] Char r: 5.469181506354606 % -[ 6] Char n: 5.142599794363987 % -[ 7] Char t: 5.105777918770942 % -[ 8] Char u: 4.270487833202568 % -[ 9] Char k: 3.9617505686147325 % -[10] Char l: 3.9051015292408184 % -[11] Char m: 3.359854525266888 % -[12] Char d: 3.0372382460324427 % -[13] Char v: 2.7270847554602593 % -[14] Char j: 2.4472385009531203 % -[15] Char p: 2.329125253858508 % -[16] Char g: 1.9427788053284087 % -[17] Char ė: 1.5657794482950054 % -[18] Char b: 1.5074309377398734 % -[19] Char y: 1.2236192504765602 % -[20] Char ų: 1.181698961339863 % -[21] Char š: 0.9630336693565519 % -[22] Char ž: 0.8171623929687212 % -[23] Char c: 0.5959478942135839 % -[24] Char č: 0.48010060869392807 % -[25] Char f: 0.428266737666796 % -[26] Char h: 0.42515104050123065 % -[27] Char z: 0.4010751987673169 % -[28] Char ū: 0.3685020011273159 % -[29] Char ą: 0.3526402701026197 % -[30] Char į: 0.29004308159444386 % -[31] Char ę: 0.14813723796278724 % -[32] Char x: 0.08752276583269838 % -[33] Char w: 0.059198246145740985 % -[34] Char ō: 0.01812769259965274 % -[35] Char ö: 0.008780601102956797 % -[36] Char é: 0.0076476203154785 % -[37] Char q: 0.007364375118608926 % +First 40 characters: +[ 0] Char i: 13.296732222765389 % +[ 1] Char a: 11.103673899146392 % +[ 2] Char s: 8.654407801551786 % +[ 3] Char o: 6.708030935459205 % +[ 4] Char e: 5.518244149462891 % +[ 5] Char r: 5.427493450657441 % +[ 6] Char t: 5.170533599067424 % +[ 7] Char n: 5.082039133105203 % +[ 8] Char u: 4.293109715589315 % +[ 9] Char k: 4.091302222389351 % +[10] Char l: 3.876208024668146 % +[11] Char m: 3.384349264844132 % +[12] Char d: 3.0411511801351234 % +[13] Char v: 2.8220459018037327 % +[14] Char j: 2.286817332882087 % +[15] Char p: 2.243196831246318 % +[16] Char g: 1.902756364456812 % +[17] Char ė: 1.5700873663495405 % +[18] Char b: 1.55980897228594 % +[19] Char y: 1.2637410847466124 % +[20] Char ų: 1.1800097770089872 % +[21] Char š: 0.9924917584828087 % +[22] Char ž: 0.8423269281389839 % +[23] Char c: 0.557289512277667 % +[24] Char č: 0.49461637774351647 % +[25] Char f: 0.40336429386179323 % +[26] Char ū: 0.3863172012685043 % +[27] Char ą: 0.36901941613707867 % +[28] Char z: 0.362501410145527 % +[29] Char h: 0.3604958698404342 % +[30] Char į: 0.3070983592173379 % +[31] Char ę: 0.15618145125910327 % +[32] Char x: 0.09777008987327492 % +[33] Char w: 0.05715789869514534 % +[34] Char ó: 0.027325486656889657 % +[35] Char á: 0.014289474673786336 % +[36] Char é: 0.011531856754283708 % +[37] Char ã: 0.011030471678010504 % +[38] Char ö: 0.008523546296644481 % +[39] Char q: 0.007270083605961468 % -The first 38 characters have an accumulated ratio of 0.9996629382157253. +The first 40 characters have an accumulated ratio of 0.9994234071622861. -1016 sequences found. +1138 sequences found. -First 512 (typical positive ratio): 0.9928710196247589 -Next 512 (512-1024): 0.008171623929687212 -Rest: -4.85722573273506e-17 +First 512 (typical positive ratio): 0.9919219576954762 +Next 512 (512-1024): 0.008423269281389839 +Rest: 0.00033781981757727893 -- Processing end: 2016-09-21 00:25:34.935858 +- Processing end: 2021-03-16 19:26:37.062994 diff --git a/script/BuildLangModelLogs/LangMalteseModel.log b/script/BuildLangModelLogs/LangMalteseModel.log index ad867b3..76b703b 100644 --- a/script/BuildLangModelLogs/LangMalteseModel.log +++ b/script/BuildLangModelLogs/LangMalteseModel.log @@ -1,147 +1,147 @@ = Logs of language model for Maltese (mt) = - Generated by BuildLangModel.py -- Started: 2016-09-21 02:05:23.411546 -- Maximum depth: 5 +- Started: 2021-03-16 19:30:28.553074 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Unjoni Ewropea (revision 246298) +Unjoni Ewropea (revision 255663) 1951 (revision 229183) 1952 (revision 229184) 1957 (revision 229188) 1958 (revision 229189) -1973 (revision 223536) -1979 (revision 243876) -1981 (revision 205545) -1985 (revision 216368) -1986 (revision 231433) -1990 (revision 237666) -1992 (revision 244087) -1995 (revision 214650) -1 ta' Mejju (revision 245374) -2007 (revision 214851) -2013 (revision 245606) -Albanija (revision 243079) -Awstrija (revision 243627) -Awtonomija (revision 245824) -Ażores (revision 246298) -Bank Ċentrali Ewropew (revision 246298) -Belt kapitali (revision 237400) -Belġju (revision 244363) +1973 (revision 252982) +1979 (revision 252967) +1981 (revision 253774) +1985 (revision 252978) +1986 (revision 252978) +1990 (revision 257440) +1992 (revision 249582) +1995 (revision 252258) +1 ta' Mejju (revision 258193) +2007 (revision 258027) +2013 (revision 248708) +Albanija (revision 261944) +Awstrija (revision 261959) +Awtonomija (revision 262074) +Ażores (revision 255663) +Bank Ċentrali Ewropew (revision 255748) +Belt kapitali (revision 255506) +Belġju (revision 255745) Brussell (revision 243311) -Bulgarija (revision 243622) -Danimarka (revision 244419) +Bulgarija (revision 261950) +Danimarka (revision 256058) +Dazji doganali (revision 255663) De facto (revision 215102) -Estonja (revision 243826) -European Free Trade Association (revision 246298) -Ewropa (revision 244177) -Ex Repubblika Jugoslava tal-Maċedonja (revision 246298) -Federazzjoni (revision 246226) -Finlandja (revision 245824) -Frankfurt (revision 243576) -Franza (revision 244461) -Greċja (revision 244423) -Groenlandja (revision 243829) -Indja (revision 244873) -Islanda (revision 243771) -Isle of Man (revision 246298) -Istitut tal-Unjoni Ewropea għall-Istudji dwar is-Sigurtà (revision 244412) -Italja (revision 246323) -Kilometru kwadru (revision 244871) -Komunitajiet Ewropej (revision 246298) -Komunità Ekonomika Ewropea (revision 246298) -Kroazja (revision 245711) -Kummissjoni Ewropea (revision 243311) -Kunsill Ewropew (revision 246298) -Kunsill tal-Ewropa (revision 243334) -Kunsill tal-Unjoni Ewropea (revision 243311) -Latvja (revision 245746) -Lista ta' pajjiżi skont id-daqs (revision 244419) -Lista ta' pajjiżi skont il-popolazzjoni (revision 246128) -Litwanja (revision 243114) -Liġijiet tal-Unjoni Ewropea (revision 246298) -Lussemburgu (revision 244239) +Dħul nazzjonali gross (revision 255663) +Estonja (revision 255711) +European Free Trade Association (revision 255663) +Ewropa (revision 259973) +Ex Repubblika Jugoslava tal-Maċedonja (revision 255663) +Federazzjoni (revision 228364) +Finlandja (revision 258210) +Frankfurt (revision 261246) +Franza (revision 259635) +Greċja (revision 259971) +Groenlandja (revision 250685) +Indja (revision 254565) +Islanda (revision 255630) +Isle of Man (revision 259978) +Istati Membri (revision 255663) +Istitut tal-Unjoni Ewropea għall-Istudji dwar is-Sigurtà (revision 256700) +Italja (revision 254814) +Kilometru kwadru (revision 247665) +Komunitajiet Ewropej (revision 256698) +Komunità Ekonomika Ewropea (revision 255663) +Kroazja (revision 249144) +Kummissjoni Ewropea (revision 258115) +Kunsill Ewropew (revision 255754) +Kunsill tal-Ewropa (revision 255754) +Kunsill tal-Unjoni Ewropea (revision 255754) +Latvja (revision 255712) +Lista ta' pajjiżi skont id-daqs (revision 254529) +Lista ta' pajjiżi skont il-popolazzjoni (revision 260622) +Litwanja (revision 259637) +Liġijiet tal-Unjoni Ewropea (revision 255663) +Lussemburgu (revision 253431) Lussemburgu (belt) (revision 243587) Madejra (revision 243625) -Malta (revision 247210) -Montenegro (revision 243930) -Norveġja (revision 243829) -Olanda (revision 243989) -Organizzazzjoni Internazzjonali (revision 246724) -Pajjiżi l-Baxxi (revision 243989) -Pajjiżi membri tal-Unjoni Ewropea (revision 243625) -Pajjiżi ġirien li jdawru l-Unjoni Ewropea (revision 246298) -Parlament Ewropew (revision 243907) -Patt ta' Stabilità u Tkabbir (revision 246298) -Politika agrikola komuni (revision 244363) -Politika reġjonali tal-Unjoni Ewropea (revision 246298) -Polonja (revision 244530) +Malta (revision 261973) +Montenegro (revision 255647) +Norveġja (revision 261168) +Olanda (revision 261407) +Organizzazzjoni Internazzjonali (revision 258039) +Organizzazzjonijiet mhux governattivi (revision 233500) +Pajjiżi l-Baxxi (revision 261407) +Pajjiżi membri tal-Unjoni Ewropea (revision 255663) +Pajjiżi ġirien li jdawru l-Unjoni Ewropea (revision 255663) +Parlament Ewropew (revision 255748) +Politika agrikola komuni (revision 255745) +Politika reġjonali tal-Unjoni Ewropea (revision 255663) +Polonja (revision 261762) Portugall (revision 243625) -Relazzjonijiet ta' terzi pajjiżi ma l-UE (revision 246298) -Renju Unit (revision 247318) -Repubblika Federali tal-Ġermanja (revision 244859) -Repubblika tal-Irlanda (revision 243686) -Repubblika Ċeka (revision 246832) -Rumanija (revision 243623) -Segretarjat tal-Parlament Ewropew (revision 246298) -Serbja (revision 243728) -Slovakkja (revision 243831) -Slovenja (revision 244588) -Spanja (revision 246856) -Stati Uniti tal-Amerika (revision 243926) -Stati membri tal-Unjoni Ewropea (revision 243114) +Qorti tal-Ġustizzja tal-Unjoni Ewropea (revision 255663) +Relazzjonijiet ta' terzi pajjiżi ma l-UE (revision 255663) +Renju Unit (revision 254529) +Repubblika Federali tal-Ġermanja (revision 258687) +Repubblika tal-Irlanda (revision 250619) +Repubblika Ċeka (revision 255669) +Rumanija (revision 261954) +Segretarjat tal-Parlament Ewropew (revision 255663) +Serbja (revision 259975) +Slovakkja (revision 255727) +Slovenja (revision 261963) +Spanja (revision 258290) +Stati membri tal-Unjoni Ewropea (revision 255663) Strasburgu (revision 243503) -Sui generis (revision 247150) -Suq komuni (revision 246298) -Svezja (revision 244871) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-21 02:07:45.508113 +- Wikipedia parsing ended at: 2021-03-16 19:33:28.445834 -48 characters appeared 474337 times. +49 characters appeared 643393 times. First 31 characters: -[ 0] Char a: 12.326257492036252 % -[ 1] Char i: 12.069899670487438 % -[ 2] Char t: 8.064941170518008 % -[ 3] Char l: 7.795301652622502 % -[ 4] Char e: 6.615971345267184 % -[ 5] Char n: 6.128132530247482 % -[ 6] Char r: 5.579577389071483 % -[ 7] Char u: 4.376424356522894 % -[ 8] Char o: 3.8337721915009797 % -[ 9] Char j: 3.7378488289971057 % -[10] Char m: 3.6084049947611088 % -[11] Char s: 3.3533120966738834 % -[12] Char k: 2.588033402412209 % -[13] Char d: 2.3173397816320462 % -[14] Char p: 2.0555006250830106 % -[15] Char b: 2.017131280081461 % -[16] Char f: 2.004692866042497 % -[17] Char ħ: 1.6372326004507345 % -[18] Char w: 1.4801712706366992 % -[19] Char g: 1.4763765002519307 % -[20] Char z: 1.3150987588992635 % -[21] Char ż: 0.9910675321554084 % -[22] Char h: 0.9750451683086075 % -[23] Char ġ: 0.7640137708000851 % -[24] Char ċ: 0.6723068198348432 % -[25] Char x: 0.5892435125237964 % -[26] Char v: 0.5668965313690478 % -[27] Char q: 0.5647883255997318 % -[28] Char c: 0.2759641352034524 % -[29] Char à: 0.10730767365817974 % -[30] Char y: 0.059029761540845424 % +[ 0] Char i: 12.115145797358691 % +[ 1] Char a: 12.109705887381429 % +[ 2] Char t: 8.033037350421903 % +[ 3] Char l: 7.963095650714261 % +[ 4] Char e: 6.5463876666361 % +[ 5] Char n: 5.990118014961307 % +[ 6] Char r: 5.530834186881113 % +[ 7] Char u: 4.447514971409388 % +[ 8] Char o: 3.9081867536637795 % +[ 9] Char j: 3.7945703481386963 % +[10] Char m: 3.619405246870886 % +[11] Char s: 3.4255890256810377 % +[12] Char k: 2.5824029792055554 % +[13] Char d: 2.3040350143691337 % +[14] Char p: 2.1852895508654897 % +[15] Char b: 2.0524003214209667 % +[16] Char f: 1.9347428399127748 % +[17] Char ħ: 1.6223365812186332 % +[18] Char g: 1.4863388317871036 % +[19] Char w: 1.4324060100125429 % +[20] Char z: 1.3761417982477273 % +[21] Char ż: 0.9421924080616357 % +[22] Char h: 0.9235412881395973 % +[23] Char ġ: 0.7990450626599915 % +[24] Char ċ: 0.6618039052336597 % +[25] Char v: 0.6143989754318122 % +[26] Char x: 0.610357899448704 % +[27] Char q: 0.5511405936962324 % +[28] Char c: 0.24153200299039623 % +[29] Char à: 0.08936994962643362 % +[30] Char y: 0.061082417744675495 % -The first 31 characters have an accumulated ratio of 0.9994708403519017. +The first 31 characters have an accumulated ratio of 0.9995414933019164. -870 sequences found. +888 sequences found. -First 512 (typical positive ratio): 0.9959115850692665 -Next 512 (512-1024): 2.108205769315908e-06 -Rest: -4.423544863740858e-17 +First 512 (typical positive ratio): 0.9960434044151966 +Next 512 (512-1024): 0.009421924080616357 +Rest: 1.5612511283791264e-17 -- Processing end: 2016-09-21 02:07:45.646198 +- Processing end: 2021-03-16 19:33:28.518739 diff --git a/script/BuildLangModelLogs/LangPolishModel.log b/script/BuildLangModelLogs/LangPolishModel.log index f90f2de..f92700b 100644 --- a/script/BuildLangModelLogs/LangPolishModel.log +++ b/script/BuildLangModelLogs/LangPolishModel.log @@ -1,154 +1,163 @@ = Logs of language model for Polish (pl) = - Generated by BuildLangModel.py -- Started: 2016-09-21 17:06:43.735784 -- Maximum depth: 5 +- Started: 2021-03-16 19:33:28.678083 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Krasnyj Krym (revision 46884814) -1913 (revision 46708474) -1915 (revision 46743905) -1917 (revision 46559521) -1925 (revision 46809935) -1928 (revision 46875978) -1929 (revision 46760445) -1935 (revision 46487358) -1936 (revision 46874348) -1939 (revision 46789269) -1941 (revision 46856112) -1942 (revision 46851808) -1943 (revision 46768330) -1944 (revision 46866229) -1949 (revision 46882598) -1953 (revision 46437607) -1957 (revision 46591716) -1959 (revision 46255886) -Admirał Butakow (revision 45993412) -Admirał Spiridow (revision 45993412) -Aparat torpedowy (revision 46633263) -Askold (revision 45787848) -Avro 504 (revision 44668646) -Ałmaz (1903) (revision 46472283) -Batumi (revision 46594611) -Bomba głębinowa (revision 46011227) -Brest (revision 45771242) +Krasnyj Krym (revision 62415649) +11 grudnia (revision 62631194) +1913 (revision 62510480) +1915 (revision 62045210) +1925 (revision 62586144) +1929 (revision 62587250) +1935 (revision 62643677) +1936 (revision 62578718) +1939 (revision 62647877) +1941 (revision 62626183) +1942 (revision 62634977) +1943 (revision 62605793) +1944 (revision 62629763) +1949 (revision 62629889) +1953 (revision 62544578) +1957 (revision 62605043) +1959 (revision 62544562) +45 mm armata morska 21-K (revision 61708436) +76 mm armata przeciwlotnicza wz. 1914/15 (revision 62529098) +7 grudnia (revision 62636554) +AG Vulcan Stettin (revision 56402035) +Admirał Butakow (revision 61255818) +Aurora (1900) (revision 60525374) +Avro 504 (revision 62119913) +Bomba głębinowa (revision 62280686) +Brest (revision 59991108) Burta (revision 45569092) -Cagliari (revision 46235605) -Cesariewicz (revision 40031486) -Czerwona Ukraina (revision 45993524) -Daty nowego i starego porządku (revision 45622575) -Drednot (revision 45789788) -Działo przeciwlotnicze (revision 45160162) -Flota Bałtycka Marynarki Wojennej Rosji (revision 45700667) -Gromoboj (revision 44328986) -Hulk (okręt) (revision 46020688) -II wojna światowa (revision 46871591) -I wojna światowa (revision 46869119) -Imperator Nikołaj I (okręt lotniczy) (revision 45520638) -Imperium Rosyjskie (revision 46604959) -Impierator Nikołaj I (1916) (revision 46534166) -Język rosyjski (revision 46433952) -Kanonierka (revision 41091952) -Kanonierki typu Ardagan (revision 46534166) -Kanonierki typu Bobr (revision 45788694) -Kanonierki typu Chiwiniec (revision 46534166) -Kanonierki typu Groziaszczij (revision 46534166) -Kanonierki typu Mandżur (revision 46534166) -Karabin maszynowy DSzK (revision 45587452) -Karabin maszynowy Vickers 12,7 mm (revision 44572918) -Kocioł parowy (revision 46716473) -Konstrukcyjna linia wodna (revision 37082620) -Kontrtorpedowce typu Biesstrasznyj (revision 46534166) -Kontrtorpedowce typu Brawyj (revision 46534166) -Kontrtorpedowce typu Grozowoj (revision 46534166) -Kontrtorpedowce typu Prytkij (revision 46534166) -Koń mechaniczny (revision 44722357) -Krab (1915) (revision 42791389) -Kronsztad (revision 46425497) -Krążownik lekki (revision 40661490) -Krążownik liniowy (revision 40601776) -Krążownik pancernopokładowy (revision 40055901) -Krążownik pancerny (revision 40324458) -Krążowniki lekkie typu Swietłana (revision 45993412) -Krążowniki liniowe typu Borodino (revision 45990866) -Krążowniki typu Admirał Nachimow (revision 45993521) -Krążowniki typu Bajan (revision 45991279) -Krążowniki typu Diana (revision 45991349) -Krążowniki typu Izumrud (revision 45991349) -Lend-Lease Act (revision 46877263) -Marynarka Wojenna Związku Socjalistycznych Republik Radzieckich (revision 45795993) +Cagliari (revision 57357802) +Czerwona Ukraina (revision 62415654) +Daty nowego i starego porządku (revision 60118095) +Działo przeciwlotnicze (revision 57354362) +Długość całkowita (statek) (revision 57603162) +Flota Bałtycka (revision 62436950) +Flota Czarnomorska (revision 62138173) +Gwardia (wojsko) (revision 53610648) +Hulk (okręt) (revision 61976707) +II wojna światowa (revision 62628019) +I wojna światowa (revision 61897062) +Imperium Rosyjskie (revision 62512980) +Język rosyjski (revision 62485083) +Karabin maszynowy DSzK (revision 62495075) +Karabin maszynowy Vickers 12,7 mm (revision 51917495) +Kocioł parowy (revision 62570204) +Konstrukcyjna linia wodna (revision 59497856) +Koń mechaniczny (revision 57660802) +Kronsztad (revision 58913101) +Krążownik lekki (revision 58075663) +Krążowniki lekkie typu Swietłana (revision 61255818) +Krążowniki typu Admirał Nachimow (revision 56872613) +Lend-Lease Act (revision 61097607) +Marynarka Wojenna Związku Socjalistycznych Republik Radzieckich (revision 62606797) Maszyna sterowa (revision 28497888) -Mecidiye (1903) (revision 43956539) -Mila morska (revision 45754209) -Mina morska (revision 45781427) -Morze Czarne (revision 46729213) -Nadbudówka (revision 45292731) -Neapol (revision 46823083) -Niszczyciel (revision 45799132) -Niszczyciele rakietowe projektu 61 (revision 46498775) -Niszczyciele typu Finn (revision 46620140) -Niszczyciele typu Lejtienant Szestakow (revision 46620140) -Niszczyciele typu Ochotnik (revision 46620140) -Niszczyciele typu Ukraina (revision 46620140) -Noworosyjsk (revision 44721836) -Odessa (revision 45629804) -Oerlikon 20 mm (revision 45493862) -Okres międzywojenny (revision 46668249) -Okręt-baza wodnosamolotów (revision 45115462) +Mila morska (revision 61023950) +Mina morska (revision 61000099) +Morze Czarne (revision 61790806) +Nadbudówka (revision 57496460) +Neapol (revision 61681555) +Niszczyciele rakietowe projektu 61 (revision 61591760) +Noworosyjsk (revision 62635030) +Obrona Odessy (revision 61668078) +Odessa (revision 62609713) +Oerlikon 20 mm (revision 60068925) +Operacja desantowa kerczeńsko-teodozyjska (revision 60265054) +Parawan (trał) (revision 54434173) +Petersburg (revision 62601352) +Poti (revision 62387800) +Radar (revision 61897200) +Rangi okrętów (revision 59334819) +Rewolucja październikowa (revision 62498820) +Rosyjska Federacyjna Socjalistyczna Republika Radziecka (revision 62401382) +Rosyjska marynarka wojenna (revision 62145039) +Salwa burtowa (revision 45535265) +Sewastopol (revision 61699516) +Siewastopol (1911) (revision 61344180) +Stal Kruppa (revision 44611245) +Sudak (revision 56397428) +Szalupa (revision 50176935) +Szerokość całkowita (revision 59927053) +Tallinn (revision 62370993) +Tarmo (revision 60930043) +Teodozja (miasto) (revision 61289639) +Tuapse (revision 54506404) +Turbina parowa (revision 58882974) +Typ okrętu (revision 58157719) +Wielka wojna ojczyźniana (revision 62540748) +Wielkokalibrowy karabin maszynowy (revision 60069207) +Wodnosamolot (revision 61361212) +Wojna domowa w Rosji (revision 61724197) +Wyporność (revision 61495676) +Wyrzutnia torpedowa (revision 59771268) +Węzeł (jednostka prędkości) (revision 62033661) +Zatoka Biskajska (revision 59124431) +Związek Socjalistycznych Republik Radzieckich (revision 62525734) +Śruba okrętowa (revision 62489877) +Śródokręcie (revision 45285929) +Świnoujście (revision 62151792) +(691) Lehigh (revision 60266839) +1066 (revision 62500082) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-21 17:21:04.404471 +- Wikipedia parsing ended at: 2021-03-16 19:54:55.177499 -78 characters appeared 1159291 times. +86 characters appeared 1860467 times. -First 37 characters: -[ 0] Char a: 9.685575062689178 % -[ 1] Char i: 8.815819324052374 % -[ 2] Char o: 7.920185699707839 % -[ 3] Char e: 6.871613770830621 % -[ 4] Char r: 5.8672067668945935 % -[ 5] Char n: 5.763608964444647 % -[ 6] Char s: 4.736688199942896 % -[ 7] Char k: 4.722196583946568 % -[ 8] Char z: 4.519227700378939 % -[ 9] Char w: 4.279512219106333 % -[10] Char t: 4.0191806888865695 % -[11] Char c: 3.6891513864939864 % -[12] Char y: 3.565282573572986 % -[13] Char p: 3.0190004062828053 % -[14] Char d: 2.851052928039638 % -[15] Char l: 2.7930002044352973 % -[16] Char m: 2.7530620008263673 % -[17] Char u: 2.348504387595522 % -[18] Char j: 1.881236031332944 % -[19] Char ł: 1.6885320424293815 % -[20] Char b: 1.394559260789569 % -[21] Char g: 1.3928340684090534 % -[22] Char h: 1.163901039514669 % -[23] Char ę: 0.8066136975099435 % -[24] Char ó: 0.5971753425153823 % -[25] Char ą: 0.563275312238256 % -[26] Char f: 0.5245447432956868 % -[27] Char ż: 0.4545019326467643 % -[28] Char ś: 0.39567287247119143 % -[29] Char ń: 0.3857530162832283 % -[30] Char ć: 0.1397405828217419 % -[31] Char v: 0.12455888987320698 % -[32] Char ź: 0.10204512930748191 % -[33] Char x: 0.05468859846233603 % -[34] Char é: 0.020961087423261287 % -[35] Char á: 0.01707940456710179 % -[36] Char q: 0.011386269711401192 % +First 38 characters: +[ 0] Char a: 9.71455016401796 % +[ 1] Char i: 8.783547356658302 % +[ 2] Char o: 7.7947633578021005 % +[ 3] Char e: 6.889130524755344 % +[ 4] Char r: 6.010641414225568 % +[ 5] Char n: 5.536996893790645 % +[ 6] Char k: 5.05394613287954 % +[ 7] Char s: 5.034864902199287 % +[ 8] Char z: 4.529185414199769 % +[ 9] Char w: 4.033180916404322 % +[10] Char t: 4.019743430009777 % +[11] Char c: 3.6763887776563626 % +[12] Char y: 3.5020777041463247 % +[13] Char p: 3.0798181316841413 % +[14] Char l: 2.971941990908734 % +[15] Char d: 2.804779660160594 % +[16] Char m: 2.7137810022967352 % +[17] Char u: 2.3359726348277072 % +[18] Char j: 1.8645856121070676 % +[19] Char ł: 1.5818608983658402 % +[20] Char g: 1.402981079481657 % +[21] Char b: 1.3551436279170768 % +[22] Char h: 1.1977100373185872 % +[23] Char ę: 0.6938042975231488 % +[24] Char ą: 0.5616331813464038 % +[25] Char ó: 0.5564194366253205 % +[26] Char f: 0.5355107077954083 % +[27] Char ń: 0.43010706451659714 % +[28] Char ż: 0.42290457180912105 % +[29] Char ś: 0.3628658825982939 % +[30] Char v: 0.1491023490338716 % +[31] Char ć: 0.12942986895225767 % +[32] Char ź: 0.08433366461216459 % +[33] Char x: 0.0421399573332932 % +[34] Char é: 0.02617622349657371 % +[35] Char á: 0.02246747725167928 % +[36] Char í: 0.014136235687061367 % +[37] Char q: 0.013114986721075944 % -The first 37 characters have an accumulated ratio of 0.9993892818972973. +The first 38 characters have an accumulated ratio of 0.9993173756911571. -1321 sequences found. +1547 sequences found. -First 512 (typical positive ratio): 0.9894531815946438 -Next 512 (512-1024): 1.7251923805153322e-06 -Rest: 0.0003530230403650733 +First 512 (typical positive ratio): 0.9881622113600178 +Next 512 (512-1024): 0.0042290457180912105 +Rest: 0.0005488849902139173 -- Processing end: 2016-09-21 17:21:04.878014 +- Processing end: 2021-03-16 19:54:55.605846 diff --git a/script/BuildLangModelLogs/LangPortugueseModel.log b/script/BuildLangModelLogs/LangPortugueseModel.log index dce6f36..e1f91e2 100644 --- a/script/BuildLangModelLogs/LangPortugueseModel.log +++ b/script/BuildLangModelLogs/LangPortugueseModel.log @@ -1,166 +1,166 @@ = Logs of language model for Portuguese (pt) = - Generated by BuildLangModel.py -- Started: 2016-09-20 23:44:39.722451 -- Maximum depth: 5 +- Started: 2021-03-16 19:54:55.771448 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Papagaio-das-mascarenhas (revision 46763149) -Albinismo (revision 46498446) -Alfred Newton (revision 43617011) -Alphonse Milne-Edwards (revision 39740747) -Animalia (revision 46727732) -Asa (revision 46338820) -August von Pelzeln (revision 34726241) -Aves (revision 46728980) -Bico (revision 45311553) -Carl Wilhelm Hahn (revision 45025566) -Carlos Lineu (revision 46625396) -Carolus Linnaeus (revision 46625396) -Cauda (revision 43275401) -Charles Lucien Bonaparte (revision 45529712) -Chordata (revision 46640101) -Cladograma (revision 46700307) -Classe (biologia) (revision 46701409) -Classificação científica (revision 46306288) -Coleção Leverian (revision 45026647) -Comores (revision 46181501) +Papagaio-das-mascarenhas (revision 58875640) +Albinismo (revision 60544601) +Alfred Newton (revision 55613591) +Alphonse Milne-Edwards (revision 55360216) +Animalia (revision 59086849) +Asa (revision 59016280) +August von Pelzeln (revision 55658828) +Aves (revision 59780941) +Bico (revision 59270926) +BirdLife International (revision 60296296) +Carl Wilhelm Hahn (revision 58280895) +Carlos Lineu (revision 60424490) +Carolus Linnaeus (revision 60424490) +Cauda (revision 56806253) +Charles Lucien Bonaparte (revision 52587707) +Chordata (revision 60632448) +Cladograma (revision 55578666) +Classe (biologia) (revision 56051821) +Classificação científica (revision 59003514) +Coleção Leverian (revision 49939876) +Comores (revision 60033304) Coracopsinae (revision 36946101) -Coracopsis nigra (revision 44338845) -Coracopsis vasa (revision 42905822) -Cylindraspis indica (revision 42905410) -Cúlmen (revision 45311553) -Digital object identifier (revision 42172651) -Eclectus roratus (revision 44380798) -Edward Newton (revision 39261469) -Endemismo (revision 45260961) -Epíteto específico (revision 35101647) -Espécie (revision 45685675) -Esquilo-vermelho (revision 43489595) -Estado de conservação (revision 46662839) -Extinção (revision 46526607) -Família (biologia) (revision 46636004) -Filo (revision 46704246) -França (revision 46740839) +Coracopsis nigra (revision 49364496) +Coracopsis vasa (revision 55904306) +Cylindraspis indica (revision 55039606) +Cúlmen (revision 59270926) +Digital object identifier (revision 59704276) +EBird (revision 54789725) +Eclectus roratus (revision 60346158) +Edward Newton (revision 52355291) +Enciclopédia da Vida (revision 53360339) +Endemismo (revision 59148596) +Epíteto específico (revision 58254455) +Espécie (revision 60480387) +Esquilo-vermelho (revision 59084882) +Estado de conservação (revision 60507425) +Extinção (revision 60618960) +Família (biologia) (revision 58605859) +Filo (revision 58307920) +Fossilworks Paleobiology Database (revision 60618977) +França (revision 60657760) François-Nicolas Martinet (revision 43679514) -François Levaillant (revision 40142351) -Fredrik Hasselqvist (revision 44381122) -Fregilupus varius (revision 46555765) -Fumigação (revision 42458244) -George Robert Gray (revision 39047844) -Georges-Louis Leclerc, conde de Buffon (revision 45622418) -Género (biologia) (revision 45296588) -Hermann Schlegel (revision 43137605) -Herpetologista (revision 46207704) -Histoire Naturelle (revision 44293456) -Holótipo (revision 44029660) -Ilha da Reunião (revision 45458206) -Ilha vulcânica (revision 37924535) -Ilhas Mascarenhas (revision 45858660) -Ilhas Molucas (revision 45476933) -International Standard Book Number (revision 46326494) +François Levaillant (revision 49358726) +Fredrik Hasselqvist (revision 52281786) +Fregilupus varius (revision 54591191) +Fumigação (revision 50600995) +George Robert Gray (revision 60662109) +Georges-Louis Leclerc, conde de Buffon (revision 53113664) +Global Biodiversity Information Facility (revision 59909217) +Género (biologia) (revision 60485207) +Hermann Schlegel (revision 58280671) +Herpetologista (revision 57406279) +Histoire Naturelle (revision 50957493) +Holótipo (revision 55228464) +INaturalist (revision 54028036) +ITIS (revision 59095296) +IUCN (revision 58907792) +Ilha da Reunião (revision 60519224) +Ilha vulcânica (revision 59932533) +Ilhas Mascarenhas (revision 60149877) +Ilhas Molucas (revision 58541748) +International Standard Book Number (revision 59096583) Jacques Barraband (revision 45007769) Jean Feuilley (revision 43140791) -Johann Georg Wagler (revision 34585234) -John Gerrard Keulemans (revision 39664498) +Johann Georg Wagler (revision 58641840) +John Gerrard Keulemans (revision 49649801) Julian Hume (revision 41876605) -Leiolopisma (revision 43997173) -Lionel Walter Rothschild (revision 46022922) -Lista Vermelha da IUCN (revision 46569884) -Lista Vermelha da União Internacional para a Conservação da Natureza e dos Recursos Naturais (revision 46569884) -Lista Vermelha de Espécies Ameaçadas da IUCN (revision 46569884) -Lista de aves extintas (revision 45507420) -Londres (revision 46310311) -Língua inglesa (revision 46609785) -Madagascar (revision 46617630) +Leiolopisma (revision 49675967) +Lionel Walter Rothschild (revision 60408276) +Lista Vermelha da IUCN (revision 59379270) +Lista Vermelha da União Internacional para a Conservação da Natureza e dos Recursos Naturais (revision 58907792) +Lista Vermelha de Espécies Ameaçadas da IUCN (revision 59379270) +Lista de aves extintas (revision 56678269) +Londres (revision 60339639) +Língua inglesa (revision 60421609) +Madagascar (revision 60519261) Mascarenotus grucheti (revision 43145662) -Mathurin Jacques Brisson (revision 36018826) -Maurício (revision 46723599) -Maximiliano I José da Baviera (revision 46372080) -Melanina (revision 46762903) -Museu Nacional de História Natural (França) (revision 43731807) -Naturhistorisches Museum (revision 46694247) -Nesoenas duboisi (revision 43995805) -Nome científico (revision 46671641) -Nomenclatura binomial (revision 46671641) -Nycticorax duboisi (revision 43816214) -Nível do mar (revision 46414695) -Ordem (biologia) (revision 46360024) -Otto Finsch (revision 42362273) -Papagaio (revision 46738207) -Papagaio-cinzento (revision 46673943) -Papagaio-cinzento-de-maurício (revision 46664408) -Pedro Mascarenhas (c. 1484-1555) (revision 45541977) -Periquito-de-maurício (revision 43010883) -Periquito-de-reunião (revision 43048764) -Peter Mundy (revision 43563846) -Piton des Neiges (revision 45632497) -Pleistoceno (revision 45916874) -Plumagem (revision 34951058) -Ponto quente (revision 45375495) -Porphyrio coerulescens (revision 43672493) -Praslin (revision 40728143) -Psitacídeos (revision 46598835) -Psittaciformes (revision 46598835) -Psittacula (revision 42856453) -Psittaculinae (revision 46760737) -Psittaculini (revision 43015966) -Psittrichasiidae (revision 44385977) +Mathurin Jacques Brisson (revision 51922685) +Maurício (revision 60625767) +Maximiliano I José da Baviera (revision 58499194) +Melanina (revision 59475698) +Museu Nacional de História Natural (França) (revision 59928766) +National Center for Biotechnology Information (revision 59213569) +Naturhistorisches Museum (revision 51807264) +Nesoenas duboisi (revision 57384381) +Nome científico (revision 60480452) +Nomenclatura binomial (revision 60480452) +Nycticorax duboisi (revision 57384378) +Nível do mar (revision 59494064) +Ordem (biologia) (revision 56361837) +Otto Finsch (revision 52466524) +Papagaio (revision 60655174) +Papagaio-cinzento (revision 59484957) +Papagaio-cinzento-de-maurício (revision 58875653) +Pedro Mascarenhas (c. 1484-1555) (revision 49518171) +Periquito-de-maurício (revision 54615644) +Periquito-de-reunião (revision 54615645) +Peter Mundy (revision 58162914) +Piton des Neiges (revision 57212555) +Pleistoceno (revision 59637437) +Plumagem (revision 56296594) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-20 23:47:27.346826 +- Wikipedia parsing ended at: 2021-03-16 19:59:19.802576 -51 characters appeared 558324 times. +51 characters appeared 713201 times. First 38 characters: -[ 0] Char a: 11.864795351802895 % -[ 1] Char e: 11.44604208309154 % -[ 2] Char o: 9.868284365350585 % -[ 3] Char s: 8.346587286235232 % -[ 4] Char i: 7.118089138206489 % -[ 5] Char r: 6.394136737808154 % -[ 6] Char n: 5.568272186042513 % -[ 7] Char d: 5.243192125002687 % -[ 8] Char t: 4.80061756256224 % -[ 9] Char m: 4.498105042949971 % -[10] Char c: 3.9747530107965985 % -[11] Char u: 3.7229279056605127 % -[12] Char l: 3.207814817202914 % -[13] Char p: 2.77562848811801 % -[14] Char g: 1.3850380782484721 % -[15] Char v: 1.3210967108703908 % -[16] Char f: 1.122466524813549 % -[17] Char b: 0.9702251739133549 % -[18] Char h: 0.9130898904578704 % -[19] Char é: 0.7026386112723079 % -[20] Char ã: 0.7022803963290133 % -[21] Char q: 0.5903382265494588 % -[22] Char ç: 0.5856814322866293 % -[23] Char í: 0.41391736697688086 % -[24] Char x: 0.3913498255493226 % -[25] Char á: 0.34567742027926435 % -[26] Char z: 0.3170202248156984 % -[27] Char ó: 0.22925756370852768 % -[28] Char j: 0.20454073262120204 % -[29] Char ê: 0.20239144296143458 % -[30] Char õ: 0.16155493942585308 % -[31] Char y: 0.15080849112701586 % -[32] Char w: 0.09241945537000021 % -[33] Char ú: 0.08794176857881804 % -[34] Char k: 0.08364318925928313 % -[35] Char â: 0.07898639499645367 % -[36] Char à: 0.06859816164091102 % -[37] Char ô: 0.031164700066627977 % +[ 0] Char a: 11.984419539512704 % +[ 1] Char e: 11.434925077222271 % +[ 2] Char o: 9.885712442915812 % +[ 3] Char s: 8.280835276450818 % +[ 4] Char i: 7.116787553578866 % +[ 5] Char r: 6.403664605069258 % +[ 6] Char n: 5.615948379208667 % +[ 7] Char d: 5.256442433479482 % +[ 8] Char t: 4.736673111787561 % +[ 9] Char m: 4.516118177063689 % +[10] Char c: 3.973213722358774 % +[11] Char u: 3.7191478979978996 % +[12] Char l: 3.1644655573954608 % +[13] Char p: 2.783647246708852 % +[14] Char g: 1.3397345208433526 % +[15] Char v: 1.3255730151808536 % +[16] Char f: 1.1414734415683656 % +[17] Char b: 0.9920064610116923 % +[18] Char h: 0.868759297869745 % +[19] Char ã: 0.7190118914583687 % +[20] Char é: 0.6653103402827534 % +[21] Char ç: 0.6455403175261952 % +[22] Char q: 0.5922594051326344 % +[23] Char í: 0.41138472884923044 % +[24] Char x: 0.3736674513916834 % +[25] Char á: 0.3452042271393338 % +[26] Char z: 0.3241722880366124 % +[27] Char ó: 0.2204147217965202 % +[28] Char ê: 0.204150022223749 % +[29] Char j: 0.2023272541681798 % +[30] Char õ: 0.17863126944578034 % +[31] Char y: 0.13222079049244184 % +[32] Char ú: 0.08819393130407838 % +[33] Char â: 0.08300605299207375 % +[34] Char w: 0.08174413664591049 % +[35] Char k: 0.07445306442363374 % +[36] Char à: 0.06688156634665403 % +[37] Char ô: 0.034492380128463083 % -The first 38 characters have an accumulated ratio of 0.9998137282294869. +The first 38 characters have an accumulated ratio of 0.9998261359700841. -891 sequences found. +929 sequences found. -First 512 (typical positive ratio): 0.9953179582313172 -Next 512 (512-1024): 1.7910747164728723e-06 -Rest: 2.42861286636753e-17 +First 512 (typical positive ratio): 0.9952990712503466 +Next 512 (512-1024): 0.0008819393130407837 +Rest: -7.806255641895632e-18 -- Processing end: 2016-09-20 23:47:27.489355 +- Processing end: 2021-03-16 19:59:19.891534 diff --git a/script/BuildLangModelLogs/LangRomanianModel.log b/script/BuildLangModelLogs/LangRomanianModel.log index 5d30cbc..c66f99f 100644 --- a/script/BuildLangModelLogs/LangRomanianModel.log +++ b/script/BuildLangModelLogs/LangRomanianModel.log @@ -1,153 +1,155 @@ = Logs of language model for Romanian (ro) = - Generated by BuildLangModel.py -- Started: 2016-09-28 18:53:56.086095 -- Maximum depth: 5 +- Started: 2021-03-16 19:59:20.080997 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -The Loving Kind (revision 10166481) -12 ianuarie (revision 10711676) -13 decembrie (revision 9938353) -2007 (revision 10716321) -2008 (revision 10752084) -2009 (revision 10654003) -21 noiembrie (revision 10447643) -25 ianuarie (revision 10228199) -31 ianuarie (revision 10718063) -4 Music (revision 9701591) -Billboard (revision 10505294) +The Loving Kind (revision 12020391) +12 ianuarie (revision 13977250) +13 decembrie (revision 13958824) +2007 (revision 13956975) +2008 (revision 13894929) +2009 (revision 13949957) +21 noiembrie (revision 13705857) +25 ianuarie (revision 13882659) +31 ianuarie (revision 13887860) +4 Music (revision 13955370) +Billboard (revision 13092896) Biology (revision 10112430) -Bulgaria (revision 10481051) -CD (revision 10477531) -Call The Shots (revision 10101027) -Call the Shots (revision 10101027) -Can't Speak French (revision 9721506) +Bulgaria (revision 13779617) +CD (revision 13258410) +Call The Shots (revision 13085752) +Call the Shots (revision 13085752) +Can't Speak French (revision 12018260) Casă de discuri (revision 10611348) -Channel 4 (revision 7953101) -Chemistry (revision 10112479) -Cheryl Cole (revision 10475016) -Chitară (revision 10468266) -Croația (revision 10737746) -Dance (revision 10231736) -Descărcare digitală (revision 10100743) -Digital Spy (revision 9044016) -Discografia Girls Aloud (revision 10172788) -Estonia (revision 10749810) -Europa (revision 10752724) -Fascination Records (revision 9655292) -Fiona Phillips (revision 5384082) -Gen muzical (revision 10534645) +Channel 4 (revision 13980413) +Chemistry (revision 13003795) +Cheryl Cole (revision 13707613) +Chitară (revision 13704508) +Croația (revision 13662573) +Dance (revision 12713318) +Descărcare digitală (revision 10785925) +Digital Spy (revision 12038314) +Discografia formației Girls Aloud (revision 13332557) +Estonia (revision 13885094) +Europa (revision 13985083) +Fascination Records (revision 9653126) +Gen muzical (revision 13743085) Girls A Live (revision 10112444) -Girls Aloud (revision 10112446) -Good Morning Television (revision 10166481) -Heat World (revision 10166481) +Girls Aloud (revision 12017377) +Good Morning Television (revision 13079309) +Heat World (revision 12994549) I'll Stand By You (cântec de Girls Aloud) (revision 10112432) -ITunes (revision 10744174) +ITunes (revision 13985408) I Think We're Alone Now (revision 10112427) -Irlanda (revision 10573806) +Irlanda (revision 13830248) +Jewels & Stone (revision 8842892) Jump (cântec de Girls Aloud) (revision 10112438) -Lady GaGa (revision 10753010) +Lady GaGa (revision 13982113) Life Got Cold (revision 10112437) -Limba engleză (revision 10756676) +Limba engleză (revision 13983069) Long Hot Summer (revision 10112429) Love Machine (revision 10112433) -MSN Search (revision 10653298) -MTV (revision 10170766) +MSN Search (revision 13651565) +MTV (revision 12996766) Mixed Up (revision 10112443) -Muzică electronică (revision 10608432) -Muzică pop (revision 10740529) +Muzică electronică (revision 13450013) +Muzică pop (revision 13648051) Nadine Coyle (revision 10316187) -Neil Tennant (revision 10499980) +Neil Tennant (revision 13355922) No Good Advice (revision 10112436) Out Of Control (revision 10112484) Out of Control (revision 10112484) -Pet Shop Boys (revision 10612741) -Poker Face (revision 10496402) -PopJustice (revision 10625677) -Regatul Unit (revision 10752338) -Regatul Unit al Marii Britanii și Irlandei de Nord (revision 10752338) -Regatul Unit al Marii Britanii și al Irlandei de Nord (revision 10752338) -Republica Irlanda (revision 10573806) -Romanian Top 100 (revision 10736281) -România (revision 10732435) -Sarah Harding (revision 10633651) -Sarah Hearding (revision 10112425) +Pet Shop Boys (revision 13165657) +Poker Face (revision 13083515) +PopJustice (revision 12061987) +Regatul Unit (revision 13957992) +Regatul Unit al Marii Britanii și Irlandei de Nord (revision 13957992) +Regatul Unit al Marii Britanii și al Irlandei de Nord (revision 13957992) +Republica Irlanda (revision 13830248) +Romanian Top 100 (revision 13882522) +România (revision 13906545) +Sarah Harding (revision 10139259) +Sarah Hearding (revision 12017812) See the Day (revision 10112431) -Sexy! No No No... (revision 10112425) -Slant Magazine (revision 7697473) -Slovenia (revision 10521499) +Sexy! No No No... (revision 12017812) +Slant Magazine (revision 12008416) +Slovenia (revision 13726273) Something Kinda Ooooh (revision 10112426) Sound of the Underground (album) (revision 10112476) Sound of the Underground (cântec) (revision 10112434) -Tangled Up (revision 10112482) -The Guardian (revision 9752334) -The Paul O'Grady Show (revision 10101027) -The Promise (revision 10166482) +Tangled Up (revision 13010794) +The Guardian (revision 12369330) +The Paul O'Grady Show (revision 12720320) +The Promise (revision 12178852) The Show (revision 10112441) The Sound of Girls Aloud (revision 10112480) -Tonalitate (revision 9966362) -Turneul Out of Control (revision 10112446) -UK Mix (revision 9721468) +Times Online (revision 12014967) +Tonalitate (revision 12509051) +Turneul Out of Control (revision 10112484) +UK Mix (revision 13757304) UK Singles Chart (revision 10226705) -Ungaria (revision 10737745) -Uniunea Europeană (revision 10751590) -Untouchable (revision 10112410) +Ungaria (revision 13960307) +Uniunea Europeană (revision 13689726) +Untouchable (revision 12020867) +Utah Saints (revision 12270967) Wake Me Up (revision 10112439) What Will The Neighbours Say? (revision 10112478) -Whole Lotta History (revision 10475020) -Wideboys (revision 10166481) -Wikimedia Commons (revision 9703907) -Xenomania (revision 10112484) +Whole Lotta History (revision 12369785) +Wideboys (revision 12030035) +Wikimedia Commons (revision 13278756) +Xenomania (revision 12020867) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-28 18:58:13.756622 +- Wikipedia parsing ended at: 2021-03-16 20:04:01.198792 -60 characters appeared 883554 times. +63 characters appeared 1198090 times. First 33 characters: -[ 0] Char e: 11.67014127036944 % -[ 1] Char i: 10.97567324690964 % -[ 2] Char a: 10.080198833348046 % -[ 3] Char r: 7.490657050955572 % -[ 4] Char n: 7.18246988865423 % -[ 5] Char t: 6.516296683620921 % -[ 6] Char l: 5.595130574928075 % -[ 7] Char u: 5.551217016730161 % -[ 8] Char o: 4.922732509840938 % -[ 9] Char c: 4.495707110148333 % -[10] Char s: 3.8308920563994957 % -[11] Char d: 3.590499279048027 % -[12] Char m: 2.971408651876399 % -[13] Char p: 2.902369294915761 % -[14] Char ă: 2.1349006399156134 % -[15] Char g: 1.2248261000459508 % -[16] Char f: 1.1199089133205216 % -[17] Char b: 1.0781457613230203 % -[18] Char ț: 1.0323081554721047 % -[19] Char ș: 0.9732285745975912 % -[20] Char î: 0.97017273420753 % -[21] Char v: 0.9693804792915882 % -[22] Char z: 0.7369102510995367 % -[23] Char h: 0.533413916976212 % -[24] Char â: 0.4986678799484808 % -[25] Char x: 0.22081276300033725 % -[26] Char j: 0.20055367300696958 % -[27] Char k: 0.1901411798260208 % -[28] Char y: 0.15471606715605385 % -[29] Char w: 0.11827234102273318 % -[30] Char á: 0.016297815413658927 % -[31] Char é: 0.013355154297303842 % -[32] Char q: 0.00520624659047438 % +[ 0] Char e: 11.456985702242736 % +[ 1] Char i: 11.0956605931107 % +[ 2] Char a: 10.273852548639919 % +[ 3] Char r: 7.454949127361049 % +[ 4] Char n: 7.243779682661569 % +[ 5] Char t: 6.464122060947007 % +[ 6] Char l: 5.642480948843576 % +[ 7] Char u: 5.4753816491248575 % +[ 8] Char o: 4.928594679865453 % +[ 9] Char c: 4.4603493894448665 % +[10] Char s: 3.768080862038745 % +[11] Char d: 3.7479655117729047 % +[12] Char m: 2.9085461025465533 % +[13] Char p: 2.8108906676460035 % +[14] Char ă: 2.1405737465465866 % +[15] Char g: 1.262509494278393 % +[16] Char f: 1.0879817042125384 % +[17] Char b: 1.0721231293141584 % +[18] Char ț: 1.016534650986153 % +[19] Char ș: 1.0140306654758826 % +[20] Char v: 0.9768882137402032 % +[21] Char î: 0.9654533465766345 % +[22] Char z: 0.7075428390187716 % +[23] Char h: 0.5414451335041608 % +[24] Char â: 0.45664349088966605 % +[25] Char x: 0.22627682394477877 % +[26] Char j: 0.22452403408758942 % +[27] Char k: 0.20132043502574934 % +[28] Char y: 0.16918595431061106 % +[29] Char w: 0.12970644943201262 % +[30] Char á: 0.012937258469730987 % +[31] Char é: 0.012019130449298466 % +[32] Char q: 0.007428490347135858 % -The first 33 characters have an accumulated ratio of 0.9996661211425673. +The first 33 characters have an accumulated ratio of 0.9995676451685602. -981 sequences found. +1066 sequences found. -First 512 (typical positive ratio): 0.997762564143313 -Next 512 (512-1024): 1.1317927370596478e-06 -Rest: 3.0357660829594124e-18 +First 512 (typical positive ratio): 0.9975318123681904 +Next 512 (512-1024): 0.01016534650986153 +Rest: 4.3355868061878584e-05 -- Processing end: 2016-09-28 18:58:13.862425 +- Processing end: 2021-03-16 20:04:01.293047 diff --git a/script/BuildLangModelLogs/LangSlovakModel.log b/script/BuildLangModelLogs/LangSlovakModel.log index 2c4902e..4dc3fe5 100644 --- a/script/BuildLangModelLogs/LangSlovakModel.log +++ b/script/BuildLangModelLogs/LangSlovakModel.log @@ -1,158 +1,156 @@ = Logs of language model for Slovak (sk) = - Generated by BuildLangModel.py -- Started: 2016-09-21 13:26:28.712674 -- Maximum depth: 5 +- Started: 2021-03-16 20:04:01.478267 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Dôkaz (matematika) (revision 6358810) -1825 (revision 6122752) -1839 (revision 6165808) -1847 (revision 5941780) -1852 (revision 5941777) -1878 (revision 6221358) -1955 (revision 6226609) -1976 (revision 6310709) -1983 (revision 6356952) -1993 (revision 6348358) -1995 (revision 6277350) -2012 (revision 6291145) -Adrien-Marie Legendre (revision 6060342) -Algebra (revision 6319238) +Dôkaz (matematika) (revision 7170221) +1825 (revision 6937105) +1839 (revision 6804159) +1847 (revision 7167629) +1852 (revision 6923466) +1878 (revision 7159904) +1955 (revision 7061181) +1976 (revision 7100059) +1983 (revision 7174204) +1993 (revision 7122277) +1995 (revision 7133683) +2012 (revision 7135523) +Adrien-Marie Legendre (revision 6556308) Algebraická geometria (revision 5964212) -Algebraická rovnica (revision 5288111) -Algebrické číslo (revision 6106622) -Algoritmus (revision 6286937) -Andrew Wiles (revision 5791970) -Arabi (revision 6044956) -Arabčina (revision 6322514) -Aristoteles (revision 6359959) +Algebraická rovnica (revision 6586551) +Algebrické číslo (revision 6382942) +Algoritmus (revision 7100698) +Andrew Wiles (revision 6813255) +Arabi (revision 7124298) +Arabčina (revision 7148041) +Aristoteles (revision 7150270) Arthur Cayley (revision 6332355) -Axióma (revision 6338092) -Babylonia (revision 6168813) -Bernard Bolzano (revision 6261374) -Boh (revision 6282272) -Bolzanova veta (revision 6345299) -Bytie (revision 5274918) -Byzantská ríša (revision 6359782) -Caroline Blundenová (revision 6358810) +Axióma (revision 7073489) +Babylonia (revision 6432954) +Bernard Bolzano (revision 6903631) +Boh (revision 7166677) +Bolzanova veta (revision 6852875) +Bytie (revision 6569833) +Byzantská ríša (revision 7168566) +Caroline Blundenová (revision 7170221) Cauchyho postupnosť (revision 6215169) -Celé číslo (revision 6302805) -Charles Hermite (revision 5751036) -Daniel Marcus (revision 5657431) +Celé číslo (revision 7047567) +Charles Hermite (revision 6412828) +Daniel Marcus (revision 5291472) David Hilbert (revision 5968866) Dedukcia (revision 6338099) -Definícia (revision 6106684) -Derivácia (funkcia) (revision 5970574) -Desiatková číselná sústava (revision 5924486) -Diofantická rovnica (revision 6327292) -Dynastia Chan (revision 6342042) +Definícia (revision 6965423) +Derivácia (funkcia) (revision 7014993) +Desiatková číselná sústava (revision 7047888) +Diofantická rovnica (revision 6060359) +Dynastia Chan (revision 7025657) Dôkaz (logika) (revision 5495754) -Dôkaz sporom (revision 5940134) -Dôkaz výpočtom (revision 6358810) -Energia (revision 6277761) +Dôkaz sporom (revision 7051518) +Energia (revision 6975312) Eric Weisstein (revision 6054413) Ernst Kummer (revision 6001344) -Európa (revision 6295124) +Európa (revision 7164742) Experiment (revision 6354302) -Fenomén (filozofia) (revision 5420897) -Filozofia (revision 6296369) +Fenomén (filozofia) (revision 6558128) +Filozofia (revision 6942330) Formula (logika) (revision 3916562) -Formálny dôkaz (revision 6358810) -Formálny jazyk (revision 5623029) -Gabriel Cramer (revision 5923903) -Galoisova teória (revision 6353573) -Gentzenovský kalkul (revision 6358810) -Geometria (revision 5970028) -Geometrický dôkaz (revision 6358810) -Georg Ferdinand Cantor (revision 6186696) -Giordano Bruno (revision 6312876) -Gottlob Frege (revision 5968855) -Gödelova veta o neúplnosti (revision 5323549) -Hardvér (revision 6214401) -Henri Poincaré (revision 6315506) -Hilbertovský kalkul (revision 6358810) -Hmotnosť (revision 5979540) -Hypotéza (revision 5983410) -Idea (revision 5960449) -India (revision 6362189) +Formálny dôkaz (revision 7170221) +Formálny jazyk (revision 6505890) +Gabriel Cramer (revision 7068001) +Galoisova teória (revision 6749172) +Gentzenovský kalkul (revision 7170221) +Geometria (revision 7010499) +Geometrický dôkaz (revision 7170221) +Georg Ferdinand Cantor (revision 6697670) +Giordano Bruno (revision 7072808) +Gottlob Frege (revision 6580699) +Gödelova veta o neúplnosti (revision 6968373) +Hardvér (revision 6946820) +Henri Poincaré (revision 6830074) +Hilbertovský kalkul (revision 7170221) +Hmotnosť (revision 7021343) +Hypotéza (revision 6850461) +Idea (revision 6113421) +India (revision 6976622) Intuícia (revision 5837951) -Jazyk (lingvistika) (revision 6073293) -John Taylor (revision 6355518) -Kardinálne číslo (revision 6090126) +Jazyk (lingvistika) (revision 6462864) +John Taylor (revision 6741201) +Kardinálne číslo (revision 7154031) Kenneth Appel (revision 5968422) Klasická mechanika (revision 6295646) -Konečná množina (revision 5276494) -Konfucianizmus (revision 5968816) -Kresťanstvo (revision 6289571) -Langlandsov program (revision 6088475) -Latinčina (revision 6121105) -Leonhard Euler (revision 6339382) -Lineárna algebra (revision 5473535) +Konečná množina (revision 6850487) +Konfucianizmus (revision 6948500) +Kresťanstvo (revision 7150939) +Latinčina (revision 7110742) +Leonhard Euler (revision 7016638) +Lineárna algebra (revision 6564030) Logická axióma (revision 5495754) Logický kalkul (revision 1608550) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-21 13:33:10.330458 +- Wikipedia parsing ended at: 2021-03-16 20:13:09.022092 -62 characters appeared 550293 times. +64 characters appeared 535286 times. -First 45 characters: -[ 0] Char o: 8.867094438780795 % -[ 1] Char a: 8.59705647718579 % -[ 2] Char e: 8.562347694773512 % -[ 3] Char n: 6.0867574183207855 % -[ 4] Char i: 5.828531346028389 % -[ 5] Char t: 5.366595613609477 % -[ 6] Char r: 4.977711873492848 % -[ 7] Char k: 4.264273759615332 % -[ 8] Char s: 4.257731790155426 % -[ 9] Char v: 4.117079446767449 % -[10] Char l: 3.5979014815743615 % -[11] Char d: 3.416361829061972 % -[12] Char m: 3.2513588215732345 % -[13] Char p: 2.878466562358598 % -[14] Char u: 2.5987973679476206 % -[15] Char c: 2.419438371921867 % -[16] Char z: 2.127412124086623 % -[17] Char h: 2.0687161203213558 % -[18] Char j: 2.0312815173007834 % -[19] Char y: 1.6700194260148686 % -[20] Char b: 1.6574806512167153 % -[21] Char á: 1.6422160558102683 % -[22] Char ý: 1.2564215790497062 % -[23] Char í: 1.1326693234331529 % -[24] Char č: 0.9473135220691523 % -[25] Char é: 0.8913433389121795 % -[26] Char ž: 0.7668641978000811 % -[27] Char ú: 0.6949025337411161 % -[28] Char š: 0.6785476100913513 % -[29] Char f: 0.6514711253822963 % -[30] Char g: 0.6096752093884531 % -[31] Char ť: 0.46375294615777407 % -[32] Char ô: 0.4172322744428877 % -[33] Char ľ: 0.36053520579036985 % -[34] Char x: 0.23114958758334195 % -[35] Char ó: 0.2251527822450949 % -[36] Char ň: 0.09304134342977287 % -[37] Char w: 0.09013380144759246 % -[38] Char ä: 0.0694175648245571 % -[39] Char ď: 0.06560141597294532 % -[40] Char q: 0.01726353051919614 % -[41] Char ě: 0.009994675563745132 % -[42] Char ĺ: 0.009267790068200032 % -[43] Char ö: 0.008904347320427481 % -[44] Char ŕ: 0.00599680533824708 % +First 46 characters: +[ 0] Char o: 8.787265125559047 % +[ 1] Char a: 8.624174740232323 % +[ 2] Char e: 8.577470735270492 % +[ 3] Char n: 6.100103496074995 % +[ 4] Char i: 5.884891441210867 % +[ 5] Char t: 5.302772723366575 % +[ 6] Char r: 5.02273550961542 % +[ 7] Char s: 4.340670221152805 % +[ 8] Char k: 4.253240323864252 % +[ 9] Char v: 4.073896944810811 % +[10] Char l: 3.6208680966810265 % +[11] Char d: 3.3796886150581185 % +[12] Char m: 3.248356953105443 % +[13] Char p: 2.8470761424733695 % +[14] Char u: 2.6178528861206907 % +[15] Char c: 2.426740097816868 % +[16] Char z: 2.104856095619912 % +[17] Char h: 2.080570013039758 % +[18] Char j: 2.0389100406138025 % +[19] Char á: 1.675926514050433 % +[20] Char b: 1.6690143213160817 % +[21] Char y: 1.6607944164427988 % +[22] Char ý: 1.2490519086992748 % +[23] Char í: 1.1096871578931637 % +[24] Char č: 0.9322119390381964 % +[25] Char é: 0.8785957413420117 % +[26] Char ž: 0.7489454235679618 % +[27] Char ú: 0.702615050645823 % +[28] Char f: 0.6794498641847535 % +[29] Char š: 0.6790762321450589 % +[30] Char g: 0.6219105300717748 % +[31] Char ť: 0.4550838243481055 % +[32] Char ô: 0.38428055282596596 % +[33] Char ľ: 0.3648516867618432 % +[34] Char ó: 0.23090460053130477 % +[35] Char x: 0.22922325635267876 % +[36] Char ň: 0.09434209002290364 % +[37] Char w: 0.08855079340763629 % +[38] Char ä: 0.07005600744275023 % +[39] Char ď: 0.06706695112519288 % +[40] Char q: 0.018121153925191393 % +[41] Char ĺ: 0.010274881091603367 % +[42] Char ě: 0.010274881091603367 % +[43] Char ö: 0.010088065071756034 % +[44] Char ř: 0.007285824774046024 % +[45] Char ŕ: 0.006351744674809354 % -The first 45 characters have an accumulated ratio of 0.9998128269848972. +The first 46 characters have an accumulated ratio of 0.9998617561453131. -1181 sequences found. +1198 sequences found. -First 512 (typical positive ratio): 0.9733303573968434 -Next 512 (512-1024): 1.8172137388627513e-06 -Rest: 0.0003522983638913346 +First 512 (typical positive ratio): 0.9724967373205526 +Next 512 (512-1024): 0.007489454235679618 +Rest: 0.00042527339003644096 -- Processing end: 2016-09-21 13:33:10.831531 +- Processing end: 2021-03-16 20:13:09.628753 diff --git a/script/BuildLangModelLogs/LangSloveneModel.log b/script/BuildLangModelLogs/LangSloveneModel.log index e494190..9ec9020 100644 --- a/script/BuildLangModelLogs/LangSloveneModel.log +++ b/script/BuildLangModelLogs/LangSloveneModel.log @@ -1,148 +1,146 @@ = Logs of language model for Slovene (sl) = - Generated by BuildLangModel.py -- Started: 2016-09-28 22:00:35.243966 -- Maximum depth: 5 +- Started: 2021-03-16 20:13:09.868611 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -XCOM: Enemy Unknown (revision 4704271) -1UP.com (revision 4547348) +XCOM: Enemy Unknown (revision 5360018) +1UP.com (revision 5138164) 2K Games (revision 4110089) -Android (operacijski sistem) (revision 4619359) -Animator videoigre (revision 4702643) -App Store (revision 3903089) -Artefakt (revision 4484504) -Athlon (revision 4524746) -Avstralazija (revision 4623530) -Avtopsija (revision 4541344) -Bralno-pisalni pomnilnik (revision 4256388) -Civilization (serija) (revision 4645770) -Deus Ex: Human Revolution (revision 4694860) -Digitalna distribucija (revision 4696215) +Android (operacijski sistem) (revision 5423518) +Animator videoigre (revision 5438736) +App Store (revision 4916505) +Artefakt (revision 4871634) +Athlon (revision 5138170) +Avstralazija (revision 5234981) +Avtopsija (revision 5394899) +Bralno-pisalni pomnilnik (revision 5307992) +Civilization (serija) (revision 5138157) +Deus Ex: Human Revolution (revision 5312201) DirectX (revision 4477913) -Dishonored (revision 4619444) -Edge (magazine) (revision 4690049) -Electronic Entertainment Expo (revision 4538691) -Enoigralska videoigra (revision 4610359) -Eurogamer (revision 4694860) -Evropa (revision 4687833) +Dishonored (revision 5359830) +Edge (magazine) (revision 5356455) +Enoigralska videoigra (revision 5116872) +Eurogamer (revision 5312201) +Evropa (revision 5448355) Fantasy Flight Games (revision 4649361) Firaxis Games (revision 4110089) +Francoska narodna knjižnica (revision 4596643) GameRankings (revision 3934020) -GameSpot (revision 4238015) -GameSpy (revision 4538691) -GameTrailers (revision 4704271) -Game Informer (revision 4704271) -GamesTM (revision 4704271) -Grafična kartica (revision 4257980) -Granata (revision 3859332) -Holograf (revision 4477482) -IGN (revision 4576233) -IOS (revision 4597264) -Igra igranja vlog (revision 4642276) -Igra na deski (revision 4649363) -Igralna konzola (revision 4649866) -Igralni pogon (revision 4622773) -Intel (revision 4626025) -International Standard Book Number (revision 4015087) -Izdelovalec videoigre (revision 3851747) -Joker (revija) (revision 3867772) +GameSpot (revision 5116871) +GameSpy (revision 5168684) +GameTrailers (revision 5298120) +Game Informer (revision 5360018) +GamesTM (revision 5360018) +Grafična kartica (revision 5374734) +Granata (revision 4837685) +Holografija (revision 4760425) +IGN (revision 5370204) +IOS (revision 5404204) +Igra igranja vlog (revision 4768087) +Igra na deski (revision 5431955) +Igralna konzola (revision 4773547) +Igralni pogon (revision 4771045) +Intel (revision 5366957) +International Standard Book Number (revision 4765322) +Izdelovalec videoigre (revision 5438736) +Joker (revija) (revision 5351778) Kotaku (revision 4613535) -Kristal (revision 4156234) -Linux (revision 4524740) -Lovec prestreznik (revision 4102792) -MTV (revision 4621758) -Mac OS X (revision 4601645) -Machinima (revision 4601716) -Major (revision 4245802) -Mednarodna različica (revision 4116054) +Kristal (revision 5068718) +Linux (revision 5457231) +Lovec prestreznik (revision 4758667) +MTV (revision 5406174) +Mac OS X (revision 5212452) +Machinima (revision 5295004) +Major (revision 4758895) +Mednarodna različica (revision 5032649) Metacritic (revision 3934020) -Michael McCann (skladatelj) (revision 4694860) -MicroProse (revision 4382810) -Microsoft Windows (revision 4691357) -Nezemeljsko življenje (revision 4620576) -NowGamer (revision 4704271) -OS X (revision 4601645) +Michael McCann (skladatelj) (revision 5312201) +MicroProse (revision 5116826) +Microsoft Windows (revision 5460799) +Možje v črnem (revision 5262890) +Nezemeljsko življenje (revision 5386002) +Normativna kontrola (revision 5316351) +NowGamer (revision 5363253) +OS X (revision 5212452) Ognjena ekipa (revision 4694450) -Operacijski sistem (revision 4698515) -Ostrostrelec (revision 4529694) -Pilot (revision 4069093) -PlayStation 3 (revision 4382944) -PlayStation Network (revision 4382944) -PlayStation Vita (revision 3944025) -Pogon igre (revision 4622773) -Procesor (revision 4702518) -Producent videoiger (revision 4599904) -Razvijalec videoiger (revision 4093281) -Računalniška miška (revision 4385579) -Računalniška platforma (revision 4673669) -Severna Amerika (revision 4643798) +Operacijski sistem (revision 5309675) +Ostrostrelec (revision 4810396) +Pilot (revision 4758828) +PlayStation 3 (revision 5245525) +PlayStation Network (revision 4784984) +PlayStation Vita (revision 5245581) +Procesor (revision 5262718) +Producent videoiger (revision 5368686) +Razvijalec videoiger (revision 5171689) +Računalniška miška (revision 5169871) +Računalniško okolje (revision 5250619) +Severna Amerika (revision 5400891) Sid Meier (revision 4061487) Stealth (revision 4618630) -Steam (revision 4696215) -Strateška videoigra (revision 4236795) -Tablični računalnik (revision 4409985) -Take-Two Interactive (revision 4110089) -Telepatija (revision 4481192) -The Bureau: XCOM Declassified (revision 4704271) -The Guardian (revision 3929479) -Trdi disk (revision 4644623) -UFO: Enemy Unknown (revision 4704271) -Unreal Engine (revision 4622773) -Unreal Engine 3 (revision 4622773) -Uporabniški vmesnik (revision 4552473) -Valve Corporation (revision 4110105) +Steam (revision 5171704) +Strateška videoigra (revision 5245834) +Tablični računalnik (revision 5312221) +Telepatija (revision 4846742) +The Bureau: XCOM Declassified (revision 5360018) +The Guardian (revision 5361337) +Trdi disk (revision 5329681) +UFO: Enemy Unknown (revision 5360018) +Unreal Engine (revision 4771045) +Unreal Engine 3 (revision 4771045) +Uporabniški vmesnik (revision 5118420) +Valve Corporation (revision 5168680) Večigralska videoigra (revision 4618639) -VideoGamer.com (revision 4704271) -Vohunski satelit (revision 4215166) -Vojaška taktika (revision 3970259) -Vojaški čini (revision 4363026) +VideoGamer.com (revision 5363253) +Vohunski satelit (revision 5450401) +Vojaška taktika (revision 4759159) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-28 22:06:46.133919 +- Wikipedia parsing ended at: 2021-03-16 20:20:05.416719 -41 characters appeared 411226 times. +41 characters appeared 318060 times. First 29 characters: -[ 0] Char a: 10.090315301075321 % -[ 1] Char e: 9.90477255815537 % -[ 2] Char i: 9.666703953543793 % -[ 3] Char o: 9.177921629468953 % -[ 4] Char n: 7.28309980400072 % -[ 5] Char r: 5.808241696779873 % -[ 6] Char s: 4.575586174025961 % -[ 7] Char t: 4.4963110309173056 % -[ 8] Char j: 4.343840126840229 % -[ 9] Char l: 4.2672399118732764 % -[10] Char v: 3.802775116359374 % -[11] Char p: 3.5216644861949393 % -[12] Char k: 3.5136397017698293 % -[13] Char d: 3.0387183689747244 % -[14] Char m: 2.9487435132992563 % -[15] Char z: 2.350775485985808 % -[16] Char u: 1.9719083910064055 % -[17] Char g: 1.9342162217369525 % -[18] Char b: 1.5392995579073308 % -[19] Char c: 1.2924766430138173 % -[20] Char h: 1.1864522184881305 % -[21] Char č: 1.137087635509428 % -[22] Char š: 0.6932927392723223 % -[23] Char ž: 0.45303555709026183 % -[24] Char f: 0.40707542811009034 % -[25] Char x: 0.19381070263067024 % -[26] Char y: 0.19040624863213904 % -[27] Char w: 0.18919037220409216 % -[28] Char q: 0.011186063138031156 % +[ 0] Char a: 10.018235553040308 % +[ 1] Char e: 9.988995786958435 % +[ 2] Char i: 9.602590706156072 % +[ 3] Char o: 9.246054203609381 % +[ 4] Char n: 7.188580770923725 % +[ 5] Char r: 5.758976293781048 % +[ 6] Char s: 4.588442432245488 % +[ 7] Char t: 4.5786958435515315 % +[ 8] Char l: 4.357668364459536 % +[ 9] Char j: 4.260202477519965 % +[10] Char v: 3.809344148902723 % +[11] Char p: 3.4980821228698984 % +[12] Char k: 3.4751304785260646 % +[13] Char d: 3.143746462931522 % +[14] Char m: 2.928692699490662 % +[15] Char z: 2.332893164811671 % +[16] Char u: 1.9908193422624663 % +[17] Char g: 1.9298245614035088 % +[18] Char b: 1.5607118153807458 % +[19] Char c: 1.2903225806451613 % +[20] Char h: 1.2145507137018172 % +[21] Char č: 1.1001068980695468 % +[22] Char š: 0.6841476450984091 % +[23] Char ž: 0.44331258253159783 % +[24] Char f: 0.4203609381877633 % +[25] Char w: 0.2021631138778847 % +[26] Char y: 0.19618939822674966 % +[27] Char x: 0.16726403823178018 % +[28] Char q: 0.011004213041564485 % -The first 29 characters have an accumulated ratio of 0.9998978663800442. +The first 29 characters have an accumulated ratio of 0.9998710935043701. -727 sequences found. +698 sequences found. -First 512 (typical positive ratio): 0.9983524317161332 -Next 512 (512-1024): 2.4317528560937295e-06 -Rest: -3.859759734048396e-17 +First 512 (typical positive ratio): 0.998296272473889 +Next 512 (512-1024): 0.004433125825315978 +Rest: -2.8189256484623115e-17 -- Processing end: 2016-09-28 22:06:46.601266 +- Processing end: 2021-03-16 20:20:05.900813 diff --git a/script/BuildLangModelLogs/LangSwedishModel.log b/script/BuildLangModelLogs/LangSwedishModel.log index 029e510..26104e1 100644 --- a/script/BuildLangModelLogs/LangSwedishModel.log +++ b/script/BuildLangModelLogs/LangSwedishModel.log @@ -1,151 +1,150 @@ = Logs of language model for Swedish (sv) = - Generated by BuildLangModel.py -- Started: 2016-09-28 22:26:37.221506 -- Maximum depth: 5 +- Started: 2021-03-16 20:20:06.144954 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Kakapo (revision 36509929) -Akut hotad (revision 32517788) -Aotearoa (revision 36575359) -Art (revision 36771341) -Artepitet (revision 36771341) -Auckland (revision 35752058) -Auktorsnamn (revision 35976965) -BBC (revision 36508743) -Basalomsättning (revision 30567523) -Beilschmiedia tawa (revision 29101923) -Berguv (revision 36295501) -Betesmark (revision 34292168) -Biotop (revision 35528052) -BirdLife International (revision 36124283) -Bonaparte (revision 37325183) -British Museum (revision 36420244) -Bröstben (revision 30602527) -Dacrydium cupressinum (revision 32986501) -Digital object identifier (revision 27637223) -Djur (revision 37300775) -Djurpark (revision 37147093) -Domän (biologi) (revision 33377709) -Don Merton (revision 36509929) -Douglas Adams (revision 36556245) -Däggdjur (revision 37328286) -Ekologisk nisch (revision 33898643) -Ekosystem (revision 36598266) -Endemisk (revision 30647109) -Eukaryoter (revision 37095313) -Evolution (revision 37093592) -Familj (biologi) (revision 30280200) -Femininum (revision 30597527) -Fjäder (biologi) (revision 36364943) -Fjäderdräkt (revision 36364943) -Fladdermöss (revision 37307257) -Flygg (revision 36479633) -Frukter (revision 34088588) -Frö (revision 37333131) -Fågelläte (revision 34034723) -Fåglar (revision 37387306) -Fåglarnas liv (revision 36509929) -Genitiv (revision 37388438) -George Edward Grey (revision 36509929) -George Robert Gray (revision 20426710) -Haasts örn (revision 29175076) -Hauturu/Little Barrier Island (revision 36509929) -Hermelin (revision 36578682) -Hertz (revision 37104488) -Hjortdjur (revision 36493550) -Hund (revision 37351832) -Husdjur (revision 37384850) -Huskatt (revision 32922967) -Hāngi (revision 29609696) -IUCN (revision 30570280) -Iller (revision 30663158) -Infraröd (revision 36770733) -Internationella naturvårdsunionen (revision 30570280) -Jordbruk (revision 37352625) -Kahurangi National Park (revision 35956142) -Kamouflage (revision 36579595) -Kaniner (revision 36877621) -Kapiti Island (revision 37395588) -Katt (revision 36734686) -Kelp (revision 30312471) -Kivier (revision 36373234) -Klass (biologi) (revision 30280201) -Kroppsfett (revision 35066611) -Könsdimorfism (revision 30816932) -Könsfördelning (revision 24769321) -Lamm- och fårkött (revision 36187205) +Kakapo (revision 48946696) +Akut hotad (revision 45694757) +Aotearoa (revision 48764847) +Arkive (revision 45404194) +Art (revision 48819963) +Artepitet (revision 48819963) +Auckland (revision 48740415) +Auktorsnamn (revision 46648298) +BBC (revision 48945370) +Basalomsättning (revision 48638233) +Beilschmiedia tawa (revision 47662851) +Berguv (revision 47572081) +Betesmark (revision 47837257) +Biodiversity Heritage Library (revision 48152021) +Biotop (revision 48969696) +BirdLife International (revision 47616784) +British Museum (revision 48501908) +Bröstben (revision 48379566) +CITES (revision 47938046) +Dacrydium cupressinum (revision 47442085) +Digital object identifier (revision 47511062) +Djur (revision 48964290) +Djurpark (revision 48242363) +Domän (biologi) (revision 48975224) +Don Merton (revision 48407169) +Douglas Adams (revision 47251802) +Däggdjur (revision 48794669) +Ekologisk nisch (revision 48844778) +Ekosystem (revision 48570659) +Endemisk (revision 48546826) +Eukaryoter (revision 48898436) +Evolution (revision 49003401) +Familj (biologi) (revision 48771961) +Femininum (revision 46628147) +Fjäder (biologi) (revision 48641138) +Fjäderdräkt (revision 48641138) +Fladdermöss (revision 48746998) +Flygg (revision 48763776) +Fossilworks (revision 43519389) +Frukter (revision 48807025) +Frö (revision 46332448) +Fylum (revision 48212330) +Fågelläte (revision 48681377) +Fåglar (revision 48837894) +Fåglarnas liv (revision 48837894) +Genitiv (revision 48658908) +George Edward Grey (revision 46365447) +George Robert Gray (revision 43056128) +Global Biodiversity Information Facility (revision 40116158) +Haasts örn (revision 48440980) +Hauturu/Little Barrier Island (revision 20537378) +Hermelin (revision 48863152) +Hertz (revision 48548540) +Hjortdjur (revision 48740321) +Hund (revision 48989960) +Husdjur (revision 48155297) +Huskatt (revision 47647609) +Hāngi (revision 46574175) +IUCN (revision 49006187) +Iller (revision 48765500) +Inaturalist (revision 48552803) +Infraröd (revision 48615998) +Integrated Taxonomic Information System (revision 48591706) +Internationella naturvårdsunionen (revision 49006187) +Internet Archive (revision 48979443) +Jordbruk (revision 48448896) +Kahurangi National Park (revision 47659423) +Kamouflage (revision 47671382) +Kaniner (revision 48911042) +Kapiti Island (revision 48553791) +Katt (revision 48986224) +Kelp (revision 46077553) +Kivier (revision 48467049) +Klass (biologi) (revision 44944834) +Kroppsfett (revision 39272827) +Könsdimorfism (revision 48346350) +Könsfördelning (revision 45646592) +Lamm- och fårkött (revision 48351109) Lek (fortplantningsbeteende) (revision 30508235) -Mandel (revision 36577529) -Maori (revision 32560474) -Maorier (revision 35862066) -Maoripapegojor (revision 36545138) -Mark Carwardine (revision 20375916) -Markpapegoja (revision 36295722) -Maskulinum (revision 32704551) -Masterton (revision 29859631) -Metrosideros umbellata (revision 29071212) -Milford Sound (revision 20284758) -Morrhår (revision 36533839) -Muskelmage (revision 31196380) -Mustela (revision 20934105) -Mårddjur (revision 37306347) -Māori (revision 32560474) -NHNZ (revision 36509929) -Nattpapegoja (revision 33486517) -Nordön (revision 24810231) -Nya Zeeland (revision 36575359) -Näbb (revision 23648463) -Ollonår (revision 36509929) -Ordning (biologi) (revision 30280196) +Mandel (revision 48952857) +Maori (revision 48297968) +Maorier (revision 48066510) +Maoripapegojor (revision 46078328) +Mark Carwardine (revision 48869810) +Markpapegoja (revision 47342275) +Maskulinum (revision 46628162) +Masterton (revision 48262093) +Metrosideros umbellata (revision 46936435) +Milford Sound (revision 45323524) +Morrhår (revision 48980591) +Muskelmage (revision 41849238) +Mustela (revision 48294935) +Mårddjur (revision 48435918) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-28 22:29:21.480287 +- Wikipedia parsing ended at: 2021-03-16 20:24:13.933499 -48 characters appeared 594415 times. +49 characters appeared 513356 times. -First 31 characters: -[ 0] Char a: 10.070741821791172 % -[ 1] Char e: 9.737136512369304 % -[ 2] Char r: 9.110638190489809 % -[ 3] Char n: 8.378826240925951 % -[ 4] Char t: 7.481305148759705 % -[ 5] Char s: 5.828587771169974 % -[ 6] Char i: 5.359891658184939 % -[ 7] Char l: 5.173489901836259 % -[ 8] Char o: 4.694195133029954 % -[ 9] Char d: 4.597293136949774 % -[10] Char k: 3.297359588839447 % -[11] Char m: 3.1898589369379975 % -[12] Char g: 3.004466576381821 % -[13] Char v: 2.2324470277499726 % -[14] Char f: 2.1988005013332437 % -[15] Char p: 2.06017681249632 % -[16] Char u: 2.0499146219392173 % -[17] Char ä: 2.0475593650900468 % -[18] Char h: 2.028380845032511 % -[19] Char å: 1.5443755625278637 % -[20] Char c: 1.442594820117258 % -[21] Char ö: 1.3515809661600062 % -[22] Char b: 1.268642278542769 % -[23] Char j: 0.7302978558751041 % -[24] Char y: 0.6699023409570755 % -[25] Char x: 0.2111319532649748 % -[26] Char w: 0.10262190557102362 % -[27] Char z: 0.09151855185350302 % -[28] Char é: 0.021197311642539303 % -[29] Char ā: 0.011103353717520588 % -[30] Char q: 0.007570468443764037 % +First 30 characters: +[ 0] Char a: 9.801969783152433 % +[ 1] Char e: 9.753075838209742 % +[ 2] Char r: 9.263357202409244 % +[ 3] Char n: 8.249635730370347 % +[ 4] Char t: 7.409088429861539 % +[ 5] Char s: 6.03207131113691 % +[ 6] Char i: 5.692346052252238 % +[ 7] Char l: 5.428981057979258 % +[ 8] Char o: 4.548890049010823 % +[ 9] Char d: 4.4466218374773065 % +[10] Char m: 3.3119316809387636 % +[11] Char k: 3.0742798369942106 % +[12] Char g: 3.073890243807416 % +[13] Char f: 2.2676271437365103 % +[14] Char v: 2.2645103982421557 % +[15] Char u: 2.116464987260303 % +[16] Char ä: 2.0311440793523405 % +[17] Char h: 1.9354989519943275 % +[18] Char p: 1.8753068046346004 % +[19] Char å: 1.4903887360817833 % +[20] Char c: 1.4510398242155542 % +[21] Char b: 1.3084487178488222 % +[22] Char ö: 1.2946181597176227 % +[23] Char j: 0.7221109717233265 % +[24] Char y: 0.6866579917250407 % +[25] Char x: 0.22323689603316216 % +[26] Char w: 0.12096868449964547 % +[27] Char z: 0.07947701010604727 % +[28] Char é: 0.01577852406517115 % +[29] Char q: 0.013635761537802226 % -The first 31 characters have an accumulated ratio of 0.999936071599808. +The first 30 characters have an accumulated ratio of 0.9998305269637442. -748 sequences found. +752 sequences found. -First 512 (typical positive ratio): 0.997323508584682 -Next 512 (512-1024): 1.6823263208364526e-06 -Rest: 1.7780915628762273e-17 +First 512 (typical positive ratio): 0.996987580875875 +Next 512 (512-1024): 0.012946181597176228 +Rest: 4.640385298237959e-17 -- Processing end: 2016-09-28 22:29:21.590354 +- Processing end: 2021-03-16 20:24:14.019931 diff --git a/script/BuildLangModelLogs/LangThaiModel.log b/script/BuildLangModelLogs/LangThaiModel.log index b7024c3..ea7437c 100644 --- a/script/BuildLangModelLogs/LangThaiModel.log +++ b/script/BuildLangModelLogs/LangThaiModel.log @@ -1,141 +1,192 @@ = Logs of language model for Thai (th) = - Generated by BuildLangModel.py -- Started: 2015-12-04 03:01:52.148282 -- Maximum depth: 3 -- Max number of pages: 50 +- Started: 2021-03-16 20:24:14.258574 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == -หน้าหลัก (revision 5512633) -26 พฤศจิกายน (revision 5570053) -27 พฤศจิกายน (revision 5888433) -28 พฤศจิกายน (revision 6110206) -กล้องโทรทรรศน์อวกาศฮับเบิล (revision 5830742) -การประชุมสภาสงฆ์แห่งแคลมงต์ (revision 5463877) -ความเอนเอียงเพื่อยืนยัน (revision 6231756) -คัมภีร์พระเวท (revision 6109417) -คาบสมุทรไซนาย (revision 5661104) -จักรวรรดิโรมันตะวันออก (revision 6150148) -ชาวมุสลิม (revision 6242838) -ซุคฮอย ซู-24 (revision 6015891) -ดาวอังคาร (revision 6235017) -ดาวเคราะห์นอกระบบ (revision 5823077) -ดินแดนศักดิ์สิทธิ์ (revision 6179072) -ทฤษฎี (revision 5606447) -ทะกะอะกิ คะจิตะ (revision 6177601) -ท่าอากาศยานนานาชาติตริภูวัน (revision 6010470) -นกกาเหว่า (revision 6142782) -ประเทศอัฟกานิสถาน (revision 6216996) -ประเทศเนปาล (revision 6206980) -ปรากฏการณ์การวางกรอบ (revision 6046655) -ปารีส (revision 6222115) -พ.ศ. 1638 (revision 4723508) -พ.ศ. 2438 (revision 5737055) -พ.ศ. 2515 (revision 6197082) -พ.ศ. 2544 (revision 6189598) -พินัยกรรม (revision 5607889) -มูลนิธิวิกิมีเดีย (revision 5816103) -ระบบสุริยะ (revision 6201228) -รางวัลโนเบล (revision 5828030) -รางวัลโนเบลสาขาฟิสิกส์ (revision 6177103) -รายชื่อบทความวันนี้ในอดีต (revision 5410610) -ลักกีสไตรก์ (เพลง) (revision 6195816) -ลุฟต์ฮันซา (revision 6116038) -วิกฤตการณ์ผู้ย้ายถิ่นยุโรป (revision 6219634) -วิกิพีเดีย (revision 6086299) -วิกิพีเดียภาษาไทย (revision 6209148) -สงครามครูเสด (revision 6228828) -สงครามอังกฤษ–แซนซิบาร์ (revision 5829349) -สติ (จิตวิทยา) (revision 6039161) -สมมติฐาน (revision 6221744) -สมเด็จพระราชินีมารีแห่งโรมาเนีย (revision 6211695) -สมเด็จพระสันตะปาปาเออร์บันที่ 2 (revision 5828365) -สารานุกรม (revision 6070482) -อัลเฟรด โนเบล (revision 6214514) -อาร์เธอร์ แมคโดนัลด์ (revision 6188035) -เซนต์ปีเตอร์สเบิร์ก (revision 6162201) -เทือกเขาฮินดูกูช (revision 5218921) -เนื้อหาเสรี (revision 6160507) +หน้าหลัก (revision 9018985) +14 มีนาคม (revision 9303173) +15 มีนาคม (revision 8874275) +16 มีนาคม (revision 9306392) +กองทัพพม่า (revision 9251263) +การปฏิวัติเดือนกุมภาพันธ์ (revision 8956448) +การประท้วงในประเทศพม่า พ.ศ. 2564 (revision 9304791) +การประท้วงในประเทศรัสเซีย พ.ศ. 2564 (revision 9236735) +การระบาดทั่วของโควิด-19 (revision 9289675) +การระบาดทั่วของโควิด-19 ในประเทศไทย (revision 9305901) +การระบาดทั่วของไวรัสโคโรนา พ.ศ. 2562–2563 เรียงตามประเทศและดินแดน (revision 9284827) +การลอบสังหารจูเลียส ซีซาร์ (revision 8174866) +จักรพรรดินิโคลัสที่ 2 แห่งรัสเซีย (revision 9299709) +จักรวรรดิรัสเซีย (revision 9260038) +จังหวัดสมุทรสาคร (revision 9227141) +จูเลียส ซีซาร์ (revision 9304624) +ชาวพม่าในไทย (revision 9289634) +ซีแอตเทิล (revision 9295680) +นิวซีแลนด์ (revision 9099286) +บาตา (ประเทศอิเควทอเรียลกินี) (revision 8750850) +บุคคลที่เสียชีวิตในปี พ.ศ. 2564 (revision 9306385) +บูโพรพิออน (revision 9180305) +ประเทศอิเควทอเรียลกินี (revision 9043997) +ผลกระทบทางเศรษฐกิจและสังคมของการระบาดทั่วของไวรัสโคโรนา พ.ศ. 2562–2563 (revision 9289675) +ผู้เผด็จการโรมัน (revision 6381320) +พ.ศ. 2435 (revision 9191544) +พ.ศ. 2460 (revision 9193829) +พ.ศ. 2515 (revision 9293724) +พ.ศ. 2554 (revision 9304980) +พ.ศ. 500 (revision 5354846) +พระคเณศ (revision 9259789) +ฟรานซิส ฟอร์ด คอปโปลา (revision 8699907) +ฟุตบอล (revision 9267162) +มหาวิทยาลัยเคมบริดจ์ (revision 9278720) +มาริโอ พูโซ (revision 4707337) +มาร์คัส จูนิอัส บรูตัสผู้ลูก (revision 7223903) +มาร์เกเรเธ ซัมบีเรีย (revision 9294258) +มูลนิธิวิกิมีเดีย (revision 9155482) +รัฐประหารในประเทศพม่า พ.ศ. 2564 (revision 9293480) +รัฐอุตตราขัณฑ์ (revision 9246094) +รางวัลโนเบลสาขาสรีรวิทยาหรือการแพทย์ (revision 9088756) +ราชวงศ์โรมานอฟ (revision 8702698) +รายชื่อบทความวันนี้ในอดีต (revision 8925803) +วลาดีมีร์ ปูติน (revision 9137037) +วัคซีนโรคติดเชื้อไวรัสโคโรนา 2019 (revision 9297189) +วิกิพีเดีย (revision 9235310) +วิกิพีเดียภาษาไทย (revision 9176821) +วุฒิสภาโรมัน (revision 9281945) +ศกุนตลา เทวี (revision 9296935) +สงครามกลางเมืองซีเรีย (revision 8541828) +สถาปัตยกรรมกอทิก (revision 8232804) +สถาปัตยกรรมฟื้นฟูกอทิก (revision 6453482) +สหรัฐ (revision 9288976) +สาธารณรัฐโรมัน (revision 9050973) +สารานุกรม (revision 9290003) +สโมสรฟุตบอลบีจี ปทุม ยูไนเต็ด (revision 9292580) +สโมสรฟุตบอลลิเวอร์พูล (revision 9262545) +อองซานซูจี (revision 9292643) +อะเลกเซย์ นาวัลนืย (revision 9230310) +อาหรับสปริง (revision 8171494) +อิตส์อะวันเดอร์ฟูลไลฟ์ (revision 9291334) +อุทกภัยจากธารน้ำแข็งแตกในรัฐอุตตราขัณฑ์ พ.ศ. 2564 (revision 9300387) +อู่ เหลียนเต๋อ (revision 9295504) +เดนมาร์ก (revision 9103140) +เดอะก็อดฟาเธอร์ (revision 8942413) +เดอะก็อดฟาเธอร์ (นวนิยาย) (revision 4707337) +เนื้อหาเสรี (revision 9063375) +เบนจามิน เมานต์ฟอร์ต (revision 8820016) +เบอร์มิงแฮม (revision 8949103) +เหตุระเบิดที่บาตา พ.ศ. 2564 (revision 9301940) +เอจออฟเอ็มไพร์ส (revision 8812026) +แคว้นแคนเทอร์เบอรี (revision 8763458) +แผ่นดินไหวในเกาะซูลาเวซี พ.ศ. 2564 (revision 9213896) +โรคติดเชื้อไวรัสโคโรนา 2019 (revision 9303763) +ไครสต์เชิร์ช (revision 9065152) +ไทยลีก ฤดูกาล 2563–64 (revision 9306310) +ไวรัสโคโรนาสายพันธุ์ใหม่ (SARS-CoV-2) (revision 9239363) +0 มกราคม (revision 8811984) +10 กรกฎาคม (revision 9204508) +10 กันยายน (revision 9223073) +10 กุมภาพันธ์ (revision 8791647) +10 ตุลาคม (revision 9299190) +10 ธันวาคม (revision 9187465) +10 พฤศจิกายน (revision 9255261) +10 พฤษภาคม (revision 9293733) +10 มกราคม (revision 9256728) +10 มิถุนายน (revision 8950621) +10 มีนาคม (revision 9296320) +10 สิงหาคม (revision 9287893) +10 เมษายน (revision 9239957) +11 กรกฎาคม (revision 9272225) +11 กันยายน (revision 9263121) +11 กุมภาพันธ์ (revision 9255762) +11 ตุลาคม (revision 8872097) +11 ธันวาคม (revision 9299195) +11 พฤศจิกายน (revision 9301626) +11 พฤษภาคม (revision 9295172) +11 มกราคม (revision 9273530) +11 มิถุนายน (revision 9261737) +11 มีนาคม (revision 9204281) +11 สิงหาคม (revision 9281431) == End of Parsed pages == -- Wikipedia parsing ended at: 2015-12-04 03:05:06.181487 +- Wikipedia parsing ended at: 2021-03-16 20:29:56.645650 -105 characters appeared 401052 times. +106 characters appeared 708244 times. First 64 characters: -[ 0] Char า: 8.857704237854442 % -[ 1] Char น: 6.7679502907353655 % -[ 2] Char ร: 6.739026360671434 % -[ 3] Char ก: 5.388079351306065 % -[ 4] Char อ: 5.099837427565503 % -[ 5] Char ง: 4.861713692987443 % -[ 6] Char เ: 4.5198627609387305 % -[ 7] Char ม: 4.133628556895365 % -[ 8] Char ว: 3.864336794231172 % -[ 9] Char ด: 3.3152808114658447 % -[10] Char ย: 3.195844927839781 % -[11] Char ล: 3.1312647736453125 % -[12] Char ท: 2.69615910156289 % -[13] Char ส: 2.6001615750575984 % -[14] Char ะ: 2.392457835891605 % -[15] Char ค: 2.384229476476866 % -[16] Char บ: 2.3321165335168503 % -[17] Char ต: 2.196473275285998 % -[18] Char ห: 1.983782651626223 % -[19] Char ป: 1.9192024974317545 % -[20] Char แ: 1.7813151411787 % -[21] Char จ: 1.76261432432702 % -[22] Char พ: 1.5075351824701035 % -[23] Char ข: 1.3519443862641254 % -[24] Char ใ: 1.3295034060421091 % -[25] Char ไ: 1.2227840778751882 % -[26] Char ช: 1.0407627938521689 % -[27] Char โ: 0.9382823175049619 % -[28] Char ศ: 0.8078752879925796 % -[29] Char ำ: 0.7393056262030859 % -[30] Char ถ: 0.599672860377208 % -[31] Char ซ: 0.541076967575277 % -[32] Char e: 0.43734977010462484 % -[33] Char ผ: 0.43585370475649043 % -[34] Char ณ: 0.4019428901987772 % -[35] Char a: 0.3897250231890129 % -[36] Char i: 0.3657879776188624 % -[37] Char ษ: 0.3647906007201061 % -[38] Char ภ: 0.34185093204871186 % -[39] Char ธ: 0.3181632307032505 % -[40] Char o: 0.3176645422538723 % -[41] Char n: 0.3139243788835363 % -[42] Char ญ: 0.29248077556027646 % -[43] Char r: 0.28350438347147006 % -[44] Char t: 0.2705384837876385 % -[45] Char s: 0.2488455362396896 % -[46] Char l: 0.19598456060560726 % -[47] Char ฟ: 0.19473783948216192 % -[48] Char c: 0.16356981139602844 % -[49] Char ฐ: 0.15833358267755804 % -[50] Char ฤ: 0.15284800973439852 % -[51] Char ๆ: 0.14910784636406252 % -[52] Char d: 0.13090571796176056 % -[53] Char ฮ: 0.1244227681198448 % -[54] Char h: 0.12043326052481973 % -[55] Char u: 0.12043326052481973 % -[56] Char m: 0.09599752650529109 % -[57] Char y: 0.08951457666337533 % -[58] Char ฏ: 0.08677179019179557 % -[59] Char p: 0.08253293837208142 % -[60] Char f: 0.08153556147332515 % -[61] Char S: 0.07604998853016566 % -[62] Char ฝ: 0.07330720205858592 % -[63] Char ฉ: 0.0673229406660483 % +[ 0] Char า: 8.374373803378496 % +[ 1] Char น: 7.171539751836938 % +[ 2] Char ร: 6.9999887044577855 % +[ 3] Char ก: 5.42581370262226 % +[ 4] Char อ: 5.080028916588068 % +[ 5] Char เ: 4.61507616019338 % +[ 6] Char ง: 4.240911324345847 % +[ 7] Char ม: 4.100846600888959 % +[ 8] Char ว: 3.364377248518872 % +[ 9] Char ย: 3.31594761127521 % +[10] Char ล: 3.300981017841309 % +[11] Char ด: 2.779550550375294 % +[12] Char ส: 2.7203901480280805 % +[13] Char ท: 2.6363795528094838 % +[14] Char ต: 2.4035501889179436 % +[15] Char ค: 2.3338002157448563 % +[16] Char ะ: 2.3099383828172213 % +[17] Char บ: 2.1609784198665998 % +[18] Char ป: 2.104077125962239 % +[19] Char แ: 1.9566703000660788 % +[20] Char ห: 1.8754835904010483 % +[21] Char พ: 1.6814826528710445 % +[22] Char จ: 1.4730798990178526 % +[23] Char ช: 1.385680641134976 % +[24] Char ใ: 1.3149423080181406 % +[25] Char ข: 1.2114469024799364 % +[26] Char ศ: 1.095808789061397 % +[27] Char โ: 1.0651696308052028 % +[28] Char ไ: 1.045967209040952 % +[29] Char ซ: 0.7435290662540028 % +[30] Char ำ: 0.6989116745076556 % +[31] Char ผ: 0.550375294390069 % +[32] Char ถ: 0.47314202450003107 % +[33] Char ธ: 0.461422899452731 % +[34] Char ภ: 0.42386522159029943 % +[35] Char ณ: 0.4122872908206776 % +[36] Char ษ: 0.40988698810014623 % +[37] Char a: 0.4049451883814053 % +[38] Char e: 0.38193052111984005 % +[39] Char i: 0.33717193509581445 % +[40] Char ฐ: 0.31359249072353595 % +[41] Char ญ: 0.29749634306820816 % +[42] Char n: 0.29213096051643217 % +[43] Char ฟ: 0.29071901773964903 % +[44] Char o: 0.28874229785215266 % +[45] Char r: 0.2702458474762935 % +[46] Char t: 0.2569735853745319 % +[47] Char s: 0.19682482308357005 % +[48] Char l: 0.17070388171308193 % +[49] Char h: 0.13385217523904192 % +[50] Char u: 0.12919276407565755 % +[51] Char c: 0.12834559840958765 % +[52] Char ฮ: 0.12269782730245507 % +[53] Char ฤ: 0.11690886191764421 % +[54] Char d: 0.1139437820863996 % +[55] Char ฉ: 0.10886078808998029 % +[56] Char S: 0.1009539085399947 % +[57] Char C: 0.09883599437481996 % +[58] Char m: 0.09544733171054044 % +[59] Char ฏ: 0.08005715544360419 % +[60] Char ๆ: 0.07906879549985599 % +[61] Char y: 0.0773744641677162 % +[62] Char ฝ: 0.07412699578111498 % +[63] Char ฒ: 0.07059713883915714 % -The first 64 characters have an accumulated ratio of 0.989480167160368. +The first 64 characters have an accumulated ratio of 0.9874944228260318. -2324 sequences found. +2704 sequences found. -First 512 (typical positive ratio): 0.8815720594354438 -Next 512 (512-1024): 7.480326740672033e-06 -Rest: 0.026341928296264486 +First 512 (typical positive ratio): 0.8690353564146914 +Next 512 (512-1024): 0.0007906879549985598 +Rest: 0.03156084221511464 -- Processing end: 2015-12-04 03:05:06.800467 +- Processing end: 2021-03-16 20:29:57.119132 diff --git a/script/BuildLangModelLogs/LangTurkishModel.log b/script/BuildLangModelLogs/LangTurkishModel.log index 51b31ad..b683c86 100644 --- a/script/BuildLangModelLogs/LangTurkishModel.log +++ b/script/BuildLangModelLogs/LangTurkishModel.log @@ -1,113 +1,161 @@ = Logs of language model for Turkish (tr) = - Generated by BuildLangModel.py -- Started: 2015-12-04 02:22:03.929245 -- Maximum depth: 3 -- Max number of pages: 50 +- Started: 2021-03-16 20:29:57.369383 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == -Ana_Sayfa (revision 16293313) -1048 (revision 12894005) -1131 (revision 14840814) -16. yüzyıl (revision 15185081) -1859 (revision 16014427) -1866 (revision 16120346) -1869 (revision 12888270) -1892 (revision 13955858) -1895 (revision 15334635) -1902 (revision 16283638) -1906 (revision 15874323) -1918 (revision 16099474) -1926 (revision 16180584) -1927 (revision 15370980) -1940 (revision 15370990) -1943 (revision 16091797) -1944 (revision 16247827) -1945 (revision 16281147) -1948 (revision 15443886) -1961 (revision 15799529) -1964 (revision 16085332) -1975 (revision 15006928) -1980 (revision 16213240) -1981 (revision 16295456) -1983 (revision 16327128) -1993 (revision 16300456) -2002 (revision 16297206) -2015 (revision 16328338) -24 Ekim (revision 16213661) -4 Aralık (revision 16341162) -ABD (revision 16325951) -ABD Senatosu (revision 15970439) -Adam Horowitz (revision 14362106) -Akçe (revision 16261547) -Altın Takım (revision 13503001) -American Broadcasting Company (revision 16055235) -Amerika Birleşik Devletleri (revision 16325951) -Ana Sayfa/Kardeş projeler (revision 16293313) -Ana Sayfa/Kategoriler (revision 16293313) -Aptullah Kuran (revision 15744893) -Avrupa (revision 16299756) -Ayasofya (revision 16305207) -BM Güvenlik Konseyi (revision 16085518) -Birleşmiş Milletler (revision 16258474) -Budapeşte (revision 16219173) -CIA (revision 16054325) -Charlie Pace (revision 16129416) -Cuma (revision 14197127) -Desmond Hume (revision 16035300) -Diğerleri (Lost) (revision 16329444) +Ana_Sayfa (revision 25131171) +15 Mart (revision 25133274) +16 Mart (revision 25130723) +17 Mart (revision 25101714) +1920 (revision 24886521) +1921 (revision 24934034) +1926 (revision 24937098) +1968 (revision 25060729) +2003 (revision 25043871) +Abdullah Cevdet (revision 25117345) +Afganistan (revision 25053860) +Albanya (revision 25130585) +Anaheim, Kaliforniya (revision 25012994) +Azerbaycan Yahudileri (revision 25132094) +Georg Ohm (revision 24888782) +Haldun Taner (revision 25064462) +Hazar Kağanlığı (revision 25113376) +Interscope Records (revision 24937048) +Kaliforniya (revision 25130601) +Kamil Rıfkı Urga (revision 25105741) +Kuzey Lefkoşa (revision 24753125) +Kâbil (revision 24861920) +Latin Grammy Ödülleri (revision 22281504) +Lefkoşa (revision 24897461) +Moskova Antlaşması (revision 25031021) +Mustafa Kemal Atatürk (revision 25133394) +My Lai Katliamı (revision 25132972) +Nar (revision 25023035) +Natalia Oreiro (revision 25131895) +No Doubt (revision 24925807) +Osmanlı-Venedik Savaşı (1570-1573) (revision 24483832) +Osmanlı İmparatorluğu (revision 25136006) +Rachel Corrie (revision 24929876) +Robert H. Goddard (revision 24930216) +Rock müzik (revision 24864552) +Selimiye, Lefkoşa (revision 24306825) +Selimiye Meydanı (revision 24185756) +Selma Lagerlöf (revision 25097031) +Sovyetler Birliği (revision 25004103) +Sıcak çikolata (revision 24978056) +The Beacon Street Collection (revision 24950711) +Türbe (revision 25041350) +Türkiye Büyük Millet Meclisi (revision 25113834) +Türkçe (revision 25069652) +Vietnam Savaşı (revision 24942314) +Vikipedi (revision 25130148) +Yılın günleri listesi (revision 24802413) +Ziya Gökalp (revision 24942014) +Özgür içerik (revision 24349743) +İstanbul (revision 25106647) +İtilaf Devletleri (revision 25043005) +İttihat ve Terakki (revision 25125484) +İttik Dede Türbesi (revision 25133559) +0 Mart (revision 24329470) +0 Ocak (revision 23186786) +10 Aralık (revision 24772485) +10 Ağustos (revision 24980345) +10 Ekim (revision 24850081) +10 Eylül (revision 25090510) +10 Haziran (revision 25121277) +10 Kasım (revision 24973976) +10 Mart (revision 25105572) +10 Mayıs (revision 25120763) +10 Nisan (revision 25021557) +10 Ocak (revision 25093298) +10 Temmuz (revision 24907247) +10 Şubat (revision 25005286) +11 Aralık (revision 24822783) +11 Ağustos (revision 24750760) +11 Ekim (revision 25021451) +11 Eylül (revision 24878760) +11 Haziran (revision 24946135) +11 Kasım (revision 24751390) +11 Mart (revision 25101669) +11 Mayıs (revision 25123240) +11 Nisan (revision 25114265) +11 Ocak (revision 25121144) +11 Temmuz (revision 25018276) +11 Şubat (revision 25044631) +12 Aralık (revision 25120395) +12 Ağustos (revision 24964866) +12 Ekim (revision 24822300) +12 Eylül (revision 25105547) +12 Haziran (revision 24891411) +12 Kasım (revision 25105520) +12 Mart (revision 25105618) +12 Mayıs (revision 25084509) +12 Nisan (revision 25133262) +12 Ocak (revision 25105557) +12 Temmuz (revision 25132218) +12 Şubat (revision 25121399) +13 Aralık (revision 24801826) +13 Ağustos (revision 25136701) +13 Ekim (revision 25121155) +13 Eylül (revision 24750978) +13 Haziran (revision 24815847) +13 Kasım (revision 25084464) +13 Mart (revision 25125469) +13 Mayıs (revision 24897682) +13 Nisan (revision 25084441) +13 Ocak (revision 24756340) == End of Parsed pages == -- Wikipedia parsing ended at: 2015-12-04 02:24:44.728803 +- Wikipedia parsing ended at: 2021-03-16 20:34:51.082747 -48 characters appeared 267623 times. +54 characters appeared 913820 times. -First 36 characters: -[ 0] Char a: 12.311722086666691 % -[ 1] Char e: 8.716365932673948 % -[ 2] Char i: 8.507863673899479 % -[ 3] Char n: 7.322987934519828 % -[ 4] Char r: 6.979220769515326 % -[ 5] Char l: 6.609297407173524 % -[ 6] Char ı: 4.514933320379788 % -[ 7] Char d: 4.3475336574210734 % -[ 8] Char t: 4.2634601659797555 % -[ 9] Char k: 4.240293248338147 % -[10] Char s: 3.929781819948211 % -[11] Char m: 3.429451130881875 % -[12] Char u: 3.0998830444319063 % -[13] Char y: 2.9212735826143494 % -[14] Char o: 2.7135186437638024 % -[15] Char b: 2.3129551645411643 % -[16] Char ü: 1.8305601536489764 % -[17] Char ş: 1.5988909772328985 % -[18] Char z: 1.2267256551193282 % -[19] Char h: 1.1983274980102607 % -[20] Char v: 1.194964558352608 % -[21] Char c: 1.143773143563894 % -[22] Char g: 1.1004285879763698 % -[23] Char p: 1.0178497363828969 % -[24] Char ç: 0.8295251155543433 % -[25] Char ğ: 0.8205572764672693 % -[26] Char f: 0.7047226882592303 % -[27] Char ö: 0.6710932916827029 % -[28] Char j: 0.1296600068006113 % -[29] Char w: 0.11359262843627041 % -[30] Char â: 0.07846859201189733 % -[31] Char î: 0.04147625577771716 % -[32] Char x: 0.024287897527492032 % -[33] Char é: 0.014946398478456635 % -[34] Char q: 0.01083613889688106 % -[35] Char û: 0.009341499049035397 % +First 33 characters: +[ 0] Char a: 12.104681447112123 % +[ 1] Char e: 8.960189096320939 % +[ 2] Char i: 8.522575561926857 % +[ 3] Char n: 7.2878685080212735 % +[ 4] Char r: 6.9632969293734 % +[ 5] Char l: 6.837889299862117 % +[ 6] Char ı: 4.501433542710818 % +[ 7] Char k: 4.343196690814383 % +[ 8] Char t: 4.3038016239522 % +[ 9] Char d: 4.30150357838524 % +[10] Char s: 3.781816988028277 % +[11] Char m: 3.4274802477511983 % +[12] Char u: 3.119761003261036 % +[13] Char y: 2.903635289225449 % +[14] Char o: 2.639688341248824 % +[15] Char b: 2.1207677660808475 % +[16] Char ü: 1.8651375544417939 % +[17] Char ş: 1.4568514587117813 % +[18] Char v: 1.4549911361099561 % +[19] Char h: 1.216869843076317 % +[20] Char z: 1.1867763892232606 % +[21] Char g: 1.1811954214177847 % +[22] Char c: 1.125714035586877 % +[23] Char p: 0.8964566325972293 % +[24] Char ç: 0.8571709964763301 % +[25] Char ö: 0.7883390602087939 % +[26] Char ğ: 0.7411744107154582 % +[27] Char f: 0.7040773894202359 % +[28] Char j: 0.13821102624149176 % +[29] Char w: 0.07933728743078505 % +[30] Char â: 0.05865487732813902 % +[31] Char î: 0.03994222056860213 % +[32] Char û: 0.028014269768663416 % -The first 36 characters have an accumulated ratio of 0.99980569681978. +The first 33 characters have an accumulated ratio of 0.9993849992339848. -935 sequences found. +1097 sequences found. -First 512 (typical positive ratio): 0.991865243864388 -Next 512 (512-1024): 3.7365996196141585e-06 -Rest: 2.949029909160572e-17 +First 512 (typical positive ratio): 0.9923593121944019 +Next 512 (512-1024): 0.014568514587117814 +Rest: 9.536163614441446e-05 -- Processing end: 2015-12-04 02:24:44.883537 +- Processing end: 2021-03-16 20:34:51.176659 diff --git a/script/BuildLangModelLogs/LangVietnameseModel.log b/script/BuildLangModelLogs/LangVietnameseModel.log index 6732b1a..1c111ad 100644 --- a/script/BuildLangModelLogs/LangVietnameseModel.log +++ b/script/BuildLangModelLogs/LangVietnameseModel.log @@ -1,121 +1,179 @@ = Logs of language model for Vietnamese (vi) = - Generated by BuildLangModel.py -- Started: 2016-02-13 03:37:17.480303 -- Maximum depth: 3 -- Max number of pages: 40 +- Started: 2021-03-16 20:34:51.373194 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == -Chữ_Quốc_ngữ (revision 22887853) -1651 (revision 21455247) -1773 (revision 21354755) -1815 (revision 21361292) -1838 (revision 21361314) -1865 (revision 21361338) -1869 (revision 21361342) -1888 (revision 21389506) -1902 (revision 21354811) -1918 (revision 21354828) -1919 (revision 21354829) -1938 (revision 21354849) -1945 (revision 21354857) -22 tháng 2 (revision 21376086) -26 tháng 11 (revision 22579845) -28 tháng 12 (revision 22475308) -A (revision 22549334) -ASCII (revision 22528409) -Alexandre de Rhodes (revision 22859954) -Antonio Barbosa (revision 22145269) -B (revision 22836557) -BBC (revision 22863903) -Biên khảo (revision 22531516) -Bán nguyên âm (revision 22655600) -Bình luận (revision 22117664) -Bảng chữ cái Bồ Đào Nha (revision 22887853) -Bảng chữ cái Hy Lạp (revision 21362081) -Bảng chữ cái Latinh (revision 22442448) -Bắc Kỳ (revision 22393289) -Bồ Đào Nha (revision 22620858) -C (revision 21341881) -Cao Xuân Dục (revision 22620201) -Chính tả (revision 22187359) -Chính tả tiếng Việt (revision 20897580) -Chữ Hán (revision 22889609) -Chữ Nôm (revision 22781506) -Chữ cái (revision 22169220) -Công giáo (revision 22173119) -D (revision 21447691) +Chữ_Quốc_ngữ (revision 64521024) +1651 (revision 26251708) +1838 (revision 63252802) +1865 (revision 64100421) +1869 (revision 59848285) +1888 (revision 64474933) +1902 (revision 64405865) +1918 (revision 64446780) +1919 (revision 64400438) +1938 (revision 63147818) +22 tháng 2 (revision 64199177) +26 tháng 11 (revision 60306925) +28 tháng 12 (revision 64197178) +A (revision 64396586) +ASCII (revision 64542934) +Alexandre de Rhodes (revision 64481737) +Antonio Barbosa (revision 28290803) +B (revision 63753684) +BBC (revision 64477721) +Biên khảo (revision 64480018) +Bàn phím máy tính (revision 63261029) +Bá Đa Lộc (revision 64107557) +Bán nguyên âm (revision 64296580) +Bình luận (revision 26758605) +Bảng chữ cái Bồ Đào Nha (revision 64521024) +Bảng chữ cái Hy Lạp (revision 64540140) +Bảng chữ cái Latinh (revision 64566174) +Bảng chữ cái Latinh cơ bản của ISO (revision 64566174) +Bảng chữ cái Phoenicia (revision 64540140) +Bảng mẫu tự ngữ âm quốc tế (revision 64494501) +Bắc Kỳ (revision 64538623) +Bồ Đào Nha (revision 64477762) +Bộ Giáo dục và Đào tạo (Việt Nam) (revision 64439920) +Bộ gõ tiếng Việt (revision 64399872) +C (revision 64341946) +Cao Xuân Dục (revision 64403009) +Chiều cao (revision 63620682) +Christoforo Borri (revision 39684524) +Chính tả (revision 64168374) +Chính tả tiếng Việt (revision 64566759) +Chủ tịch Hồ Chí Minh (revision 64592392) +Chữ Hán (revision 64488663) +Chữ Latinh (revision 64566174) +Chữ Nôm (revision 64497361) +Chữ b đuôi quặp (revision 63724573) +Chữ cái (revision 63906900) +Chữ số La Mã (revision 64606955) +Chữ tượng hình Ai Cập (revision 64545532) +Chữ viết tiếng Việt (revision 64521025) +Các dân tộc Việt Nam (revision 64521289) +Công giáo tại Việt Nam (revision 64479778) +Cư Jút (revision 64446849) +Cư Kuin (revision 64351798) +Cư Ê Wi (revision 64324496) +Cải cách giáo dục của Cộng hòa Xã hội chủ nghĩa Việt Nam (revision 63800666) +Cổ tự học (revision 63417312) +D (revision 64521463) +Danh sách các chữ cái Latinh (revision 64566174) +De facto (revision 64458216) +Di chúc Hồ Chí Minh (revision 64479855) +Du ký (revision 64306751) +Dòng Tên (revision 64563470) +Dấu câu (revision 64430387) +Dấu huyền (revision 64200881) +Dấu hỏi (revision 64314350) +Dấu ngã (revision 64005169) +Dấu nặng (revision 64200881) +Dấu phụ (revision 43648394) +Dấu sắc (revision 64200881) +Dấu âm ngắn (revision 64560651) +E (revision 63474436) +Ea H'leo (revision 64600906) +Ea Wy (revision 64564116) +F (revision 64556895) +Francesco Buzomi (revision 64573844) +Francisco de Pina (revision 64573938) +G (revision 63840275) +Gaspar do Amaral (revision 61771486) +Gemeinsame Normdatei (revision 63835749) +Gen (revision 64577144) +Gia Định báo (revision 64521887) +Giovanni Filippo de Marini (revision 64381034) +Girolamo Maiorica (revision 64500026) +Giáo hội Công giáo Rôma (revision 64587044) +H (revision 63175940) +Hiến pháp nước Cộng hòa Xã hội chủ nghĩa Việt Nam 2013 (revision 64587062) +Hoàng Phê (revision 63792712) +Hán học (revision 64209708) +Hệ chữ viết Latinh (revision 64566174) +Hệ thống chữ nổi tiếng Việt (revision 64158849) +Hồ Chí Minh (revision 64592392) +Hồ Dzếnh (revision 64471051) +Hội Trí Tri (revision 64593204) +I (revision 55105217) +IPA (revision 64494501) +ISBN (revision 64594093) +ISO/IEC 646 (revision 64542934) +J (revision 64280732) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-02-13 03:42:06.560479 +- Wikipedia parsing ended at: 2021-03-16 20:57:28.725327 -101 characters appeared 222814 times. +107 characters appeared 961999 times. -First 55 characters: -[ 0] Char n: 11.262308472537633 % -[ 1] Char h: 8.881398834902654 % -[ 2] Char t: 7.022898022565907 % -[ 3] Char c: 6.365398942615815 % -[ 4] Char i: 6.198443544840091 % -[ 5] Char g: 5.591210606155808 % -[ 6] Char a: 3.5998635633308496 % -[ 7] Char u: 2.8499106878382867 % -[ 8] Char m: 2.615185760320267 % -[ 9] Char o: 2.6012728105056238 % -[10] Char đ: 2.222032726848403 % -[11] Char r: 2.1102803234985235 % -[12] Char à: 2.0447548179198796 % -[13] Char v: 1.9437737305555307 % -[14] Char l: 1.9119085874316697 % -[15] Char á: 1.7539292863105551 % -[16] Char p: 1.6453185167897888 % -[17] Char b: 1.541195795596327 % -[18] Char ư: 1.4397659033992478 % -[19] Char s: 1.3760356171515256 % -[20] Char y: 1.280440187779942 % -[21] Char e: 1.2454334108269678 % -[22] Char d: 1.1251537156552103 % -[23] Char ế: 1.071745940560288 % -[24] Char k: 1.0695019163966357 % -[25] Char â: 0.9658280000359044 % -[26] Char ữ: 0.9604423420431392 % -[27] Char ê: 0.8374698178749989 % -[28] Char ệ: 0.7459136319979893 % -[29] Char ô: 0.7073164163831717 % -[30] Char ạ: 0.6727584442629277 % -[31] Char ộ: 0.6705144200992756 % -[32] Char ố: 0.6476253736300233 % -[33] Char ó: 0.6072329386842837 % -[34] Char ả: 0.5484395055965963 % -[35] Char ủ: 0.5475418959311353 % -[36] Char q: 0.5138815334763525 % -[37] Char ợ: 0.48560682901433483 % -[38] Char ờ: 0.4851580241816044 % -[39] Char ể: 0.4748355130288043 % -[40] Char ớ: 0.4676546357051173 % -[41] Char ấ: 0.418286104104769 % -[42] Char ị: 0.40212913012647317 % -[43] Char ầ: 0.3904602044754818 % -[44] Char ọ: 0.3801376933226817 % -[45] Char ề: 0.3787912788244904 % -[46] Char ơ: 0.3590438661843511 % -[47] Char í: 0.35679984202069887 % -[48] Char ụ: 0.35276059852612496 % -[49] Char ậ: 0.3469261357006292 % -[50] Char ì: 0.32762752789322036 % -[51] Char ă: 0.3253835037295682 % -[52] Char ứ: 0.29665999443482005 % -[53] Char ồ: 0.29665999443482005 % -[54] Char x: 0.2939671654384374 % +First 54 characters: +[ 0] Char n: 11.732340678108812 % +[ 1] Char h: 8.846059091537517 % +[ 2] Char t: 6.799279417130371 % +[ 3] Char c: 6.610713732550658 % +[ 4] Char i: 6.088467867430215 % +[ 5] Char g: 5.545639860332495 % +[ 6] Char a: 3.414244713352093 % +[ 7] Char u: 2.916842948901194 % +[ 8] Char m: 2.5668425850754524 % +[ 9] Char o: 2.5124766241960748 % +[10] Char đ: 2.3970918888689074 % +[11] Char à: 2.0960520748982066 % +[12] Char v: 2.0507297824633914 % +[13] Char r: 1.966114309890135 % +[14] Char l: 1.7723511147100985 % +[15] Char á: 1.7447003583163807 % +[16] Char p: 1.523390356954633 % +[17] Char ư: 1.47359820540354 % +[18] Char b: 1.435656378021183 % +[19] Char s: 1.3317061660147256 % +[20] Char y: 1.2888786786680653 % +[21] Char d: 1.1103961646529779 % +[22] Char k: 1.0495852906292003 % +[23] Char ế: 0.9804583996449061 % +[24] Char e: 0.9535352947352336 % +[25] Char ộ: 0.8640341621976738 % +[26] Char ệ: 0.8197513718829229 % +[27] Char â: 0.8006245328737348 % +[28] Char ê: 0.792724316761244 % +[29] Char ô: 0.7877347065849342 % +[30] Char ố: 0.7180880645406076 % +[31] Char ạ: 0.7030152837996714 % +[32] Char q: 0.6624747011171529 % +[33] Char ả: 0.650208576100391 % +[34] Char ữ: 0.622038068646641 % +[35] Char ủ: 0.589085851440594 % +[36] Char ó: 0.5876305484725036 % +[37] Char ớ: 0.5369028450133524 % +[38] Char ề: 0.48440798795009143 % +[39] Char í: 0.47162211187329717 % +[40] Char ờ: 0.47131026123727776 % +[41] Char ợ: 0.46403374639682576 % +[42] Char ấ: 0.44532270823566344 % +[43] Char ể: 0.4278590726185786 % +[44] Char ă: 0.4115388893335648 % +[45] Char ị: 0.40748483106531297 % +[46] Char ậ: 0.3686074517748979 % +[47] Char ơ: 0.36434549308263314 % +[48] Char ự: 0.35519787442606493 % +[49] Char ồ: 0.3434515004693352 % +[50] Char ụ: 0.3314972260885926 % +[51] Char ầ: 0.32848266994040537 % +[52] Char ì: 0.32785896866836656 % +[53] Char x: 0.32650761591228267 % -The first 55 characters have an accumulated ratio of 0.9603301408349568. +The first 54 characters have an accumulated ratio of 0.9567099342099108. -1494 sequences found. +1890 sequences found. -First 512 (typical positive ratio): 0.9321889118082535 -Next 512 (512-1024): 0.009604423420431392 -Rest: 0.0068905733918831966 +First 512 (typical positive ratio): 0.9336493792477815 +Next 512 (512-1024): 0.003551978744260649 +Rest: 0.007456342500128027 -- Processing end: 2016-02-13 03:42:07.174723 +- Processing end: 2021-03-16 20:57:29.603172 diff --git a/src/LangModels/LangCroatianModel.cpp b/src/LangModels/LangCroatianModel.cpp index 961bd0e..e1410b8 100644 --- a/src/LangModels/LangCroatianModel.cpp +++ b/src/LangModels/LangCroatianModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Croatian *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-25 23:50:27.590137 + * On: 2021-03-16 19:18:55.486472 **/ /* Character Mapping Table: @@ -61,45 +62,45 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Windows_1250_CharToOrderMap[] = +static const unsigned char Iso_8859_2_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM, 49, 50, 24, 51, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM, 52, 53, 24, 54, /* 9X */ - SYM,SYM,SYM, 40,SYM, 55,SYM,SYM,SYM,SYM, 56,SYM,SYM,SYM,SYM, 57, /* AX */ - SYM,SYM,SYM, 40,SYM,SYM,SYM,SYM,SYM, 58, 59,SYM, 60,SYM, 61, 62, /* BX */ - 63, 41, 43, 64, 36, 65, 25, 39, 18, 31, 66, 47, 67, 68, 69, 70, /* CX */ - 26, 71, 72, 44, 73, 74, 32,SYM, 75, 76, 48, 77, 33, 78, 79, 80, /* DX */ - 81, 41, 43, 82, 36, 83, 25, 39, 18, 31, 84, 47, 85, 86, 87, 88, /* EX */ - 26, 89, 90, 44, 91, 92, 32,SYM, 93, 94, 48, 95, 33, 96, 97,SYM, /* FX */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM, 49,SYM, 38,SYM, 50, 51,SYM,SYM, 23, 52, 53, 54,SYM, 24, 55, /* AX */ + SYM, 56,SYM, 38,SYM, 57, 58,SYM,SYM, 23, 59, 60, 61,SYM, 24, 62, /* BX */ + 63, 39, 46, 64, 36, 65, 25, 43, 18, 31, 66, 45, 67, 68, 69, 70, /* CX */ + 26, 71, 72, 47, 73, 74, 32,SYM, 75, 76, 48, 77, 33, 78, 79, 80, /* DX */ + 81, 39, 46, 82, 36, 83, 25, 43, 18, 31, 84, 45, 85, 86, 87, 88, /* EX */ + 26, 89, 90, 47, 91, 92, 32,SYM, 93, 94, 48, 95, 33, 96, 97,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_2_CharToOrderMap[] = +static const unsigned char Iso_8859_13_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 98,SYM, 40,SYM, 99,100,SYM,SYM, 23,101,102,103,SYM, 24,104, /* AX */ - SYM,105,SYM, 40,SYM,106,107,SYM,SYM, 23,108,109,110,SYM, 24,111, /* BX */ - 112, 41, 43,113, 36,114, 25, 39, 18, 31,115, 47,116,117,118,119, /* CX */ - 26,120,121, 44,122,123, 32,SYM,124,125, 48,126, 33,127,128,129, /* DX */ - 130, 41, 43,131, 36,132, 25, 39, 18, 31,133, 47,134,135,136,137, /* EX */ - 26,138,139, 44,140,141, 32,SYM,142,143, 48,144, 33,145,146,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 98,SYM,SYM,SYM,SYM, 99, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,100,SYM,SYM,SYM,SYM,101, /* BX */ + 102,103,104, 25, 36,105,106,107, 18, 31,108,109,110,111,112,113, /* CX */ + 23,114,115, 47, 40, 37, 32,SYM,116, 38,117,118, 33,119, 24,120, /* DX */ + 121,122,123, 25, 36,124,125,126, 18, 31,127,128,129,130,131,132, /* EX */ + 23,133,134, 47, 40, 37, 32,SYM,135, 38,136,137, 33,138, 24,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,147 +110,160 @@ static const unsigned char Iso_8859_16_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,147,148, 40,SYM,SYM, 23,SYM, 23,SYM,149,SYM,150,SYM,151,152, /* AX */ - SYM,SYM, 18, 40, 24,SYM,SYM,SYM, 24, 18,153,SYM, 45, 45,154,155, /* BX */ - 46, 41, 43,156, 36, 25,157, 39, 35, 31, 42, 47,158,159,160,161, /* CX */ - 26,162,163, 44,164,165, 32,166,167,168, 48,169, 33,170,171,172, /* DX */ - 46, 41, 43,173, 36, 25,174, 39, 35, 31, 42, 47,175,176,177,178, /* EX */ - 26,179,180, 44,181,182, 32,183,184,185, 48,186, 33,187,188,189, /* FX */ + SYM,139,140, 38,SYM,SYM, 23,SYM, 23,SYM,141,SYM,142,SYM,143,144, /* AX */ + SYM,SYM, 18, 38, 24,SYM,SYM,SYM, 24, 18,145,SYM, 44, 44,146,147, /* BX */ + 42, 39, 46,148, 36, 25,149, 43, 34, 31, 41, 45,150,151,152,153, /* CX */ + 26,154,155, 47,156,157, 32,158,159,160, 48,161, 33,162,163,164, /* DX */ + 42, 39, 46,165, 36, 25,166, 43, 34, 31, 41, 45,167,168,169,170, /* EX */ + 26,171,172, 47,173,174, 32,175,176,177, 48,178, 33,179,180,181, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Mac_Centraleurope_CharToOrderMap[] = +static const unsigned char Windows_1250_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 36,190,191, 31,192, 32, 33, 41,193, 18, 36, 18, 25, 25, 31,194, /* 8X */ - 195,196,197,198,199,200,201, 44,202,203, 32, 37, 48,204,205, 33, /* 9X */ - SYM,SYM,206,SYM,SYM,SYM,SYM,207,SYM,SYM,SYM,208,SYM,SYM,209,210, /* AX */ - 211,212,SYM,SYM,213,214,SYM,SYM, 40,215,216,217,218,219,220,221, /* BX */ - 222,223,SYM,SYM,224,225,SYM,SYM,SYM,SYM,SYM,226,227, 37,228, 38, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,229,230,231,SYM,SYM,232,233, /* DX */ - 234, 23,SYM,SYM, 23,235,236, 41,237,238,239, 24, 24,240, 44,241, /* EX */ - 242,243, 48,244,245,246,247,248,249,249,249,249, 40,249,249,SYM, /* FX */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM,182,183, 24,184, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM,185,186, 24,187, /* 9X */ + SYM,SYM,SYM, 38,SYM,188,SYM,SYM,SYM,SYM,189,SYM,SYM,SYM,SYM,190, /* AX */ + SYM,SYM,SYM, 38,SYM,SYM,SYM,SYM,SYM,191,192,SYM,193,SYM,194,195, /* BX */ + 196, 39, 46,197, 36,198, 25, 43, 18, 31,199, 45,200,201,202,203, /* CX */ + 26,204,205, 47,206,207, 32,SYM,208,209, 48,210, 33,211,212,213, /* DX */ + 214, 39, 46,215, 36,216, 25, 43, 18, 31,217, 45,218,219,220,221, /* EX */ + 26,222,223, 47,224,225, 32,SYM,226,227, 48,228, 33,229,230,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_13_CharToOrderMap[] = +static const unsigned char Ibm852_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM,249,SYM,SYM,SYM,SYM,249, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM,249,SYM,SYM,SYM,SYM,249, /* BX */ - 249,249,249, 25, 36,249,249,249, 18, 31,249,249,249,249,249,249, /* CX */ - 23,249,249, 44, 38, 37, 32,SYM,249, 40,249,249, 33,249, 24,249, /* DX */ - 249,249,249, 25, 36,249,249,249, 18, 31,249,249,249,249,249,249, /* EX */ - 23,249,249, 44, 38, 37, 32,SYM,249, 40,249,249, 33,249, 24,SYM, /* FX */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 43, 33, 31, 46, 36,231, 25, 43, 38, 45,232,233,234,235, 36, 25, /* 8X */ + 31,236,237,238, 32,239,240,241,242, 32, 33,243,244, 38,SYM, 18, /* 9X */ + 39,245, 47, 48,246,247, 24, 24,248,249,SYM,249, 18,249,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 39, 46,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 26, 26,249, 45,249,249,249,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */ + 47,249,249,249,249,249, 23, 23,249, 48,249,249,249,249,249,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Ibm852_CharToOrderMap[] = +static const unsigned char Mac_Centraleurope_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 12, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 11, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 39, 33, 31, 43, 36,249, 25, 39, 40, 47,249,249,249,249, 36, 25, /* 8X */ - 31,249,249,249, 32,249,249,249,249, 32, 33,249,249, 40,SYM, 18, /* 9X */ - 41,249, 44, 48,249,249, 24, 24,249,249,SYM,249, 18,249,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 41, 43,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 26, 26,249, 47,249,249,249,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */ - 44,249,249,249,249,249, 23, 23,249, 48,249,249,249,249,249,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ + 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 36,249,249, 31,249, 32, 33, 39,249, 18, 36, 18, 25, 25, 31,249, /* 8X */ + 249,249,249,249,249,249,249, 47,249,249, 32, 37, 48,249,249, 33, /* 9X */ + SYM,SYM,249,SYM,SYM,SYM,SYM,249,SYM,SYM,SYM,249,SYM,SYM,249,249, /* AX */ + 249,249,SYM,SYM,249,249,SYM,SYM, 38,249,249,249,249,249,249,249, /* BX */ + 249,249,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,249,249, 37,249, 40, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40,249,249,249,SYM,SYM,249,249, /* DX */ + 249, 23,SYM,SYM, 23,249,249, 39,249,249,249, 24, 24,249, 47,249, /* EX */ + 249,249, 48,249,249,249,249,249,249,249,249,249, 38,249,249,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 62; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 19, 67, 20, 68, 15, 69, 2, 70, 22, 71, 17, 72, 21, + 73, 1, 74, 7, 75, 9, 76, 10, 77, 11, 78, 4, 79, 3, 80, 14, + 81, 30, 82, 6, 83, 8, 84, 5, 85, 12, 86, 13, 87, 28, 88, 29, + 89, 27, 90, 16, 97, 0, 98, 19, 99, 20, 100, 15, 101, 2,102, 22, + 103, 17, 104, 21, 105, 1, 106, 7, 107, 9, 108, 10, 109, 11,110, 4, + 111, 3, 112, 14, 113, 30, 114, 6, 115, 8, 116, 5, 117, 12,118, 13, + 119, 28, 120, 29, 121, 27, 122, 16, 262, 25, 263, 25, 268, 18,269, 18, + 272, 26, 273, 26, 352, 23, 353, 23, 381, 24, 382, 24, +}; + /* Model Table: - * Total sequences: 712 - * First 512 sequences: 0.9989731099787131 - * Next 512 sequences (512-1024): 0.0010268900212868262 - * Rest: 3.7513395167998453e-17 + * Total sequences: 725 + * First 512 sequences: 0.9990568119867879 + * Next 512 sequences (512-1024): 0.0009431880132121777 + * Rest: -4.0440741033709315e-17 * Negative sequences: TODO */ static const PRUint8 CroatianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,3,3,2,0,0,0,0,3,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,3,3,0,0,0,0,3,2,0,0, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,0,3,3,2,0,2,3,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,3,0,0,0,0,3,2,0,2, - 3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,2,0,3,3,0,3,2,0,3,0,2,0,2,3,0,0, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,3,3,3,0,3,3,3,3,2,2,0,0,3,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,0,3,2,3,3,2,3,0,0,0,0,2,3,0,0, - 3,3,3,3,3,0,3,3,3,3,3,3,2,0,2,3,0,0,2,0,3,0,0,3,0,0,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,2,3,3,0,2,0,3,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,3,3,3,0,3,2,2,3,0,3,0,0,2,3,2,2, - 3,3,3,3,3,0,3,3,2,0,3,3,3,3,0,3,0,3,0,3,0,3,0,0,0,0,0,2,2,0,0, - 3,3,3,3,3,2,3,2,2,0,3,3,3,3,2,3,3,2,0,0,0,3,2,0,0,0,0,3,2,0,0, - 3,3,3,3,3,0,2,3,0,3,3,3,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,0,3,3,2,2,0,3,3,0,0,2,3,0,3,0,0,0,0,2,0,0,2, - 3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,0,2,0,0,0,3,0,0,0,0,0,2,0,0,3, - 3,3,3,3,3,3,3,0,3,2,3,3,2,3,0,2,3,2,0,3,3,2,2,0,0,0,0,3,3,2,0, - 3,3,3,3,3,3,3,0,3,2,3,3,2,0,2,2,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,2,0,0,3,3,3,2,3,3,0,0,0,2,0,2,0,0,0,0,3,0,0,0,0,0, - 3,3,3,3,3,0,0,2,0,0,2,3,0,0,0,3,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0, - 3,3,3,3,3,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 3,3,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,3,2,2,2,3,0,3,3,0,0,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,2,2,2,0,0,3,0,0,0,0,0,0,0,2,2,3,2,0,0,0,0,2,2,0,0, - 2,3,2,0,0,0,2,0,0,0,0,2,0,2,3,0,0,2,0,0,0,0,2,0,0,0,0,0,3,0,0, - 0,3,2,0,0,0,2,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,3,2,2,3,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,3,0,0,0,0,3,2,0,3, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,3,0,3,2,2,3,0,2,0,2,3,0,0, + 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,0,0,0,3,2,2,0, + 3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,3,2,3,3,2,3,0,0,0,0,2,3,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,2,0, + 3,3,3,3,3,0,3,3,3,3,3,2,3,0,2,3,0,0,2,2,3,2,2,3,0,0,0,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,2,3,3,0,2,0,3,0,2,0,2,0, + 3,3,3,3,3,2,3,3,3,0,3,3,3,3,0,2,3,3,0,3,2,2,3,0,3,0,0,2,3,2,0, + 3,3,3,3,3,0,3,3,2,0,3,3,3,3,0,3,2,3,0,3,0,3,0,0,0,0,0,2,2,0,0, + 3,3,3,3,3,2,3,2,2,0,3,3,3,2,2,3,3,2,0,0,0,3,2,0,0,0,0,2,2,0,0, + 3,3,3,3,3,0,2,3,0,3,3,0,3,3,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,2,0,3,2,3,2,0,3,3,0,0,2,3,0,3,0,0,0,0,3,0,0,2, + 3,3,3,3,2,3,3,3,3,3,3,3,3,2,0,3,0,2,0,0,2,3,0,0,0,0,0,2,0,0,3, + 3,3,3,3,3,3,3,0,3,2,3,3,3,3,0,2,3,2,0,3,3,2,2,0,0,0,0,3,2,2,0, + 3,3,3,3,3,3,3,2,3,2,3,2,3,0,2,2,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,2,0,0,3,3,2,3,3,3,0,0,0,3,0,2,0,0,0,0,3,0,0,0,0,0, + 3,3,3,3,3,0,2,2,0,0,2,0,3,0,0,3,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0, + 3,3,3,3,3,0,0,0,0,2,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, + 3,3,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,3,2,2,2,2,3,2,3,2,3,0,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,2,2,3,0,0,0,3,0,0,0,0,0,0,2,2,3,2,0,0,0,0,2,2,0,2, + 3,3,2,0,0,2,2,0,0,0,0,0,2,2,2,0,0,2,0,0,0,0,2,0,0,0,0,0,3,0,0, + 0,3,0,0,0,0,3,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; -const SequenceModel Windows_1250CroatianModel = +const SequenceModel Iso_8859_2CroatianModel = { - Windows_1250_CharToOrderMap, + Iso_8859_2_CharToOrderMap, CroatianLangModel, 31, - (float)0.9989731099787131, + (float)0.9990568119867879, PR_TRUE, - "WINDOWS-1250", + "ISO-8859-2", "hr" }; -const SequenceModel Iso_8859_2CroatianModel = +const SequenceModel Iso_8859_13CroatianModel = { - Iso_8859_2_CharToOrderMap, + Iso_8859_13_CharToOrderMap, CroatianLangModel, 31, - (float)0.9989731099787131, + (float)0.9990568119867879, PR_TRUE, - "ISO-8859-2", + "ISO-8859-13", "hr" }; @@ -258,41 +272,51 @@ const SequenceModel Iso_8859_16CroatianModel = Iso_8859_16_CharToOrderMap, CroatianLangModel, 31, - (float)0.9989731099787131, + (float)0.9990568119867879, PR_TRUE, "ISO-8859-16", "hr" }; -const SequenceModel Mac_CentraleuropeCroatianModel = +const SequenceModel Windows_1250CroatianModel = { - Mac_Centraleurope_CharToOrderMap, + Windows_1250_CharToOrderMap, CroatianLangModel, 31, - (float)0.9989731099787131, + (float)0.9990568119867879, PR_TRUE, - "MAC-CENTRALEUROPE", + "WINDOWS-1250", "hr" }; -const SequenceModel Iso_8859_13CroatianModel = +const SequenceModel Ibm852CroatianModel = { - Iso_8859_13_CharToOrderMap, + Ibm852_CharToOrderMap, CroatianLangModel, 31, - (float)0.9989731099787131, + (float)0.9990568119867879, PR_TRUE, - "ISO-8859-13", + "IBM852", "hr" }; -const SequenceModel Ibm852CroatianModel = +const SequenceModel Mac_CentraleuropeCroatianModel = { - Ibm852_CharToOrderMap, + Mac_Centraleurope_CharToOrderMap, CroatianLangModel, 31, - (float)0.9989731099787131, + (float)0.9990568119867879, PR_TRUE, - "IBM852", + "MAC-CENTRALEUROPE", "hr" }; + +const LanguageModel CroatianModel = +{ + "hr", + Unicode_CharOrder, + 62, + CroatianLangModel, + 31, + (float)0.9990568119867879, +}; diff --git a/src/LangModels/LangCzechModel.cpp b/src/LangModels/LangCzechModel.cpp index c12c07e..75d9dea 100644 --- a/src/LangModels/LangCzechModel.cpp +++ b/src/LangModels/LangCzechModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Czech *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-21 03:28:11.733089 + * On: 2021-03-16 18:50:25.564246 **/ /* Character Mapping Table: @@ -61,45 +62,45 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Windows_1250_CharToOrderMap[] = +static const unsigned char Iso_8859_2_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */ - 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */ - 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 46, 38, 26, 47, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 46, 38, 26, 48, /* 9X */ - SYM,SYM,SYM, 49,SYM, 50,SYM,SYM,SYM,SYM, 51,SYM,SYM,SYM,SYM, 52, /* AX */ - SYM,SYM,SYM, 53,SYM,SYM,SYM,SYM,SYM, 54, 55,SYM, 45,SYM, 45, 56, /* BX */ - 57, 18, 58, 59, 42, 60, 61, 62, 25, 24, 63, 64, 23, 11, 65, 39, /* CX */ - 66, 67, 35, 37, 68, 69, 41,SYM, 27, 31, 33, 70, 43, 28, 71, 72, /* DX */ - 73, 18, 74, 75, 42, 76, 77, 78, 25, 24, 79, 80, 23, 11, 81, 39, /* EX */ - 82, 83, 35, 37, 84, 85, 41,SYM, 27, 31, 33, 86, 43, 28, 87,SYM, /* FX */ + SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */ + 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */ + 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM, 48,SYM, 49,SYM, 44, 45,SYM,SYM, 29, 50, 38, 51,SYM, 26, 52, /* AX */ + SYM, 53,SYM, 54,SYM, 44, 45,SYM,SYM, 29, 55, 38, 56,SYM, 26, 57, /* BX */ + 58, 19, 59, 60, 41, 61, 62, 63, 25, 24, 64, 65, 23, 12, 66, 40, /* CX */ + 67, 68, 36, 37, 69, 70, 42,SYM, 27, 32, 33, 71, 43, 28, 72, 46, /* DX */ + 73, 19, 74, 75, 41, 76, 77, 78, 25, 24, 79, 80, 23, 12, 81, 40, /* EX */ + 82, 83, 36, 37, 84, 85, 42,SYM, 27, 32, 33, 86, 43, 28, 87,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Mac_Centraleurope_CharToOrderMap[] = +static const unsigned char Windows_1250_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */ - 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */ - 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 42, 88, 89, 24, 90, 41, 43, 18, 91, 25, 42, 25, 92, 93, 24, 94, /* 8X */ - 95, 39, 11, 39, 44, 44, 96, 37, 97, 98, 41, 99, 33, 23, 23, 43, /* 9X */ - SYM,SYM,100,SYM,SYM,SYM,SYM,101,SYM,SYM,SYM,102,SYM,SYM,103,104, /* AX */ - 105,106,SYM,SYM,107,108,SYM,SYM,109,110,111, 45, 45,112,113,114, /* BX */ - 115,116,SYM,SYM,117, 35,SYM,SYM,SYM,SYM,SYM, 35,118,119,120,121, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,122,123,124, 27,SYM,SYM, 27,125, /* DX */ - 126, 29,SYM,SYM, 29, 46, 46, 18, 38, 38, 11, 26, 26,127, 37,128, /* EX */ - 129, 31, 33, 31,130,131,132,133, 28, 28,134,135,136,137,138,SYM, /* FX */ + SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */ + 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */ + 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 45, 38, 26, 88, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 45, 38, 26, 89, /* 9X */ + SYM,SYM,SYM, 90,SYM, 91,SYM,SYM,SYM,SYM, 92,SYM,SYM,SYM,SYM, 93, /* AX */ + SYM,SYM,SYM, 94,SYM,SYM,SYM,SYM,SYM, 95, 96,SYM, 44,SYM, 44, 97, /* BX */ + 98, 19, 99,100, 41,101,102,103, 25, 24,104,105, 23, 12,106, 40, /* CX */ + 107,108, 36, 37,109,110, 42,SYM, 27, 32, 33,111, 43, 28,112, 46, /* DX */ + 113, 19,114,115, 41,116,117,118, 25, 24,119,120, 23, 12,121, 40, /* EX */ + 122,123, 36, 37,124,125, 42,SYM, 27, 32, 33,126, 43, 28,127,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,156 +110,172 @@ static const unsigned char Ibm852_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */ - 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */ - 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 139, 43, 24,140, 42, 31,141,142,143,144,145,146,147,148, 42,149, /* 8X */ - 24,150,151,152, 41, 45, 45, 46, 46, 41, 43, 38, 38,153,SYM, 25, /* 9X */ - 18, 11, 37, 33,154,155, 26, 26,156,157,SYM,158, 25,159,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 18,160, 23,161,SYM,SYM,SYM,SYM,162,163,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM,164,165,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 166,167, 39,168, 39, 35, 11,169, 23,SYM,SYM,SYM,SYM,170, 31,SYM, /* DX */ - 37,171,172,173,174, 35, 29, 29,175, 33,176,177, 28, 28,178,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,179, 27, 27,SYM,SYM, /* FX */ + SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */ + 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */ + 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 128, 43, 24,129, 41, 32,130,131,132,133,134,135,136,137, 41,138, /* 8X */ + 24,139,140,141, 42, 44, 44, 45, 45, 42, 43, 38, 38,142,SYM, 25, /* 9X */ + 19, 12, 37, 33,143,144, 26, 26,145,146,SYM,147, 25,148,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 19,149, 23,150,SYM,SYM,SYM,SYM,151,152,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM,153,154,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 155,156, 40,157, 40, 36, 12,158, 23,SYM,SYM,SYM,SYM,159, 32,SYM, /* DX */ + 37, 46,160,161,162, 36, 29, 29,163, 33,164,165, 28, 28,166,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,167, 27, 27,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_2_CharToOrderMap[] = +static const unsigned char Mac_Centraleurope_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */ - 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */ - 8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,180,SYM,181,SYM, 45, 46,SYM,SYM, 29,182, 38,183,SYM, 26,184, /* AX */ - SYM,185,SYM,186,SYM, 45, 46,SYM,SYM, 29,187, 38,188,SYM, 26,189, /* BX */ - 190, 18,191,192, 42,193,194,195, 25, 24,196,197, 23, 11,198, 39, /* CX */ - 199,200, 35, 37,201,202, 41,SYM, 27, 31, 33,203, 43, 28,204,205, /* DX */ - 206, 18,207,208, 42,209,210,211, 25, 24,212,213, 23, 11,214, 39, /* EX */ - 215,216, 35, 37,217,218, 41,SYM, 27, 31, 33,219, 43, 28,220,SYM, /* FX */ + SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */ + 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */ + 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 41,168,169, 24,170, 42, 43, 19,171, 25, 41, 25,172,173, 24,174, /* 8X */ + 175, 40, 12, 40, 47, 47,176, 37,177,178, 42,179, 33, 23, 23, 43, /* 9X */ + SYM,SYM,180,SYM,SYM,SYM,SYM, 46,SYM,SYM,SYM,181,SYM,SYM,182,183, /* AX */ + 184,185,SYM,SYM,186,187,SYM,SYM,188,189,190, 44, 44,191,192,193, /* BX */ + 194,195,SYM,SYM,196, 36,SYM,SYM,SYM,SYM,SYM, 36,197,198,199,200, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,201,202,203, 27,SYM,SYM, 27,204, /* DX */ + 205, 29,SYM,SYM, 29, 45, 45, 19, 38, 38, 12, 26, 26,206, 37,207, /* EX */ + 208, 32, 33, 32,209,210,211,212, 28, 28,213,214,215,216,217,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 82; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 3, 66, 22, 67, 14, 68, 13, 69, 1, 70, 31, 71, 30, 72, 17, + 73, 4, 74, 21, 75, 11, 76, 10, 77, 16, 78, 2, 79, 0, 80, 9, + 81, 39, 82, 8, 83, 6, 84, 5, 85, 15, 86, 7, 87, 35, 88, 34, + 89, 20, 90, 18, 97, 3, 98, 22, 99, 14, 100, 13, 101, 1,102, 31, + 103, 30, 104, 17, 105, 4, 106, 21, 107, 11, 108, 10, 109, 16,110, 2, + 111, 0, 112, 9, 113, 39, 114, 8, 115, 6, 116, 5, 117, 15,118, 7, + 119, 35, 120, 34, 121, 20, 122, 18, 193, 19, 201, 24, 205, 12,211, 37, + 218, 33, 221, 28, 225, 19, 233, 24, 237, 12, 243, 37, 250, 33,253, 28, + 268, 25, 269, 25, 270, 40, 271, 40, 282, 23, 283, 23, 327, 36,328, 36, + 344, 27, 345, 27, 352, 29, 353, 29, 356, 38, 357, 38, 366, 32,367, 32, + 381, 26, 382, 26, +}; + /* Model Table: - * Total sequences: 1025 - * First 512 sequences: 0.9786035192432675 - * Next 512 sequences (512-1024): 0.02139445610866691 - * Rest: 2.0246480655940202e-06 + * Total sequences: 1037 + * First 512 sequences: 0.9751874547460189 + * Next 512 sequences (512-1024): 0.024780958582584566 + * Rest: 3.158667139656693e-05 * Negative sequences: TODO */ static const PRUint8 CzechLangModel[] = { - 2,2,3,2,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3, - 2,3,3,0,0,3,3,3,0,2,3,0,3,0,3,2,2,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,2,3, - 2,3,3,0,0,3,3,3,0,3,3,2,3,2,3,2,2,2,2,2,2, - 3,3,3,3,3,3,3,2,0,2,3,3,3,3,3,3,2,3,3,3, - 3,2,2,3,3,2,2,0,3,2,3,3,3,0,2,0,0,2,0,0,2, - 3,3,3,2,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3, - 3,3,3,0,0,3,3,3,0,3,3,0,3,0,3,2,2,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3, - 0,2,3,0,2,3,3,2,0,3,3,0,3,0,2,2,2,2,2,0,2, - 3,3,3,3,3,2,2,3,2,3,3,3,3,3,2,2,2,3,3,3, - 3,2,2,3,3,2,0,3,3,3,0,3,2,0,0,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2, - 3,2,3,0,2,2,0,0,2,0,2,2,2,2,0,2,2,0,2,0,0, - 3,3,3,3,3,2,2,0,2,3,3,3,3,3,2,3,0,2,3,3, - 3,2,2,3,3,2,2,2,3,3,0,3,0,0,0,2,0,2,0,0,0, - 3,3,3,3,3,3,3,0,2,3,3,3,2,3,2,2,2,2,3,0, - 3,2,2,3,2,2,0,3,2,2,2,3,2,0,2,2,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,2,3,3, - 3,0,3,0,3,3,2,0,3,2,2,3,3,0,0,2,2,2,2,2,2, - 3,3,3,3,3,3,2,2,2,2,2,3,3,3,2,2,2,2,3,3, - 3,0,2,0,3,2,2,0,3,3,2,3,2,0,0,2,0,2,0,0,0, - 0,2,3,0,2,3,3,3,3,3,3,2,3,0,3,2,3,3,0,3, - 0,3,2,0,0,3,3,2,0,2,0,0,2,0,0,0,0,0,2,0,0, - 3,3,3,3,3,3,2,3,0,3,3,0,2,3,3,3,2,2,3,2, - 3,2,3,0,3,2,2,2,3,0,2,3,2,0,0,0,0,2,0,0,0, - 2,2,3,3,3,3,3,3,3,3,3,0,3,2,3,3,3,3,3,3, - 2,3,3,0,0,3,3,2,0,3,2,0,2,0,2,2,2,0,2,2,0, - 3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,3,2,3,2,2, - 2,0,2,0,2,0,0,0,0,0,2,2,0,0,0,2,0,0,0,0,2, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,2,3,2, - 3,0,2,3,3,2,2,2,2,2,2,3,2,0,2,2,2,0,0,0,0, - 3,3,3,3,3,2,2,0,3,3,3,3,2,3,2,2,2,2,3,2, - 3,2,3,3,3,2,3,2,2,2,2,3,2,0,0,2,0,2,0,0,0, - 3,3,3,3,3,3,2,3,2,3,3,2,2,3,2,2,2,0,3,0, - 3,2,2,0,2,2,2,2,3,0,2,2,0,0,0,0,2,2,2,0,0, - 0,0,3,0,0,3,3,3,2,3,3,0,3,0,3,3,3,3,0,3, - 0,2,2,0,0,2,3,2,0,3,2,0,0,0,0,2,0,0,0,2,0, - 3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,3,2, - 3,2,2,0,2,2,0,2,2,2,2,2,0,2,0,2,0,2,0,0,0, - 2,2,3,2,2,3,3,3,3,2,3,2,3,2,3,2,3,3,0,3, - 0,2,3,0,0,2,3,2,0,3,2,0,2,2,2,0,0,0,2,0,0, - 2,3,3,3,3,2,3,2,2,2,2,3,2,2,2,2,3,2,2,2, - 0,2,2,0,0,2,0,0,0,3,2,2,0,2,0,2,0,2,0,2,0, - 3,3,3,3,3,3,3,3,0,3,3,3,2,3,2,2,2,2,3,2, - 3,3,2,3,2,2,0,2,3,2,0,2,0,0,0,2,0,2,0,0,0, - 0,0,3,2,0,3,3,2,3,3,3,0,3,0,3,3,2,3,0,2, - 0,3,0,0,0,2,3,3,0,3,0,0,0,0,0,2,0,0,2,2,0, - 2,0,3,0,0,3,2,2,2,2,2,0,3,0,0,2,3,3,0,3, + 2,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2, + 2,3,3,0,0,3,3,3,0,2,3,3,0,0,3,2,2,0,2,0,0, + 3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,2, + 2,3,3,0,0,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,2, + 3,3,3,3,3,3,3,2,2,2,2,3,3,3,3,3,2,2,3,3, + 3,2,2,3,3,3,2,0,3,2,3,3,3,0,2,1,0,2,0,2,0, + 2,2,3,2,2,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0, + 3,3,3,0,0,3,3,3,0,3,3,3,0,0,2,2,2,0,2,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3, + 2,2,3,0,2,3,3,2,0,3,3,3,0,0,2,2,2,2,2,2,0, + 3,3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,2,3,3,3, + 3,2,2,3,3,1,0,3,3,3,2,2,3,0,0,2,2,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3, + 3,2,3,0,2,2,0,0,1,0,2,2,2,2,0,2,0,0,2,0,0, + 3,3,3,3,3,2,3,0,3,2,3,3,3,3,2,3,0,2,3,3, + 3,2,2,3,3,2,2,2,3,3,0,0,3,0,0,0,2,2,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,2,3,3, + 3,0,3,0,3,3,2,0,3,3,3,2,3,0,2,2,2,2,0,2,0, + 3,3,3,3,3,3,3,2,3,2,3,2,3,2,2,3,2,2,0,3, + 2,2,2,3,2,2,0,3,2,2,0,0,3,0,0,0,2,0,0,0,0, + 3,3,3,3,3,3,3,2,2,2,3,3,3,2,2,3,3,2,3,3, + 3,0,2,0,3,2,2,0,3,3,2,2,3,0,0,0,2,2,0,0,0, + 3,3,3,3,3,3,2,3,3,2,3,2,0,3,3,3,2,2,2,3, + 3,2,2,0,3,2,2,2,3,0,2,2,3,0,0,0,0,2,0,0,0, + 0,2,3,2,2,3,3,3,3,3,3,3,0,3,3,0,3,3,3,0, + 0,3,2,0,0,3,3,2,0,2,2,0,0,0,0,0,0,0,2,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,2,3, + 3,2,2,3,3,2,2,2,2,2,2,2,3,0,2,2,2,0,0,0,0, + 3,3,3,3,3,3,2,2,2,2,2,3,3,3,2,2,2,3,2,2, + 2,0,2,0,2,0,0,0,0,0,0,2,2,0,0,0,2,0,0,2,0, + 2,2,3,3,2,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3, + 2,3,3,0,0,3,3,2,0,3,2,2,0,0,2,2,2,0,2,0,2, + 3,3,3,3,3,2,2,0,3,3,3,2,3,2,2,3,2,2,2,3, + 3,1,3,3,3,2,3,2,2,2,2,2,3,0,0,0,0,2,0,0,0, + 3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,3,2,0,0,3, + 3,2,2,0,2,2,2,2,3,0,0,2,2,2,0,2,0,2,2,0,0, + 3,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,3,2,3, + 3,2,2,0,2,2,0,2,2,2,0,0,2,2,0,2,2,2,0,0,0, + 0,0,3,2,0,3,3,3,3,3,3,3,0,3,3,0,3,3,3,0, + 0,2,2,0,0,2,3,2,0,3,0,2,0,0,0,0,2,0,0,0,0, + 2,2,3,2,2,3,3,3,2,3,3,3,2,2,3,2,3,3,3,0, + 0,2,3,0,0,2,3,2,0,3,2,1,0,2,0,0,0,0,2,0,0, + 2,3,3,3,3,2,3,2,2,2,2,2,3,2,2,2,3,2,2,3, + 0,2,2,0,0,2,0,0,0,3,0,2,2,2,0,0,2,2,0,0,2, + 3,3,3,3,3,3,3,2,3,0,3,2,3,2,2,3,2,2,2,3, + 3,3,2,3,2,2,0,2,3,2,0,0,2,2,0,0,2,2,0,0,0, + 0,0,3,2,0,3,3,2,3,3,3,3,0,3,3,0,3,3,2,0, + 0,3,2,0,0,2,3,3,0,2,0,0,0,0,0,0,2,0,2,0,2, + 2,0,3,0,0,3,2,2,2,2,2,3,0,2,0,0,3,3,2,0, 0,0,0,0,0,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,0,0,2,3,3,3,3,2,0,0,0,3,0, - 0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,2,2,0,0,2,2,3,2,2,2,3,2,0,3,0, - 0,0,2,0,0,0,0,0,0,3,0,2,0,0,0,2,0,0,0,2,0, - 2,3,2,3,3,0,2,0,0,0,0,3,2,2,0,0,0,0,2,2, - 0,0,2,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, - 0,0,2,0,0,3,3,3,2,3,2,0,2,2,3,2,3,2,0,3, - 0,2,3,0,0,2,2,2,0,3,2,0,0,0,0,0,0,0,0,0,0, - 2,3,3,3,3,3,2,2,2,0,3,3,3,3,0,0,0,0,2,0, + 3,3,3,3,3,3,2,0,2,0,3,3,3,0,2,3,0,0,0,3, + 0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0, + 3,3,3,2,3,2,2,0,2,0,2,2,3,3,2,2,2,0,0,3, + 0,0,2,0,0,0,0,0,0,2,0,0,2,0,0,0,2,0,0,0,2, + 2,3,2,3,3,0,2,0,0,0,0,2,3,0,0,2,0,0,2,2, + 0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, + 0,0,2,0,0,3,3,3,2,2,2,2,0,2,3,2,3,2,3,0, + 0,2,3,0,0,2,2,2,0,3,0,1,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,3,2,2,0,2,3,3,3,0,0,3,0,0,0,2, 0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0, - 3,3,2,3,3,2,2,0,2,3,3,2,0,3,0,2,2,0,2,2, - 3,0,0,0,2,0,0,0,2,0,2,2,0,2,0,0,0,2,0,0,0, - 0,0,2,2,0,0,3,3,0,2,2,0,2,0,2,2,3,2,0,3, - 0,2,2,0,0,2,3,2,0,0,0,0,0,0,0,2,0,0,0,0,0, - 3,3,3,3,3,2,2,2,2,3,3,0,0,2,2,2,2,2,2,0, + 3,3,3,3,3,2,2,2,3,2,3,1,0,2,2,3,2,2,0,2, 2,0,0,0,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0, - 0,0,2,0,0,2,3,2,2,2,2,0,2,0,2,2,2,2,0,3, - 0,2,2,0,0,3,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,3,3,0,0,3,2,2,2,2,2,2,2,2,0,2,0, - 2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0, - 2,0,0,2,0,0,2,0,0,0,0,0,2,3,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, - 2,2,2,2,3,0,2,0,0,0,2,0,2,2,2,0,0,2,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, - 0,0,2,0,0,0,2,0,0,0,2,0,0,0,0,2,2,0,0,3, - 0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 2,0,0,2,0,0,0,0,0,0,0,0,2,2,0,0,0,0,2,0, + 3,3,2,3,3,2,2,0,3,2,3,0,1,2,2,3,2,0,2,2, + 3,2,2,0,2,0,0,0,2,0,2,2,2,2,0,0,0,2,0,0,0, + 0,0,2,0,0,0,3,3,2,0,2,2,0,2,2,0,3,2,3,0, + 0,2,2,0,0,2,3,2,0,0,0,0,0,0,0,0,1,0,0,0,0, + 0,0,2,0,0,2,3,2,2,2,2,2,0,2,2,0,2,2,3,0, + 0,2,2,0,0,3,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0, + 2,2,2,2,3,3,0,0,2,2,2,2,2,2,2,2,2,0,0,2, + 2,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,0,0,0,0, + 2,2,2,2,3,0,2,0,2,0,0,2,0,0,2,2,0,2,0,0, + 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, + 2,0,0,0,0,0,2,0,0,0,0,2,0,0,0,3,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, + 0,0,2,0,0,2,2,0,0,0,2,0,0,2,0,0,2,0,3,0, + 0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, + 2,0,0,2,0,2,0,0,0,0,0,0,0,0,0,2,2,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; -const SequenceModel Windows_1250CzechModel = +const SequenceModel Iso_8859_2CzechModel = { - Windows_1250_CharToOrderMap, + Iso_8859_2_CharToOrderMap, CzechLangModel, 41, - (float)0.9786035192432675, + (float)0.9751874547460189, PR_TRUE, - "WINDOWS-1250", + "ISO-8859-2", "cs" }; -const SequenceModel Mac_CentraleuropeCzechModel = +const SequenceModel Windows_1250CzechModel = { - Mac_Centraleurope_CharToOrderMap, + Windows_1250_CharToOrderMap, CzechLangModel, 41, - (float)0.9786035192432675, + (float)0.9751874547460189, PR_TRUE, - "MAC-CENTRALEUROPE", + "WINDOWS-1250", "cs" }; @@ -267,19 +284,29 @@ const SequenceModel Ibm852CzechModel = Ibm852_CharToOrderMap, CzechLangModel, 41, - (float)0.9786035192432675, + (float)0.9751874547460189, PR_TRUE, "IBM852", "cs" }; -const SequenceModel Iso_8859_2CzechModel = +const SequenceModel Mac_CentraleuropeCzechModel = { - Iso_8859_2_CharToOrderMap, + Mac_Centraleurope_CharToOrderMap, CzechLangModel, 41, - (float)0.9786035192432675, + (float)0.9751874547460189, PR_TRUE, - "ISO-8859-2", + "MAC-CENTRALEUROPE", "cs" }; + +const LanguageModel CzechModel = +{ + "cs", + Unicode_CharOrder, + 82, + CzechLangModel, + 41, + (float)0.9751874547460189, +}; diff --git a/src/LangModels/LangEsperantoModel.cpp b/src/LangModels/LangEsperantoModel.cpp index 1d55ec7..e0b8fed 100644 --- a/src/LangModels/LangEsperantoModel.cpp +++ b/src/LangModels/LangEsperantoModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Esperanto *********/ /** * Generated by BuildLangModel.py - * On: 2015-12-04 01:27:38.177516 + * On: 2021-03-16 18:54:42.163514 **/ /* Character Mapping Table: @@ -67,66 +68,76 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 7, 13, 4, 1, /* 4X */ - 14, 32, 5, 8, 6, 12, 16, 27, 33, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 7, 13, 4, 1, /* 6X */ - 14, 32, 5, 8, 6, 12, 16, 27, 33, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 18, 17, 10, 2, 19, 15, 20, 3, 11, 9, 7, 13, 4, 1, /* 4X */ + 14, 34, 5, 8, 6, 12, 16, 25, 33, 26, 21,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 17, 10, 2, 19, 15, 20, 3, 11, 9, 7, 13, 4, 1, /* 6X */ + 14, 34, 5, 8, 6, 12, 16, 25, 33, 26, 21,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 56,SYM,SYM,SYM,ILL, 34,SYM,SYM, 57, 53, 58, 28,SYM,ILL, 40, /* AX */ - SYM, 59,SYM,SYM,SYM,SYM, 34,SYM,SYM, 60, 53, 61, 28,SYM,ILL, 40, /* BX */ - 44, 29, 46,ILL, 43, 62, 24, 38, 41, 31, 48, 50, 54, 35, 49, 52, /* CX */ - ILL, 42, 63, 30, 47, 64, 36,SYM, 22, 51, 39, 55, 37, 23, 26, 45, /* DX */ - 44, 29, 46,ILL, 43, 65, 24, 38, 41, 31, 48, 50, 54, 35, 49, 52, /* EX */ - ILL, 42, 66, 30, 47, 67, 36,SYM, 22, 51, 39, 55, 37, 23, 26,SYM, /* FX */ + SYM, 55,SYM,SYM,SYM,ILL, 31,SYM,SYM, 56, 51, 57, 28,SYM,ILL, 41, /* AX */ + SYM, 58,SYM,SYM,SYM,SYM, 31,SYM,SYM, 53, 51, 59, 28,SYM,ILL, 41, /* BX */ + 46, 29, 50,ILL, 39, 60, 24, 40, 38, 30, 48, 49, 61, 36, 47, 54, /* CX */ + ILL, 42, 52, 32, 45, 62, 35,SYM, 22, 63, 44, 64, 37, 23, 27, 43, /* DX */ + 46, 29, 50,ILL, 39, 65, 24, 40, 38, 30, 48, 49, 66, 36, 47, 54, /* EX */ + ILL, 42, 52, 32, 45, 67, 35,SYM, 22, 68, 44, 69, 37, 23, 27,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 64; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 18, 67, 17, 68, 10, 69, 2, 70, 19, 71, 15, 72, 20, + 73, 3, 74, 11, 75, 9, 76, 7, 77, 13, 78, 4, 79, 1, 80, 14, + 82, 5, 83, 8, 84, 6, 85, 12, 86, 16, 87, 25, 89, 26, 90, 21, + 97, 0, 98, 18, 99, 17, 100, 10, 101, 2, 102, 19, 103, 15,104, 20, + 105, 3, 106, 11, 107, 9, 108, 7, 109, 13, 110, 4, 111, 1,112, 14, + 114, 5, 115, 8, 116, 6, 117, 12, 118, 16, 119, 25, 121, 26,122, 21, + 193, 29, 201, 30, 225, 29, 233, 30, 264, 24, 265, 24, 284, 22,285, 22, + 292, 31, 293, 31, 308, 28, 309, 28, 348, 27, 349, 27, 364, 23,365, 23, +}; + /* Model Table: - * Total sequences: 989 - * First 512 sequences: 0.9942980632768038 - * Next 512 sequences (512-1024): 0.0057019367231962385 - * Rest: -5.0306980803327406e-17 + * Total sequences: 1066 + * First 512 sequences: 0.995442680189542 + * Next 512 sequences (512-1024): 0.0044874885692908805 + * Rest: 6.983124116715766e-05 * Negative sequences: TODO */ static const PRUint8 EsperantoLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,2,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,0,0,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,2,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,3,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,3,3,0,0,2,3,2,2,2,3,3,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,0,3,3,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,0,0,3,0,2,0,3,2,3,2,2,0, - 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,3,2,3,3,3,0,0,0,3,2,0,2,3,2,2,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,3,3,2,2,2,3,3,0,0,2,3,0,3,2,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,2,0,2,2,2,2,2,2,0,0,0,0,0,0,3,3,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,3,2,0,0,0,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,2,3,3,3,2,0,0,0,2,3,2,2,0,3,2,2,0,0,0, - 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,2,0,2,2,2,2,3,0,0,0,2,2,0,0,3,2,2,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,0,3,3,2,2,3,2,2,2,2,3,0,2,2,3,2,2,2,2,2,3,0,2,0, - 3,3,3,3,2,3,2,2,2,2,2,3,3,2,2,2,0,0,2,0,2,2,0,0,2,2,0,0,0,3,2,2,0,0,0, - 3,3,3,3,0,3,3,3,3,3,2,0,3,2,2,2,0,3,2,2,3,3,0,0,0,3,0,0,0,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,3,2,0,2,0,0,0,3,2,0,0,3,3,3,0,0,0, - 3,3,3,3,0,3,3,3,2,2,2,2,3,3,2,3,2,0,2,3,0,0,0,0,0,2,0,0,0,0,0,2,0,3,0, - 3,3,3,3,3,2,2,3,3,3,2,2,3,2,2,2,2,3,3,2,2,0,0,0,0,3,2,2,0,2,2,2,2,0,0, - 3,3,3,3,3,3,3,3,2,2,2,0,3,3,2,0,2,0,2,2,0,2,0,0,0,2,0,2,0,2,2,2,0,2,0, - 3,3,3,3,0,0,2,3,0,0,2,2,3,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,0,2,2,3,2,0,0,2,0,3,0,0,0,0,0,0,0,0, - 3,3,3,3,0,0,2,2,0,2,3,2,3,3,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,2,0,2,0,0,0, - 3,3,3,3,2,2,3,2,0,2,0,2,3,2,2,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,2,2,2,3,2,0,0,2,0,0,0,0,0,0,2,0,2,0,0,0,2,0,3,0,0,2,0,0,0,0, - 3,3,2,2,2,2,0,2,0,2,0,0,3,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,3,3,3,3,3,2,3,0,0,2,2,2,2,3,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,3,3,2,2,2,2,2,2,0,0,2,2,2,0,2,2,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0, - 2,2,2,0,3,3,3,3,3,2,2,0,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0, - 2,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,3,0,0,2,2,0,0,0,0,2,2,2,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0, - 3,3,3,2,2,0,2,0,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,3,2,3,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,2,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,2,3,3,2,2,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,0,2,2,3,2,0,3,3,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,0,3,2,2,0,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,0,0,2,3,3,0,0,2,2,0, + 3,3,3,3,3,3,3,3,3,3,2,1,3,3,3,2,2,2,2,2,2,2,2,2,2,0,0,0,0,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,3,3,2,2,0,0,2,2,3,2,0,3,3,0, + 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,0,0,2,2,2,3,0,0,1,0,0,2,0,0,3,3,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,2,3,2,0,2,0,2,3,2,2,2,2,2, + 3,3,3,3,2,3,2,3,3,2,2,3,3,2,2,2,2,2,2,2,1,2,0,0,2,0,2,2,0,3,1,0, + 3,3,3,3,2,3,3,3,3,3,2,2,3,2,2,2,2,3,2,2,3,3,0,0,2,1,3,0,0,2,2,2, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,2,3,0,2,2,2,0,0,2,3,2,0,2,2,0, + 3,3,3,3,2,3,3,3,2,2,2,2,3,3,2,2,2,0,2,3,2,0,0,0,0,0,2,0,0,2,2,0, + 3,3,3,3,3,3,3,3,2,2,2,2,3,3,2,2,0,2,2,2,2,0,0,2,0,3,2,0,0,2,2,0, + 3,3,3,3,3,1,2,3,3,3,2,2,3,2,2,2,2,2,2,2,2,3,0,0,0,2,3,1,0,2,2,0, + 3,3,3,3,0,2,2,3,2,2,2,2,3,2,2,2,1,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0, + 3,2,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,0,2,2,0,3,0,0,2,0,0,3,0,0,0,0, + 3,3,3,3,0,2,0,2,0,2,3,0,3,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,2,3,2,2,0,2,2,2,0,0,2,0,2,2,2,0,0,0,2,2,0,0,0,0,0, + 3,3,3,3,3,2,2,2,3,2,2,2,2,2,2,2,2,3,2,2,2,2,0,0,0,2,0,0,0,2,1,0, + 3,3,3,3,2,2,3,2,0,2,2,2,3,0,2,0,3,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0, + 3,3,2,2,2,2,0,2,0,0,0,0,3,2,0,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,2,3,3,3,3,3,2,2,0,0,2,1,2,2,2,2,0,0,2,0,0,0,0,2,0,0,0,0,0, + 2,2,2,2,3,3,3,2,2,2,2,1,0,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,0,0,2,0, + 3,3,3,3,2,0,2,2,0,0,1,0,2,2,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0, }; @@ -134,9 +145,19 @@ const SequenceModel Iso_8859_3EsperantoModel = { Iso_8859_3_CharToOrderMap, EsperantoLangModel, - 35, - (float)0.9942980632768038, + 32, + (float)0.995442680189542, PR_FALSE, "ISO-8859-3", "eo" }; + +const LanguageModel EsperantoModel = +{ + "eo", + Unicode_CharOrder, + 64, + EsperantoLangModel, + 32, + (float)0.995442680189542, +}; diff --git a/src/LangModels/LangEstonianModel.cpp b/src/LangModels/LangEstonianModel.cpp index 71d9c66..f1ed29c 100644 --- a/src/LangModels/LangEstonianModel.cpp +++ b/src/LangModels/LangEstonianModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Estonian *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-26 23:47:54.476870 + * On: 2021-03-16 19:01:52.571827 **/ /* Character Mapping Table: @@ -67,39 +68,39 @@ static const unsigned char Iso_8859_4_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 55, 56, 57,SYM, 58, 59,SYM,SYM, 29, 45, 60, 61,SYM, 32,SYM, /* AX */ - SYM, 62,SYM, 63,SYM, 64, 65,SYM,SYM, 29, 45, 66, 67, 68, 32, 69, /* BX */ - 37, 43, 70, 71, 18, 44, 47, 72, 73, 33, 74, 75, 76, 36, 77, 39, /* CX */ - 78, 79, 31, 80, 81, 20, 24,SYM, 38, 82, 52, 83, 21, 84, 34, 85, /* DX */ - 37, 43, 86, 87, 18, 44, 47, 88, 89, 33, 90, 91, 92, 36, 93, 39, /* EX */ - 94, 95, 31, 96, 97, 20, 24,SYM, 38, 98, 52, 99, 21,100, 34,SYM, /* FX */ + SYM, 55, 56, 57,SYM, 58, 59,SYM,SYM, 30, 47, 60, 61,SYM, 33,SYM, /* AX */ + SYM, 62,SYM, 63,SYM, 64, 65,SYM,SYM, 30, 47, 66, 67, 68, 33, 69, /* BX */ + 37, 44, 70, 71, 18, 43, 45, 72, 73, 31, 74, 75, 76, 36, 77, 41, /* CX */ + 78, 79, 32, 80, 81, 19, 24,SYM, 39, 82, 53, 83, 21, 84, 34, 85, /* DX */ + 37, 44, 86, 87, 18, 43, 45, 88, 89, 31, 90, 91, 92, 36, 93, 41, /* EX */ + 94, 95, 32, 96, 97, 19, 24,SYM, 39, 98, 53, 99, 21,100, 34,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Windows_1252_CharToOrderMap[] = +static const unsigned char Iso_8859_13_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM, 29,SYM,102,ILL, 32,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 29,SYM,103,ILL, 32,104, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 50,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 40, 43,105,106, 18, 44, 47, 48, 41, 33,107,108, 35, 36,109,110, /* CX */ - 46,111, 53, 42,112, 20, 24,SYM, 38, 54, 52,113, 21,114,115,116, /* DX */ - 40, 43,117,118, 18, 44, 47, 48, 41, 33,119,120, 35, 36,121,122, /* EX */ - 46,123, 53, 42,124, 20, 24,SYM, 38, 54, 52,125, 21,126,127,128, /* FX */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM,101,SYM,SYM,SYM,SYM, 45, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM,102,SYM,SYM,SYM,SYM, 45, /* BX */ + 103,104, 37,105, 18, 43,106, 47,107, 31,108,109,110,111, 41,112, /* CX */ + 30,113,114, 42, 32, 19, 24,SYM,115, 54,116, 34, 21, 51, 33,117, /* DX */ + 118,119, 37,120, 18, 43,121, 47,122, 31,123,124,125,126, 41,127, /* EX */ + 30,128,129, 42, 32, 19, 24,SYM,130, 54,131, 34, 21, 51, 33,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,39 +110,39 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 29,SYM, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 32, 50,SYM,SYM, 32,SYM,SYM,SYM,129,130,131,SYM, /* BX */ - 40, 43,132,133, 18, 44, 47, 48, 41, 33,134,135, 35, 36,136,137, /* CX */ - 46,138, 53, 42,139, 20, 24,SYM, 38, 54, 52,140, 21,141,142,143, /* DX */ - 40, 43,144,145, 18, 44, 47, 48, 41, 33,146,147, 35, 36,148,149, /* EX */ - 46,150, 53, 42,151, 20, 24,SYM, 38, 54, 52,152, 21,153,154,155, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 30,SYM, 30,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 33, 52,SYM,SYM, 33,SYM,SYM,SYM,132,133,134,SYM, /* BX */ + 38, 44,135,136, 18, 43, 45, 50, 40, 31,137,138, 35, 36,139,140, /* CX */ + 46,141, 49, 42,142, 19, 24,SYM, 39, 48, 53,143, 21,144,145,146, /* DX */ + 38, 44,147,148, 18, 43, 45, 50, 40, 31,149,150, 35, 36,151,152, /* EX */ + 46,153, 49, 42,154, 19, 24,SYM, 39, 48, 53,155, 21,156,157,158, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_13_CharToOrderMap[] = +static const unsigned char Windows_1252_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM,156,SYM,SYM,SYM,SYM, 47, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM,157,SYM,SYM,SYM,SYM, 47, /* BX */ - 158,159, 37,160, 18, 44,161, 45,162, 33,163,164,165,166, 39,167, /* CX */ - 29,168,169, 42, 31, 20, 24,SYM,170, 51,171, 34, 21, 49, 32,172, /* DX */ - 173,174, 37,175, 18, 44,176, 45,177, 33,178,179,180,181, 39,182, /* EX */ - 29,183,184, 42, 31, 20, 24,SYM,185, 51,186, 34, 21, 49, 32,SYM, /* FX */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,159,SYM,SYM,SYM,SYM,SYM,SYM, 30,SYM,160,ILL, 33,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 30,SYM,161,ILL, 33,162, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 38, 44,163,164, 18, 43, 45, 50, 40, 31,165,166, 35, 36,167,168, /* CX */ + 46,169, 49, 42,170, 19, 24,SYM, 39, 48, 53,171, 21,172,173,174, /* DX */ + 38, 44,175,176, 18, 43, 45, 50, 40, 31,177,178, 35, 36,179,180, /* EX */ + 46,181, 49, 42,182, 19, 24,SYM, 39, 48, 53,183, 21,184,185,186, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -151,64 +152,79 @@ static const unsigned char Windows_1257_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 28, 11, 3, 4, 6, 13, 27, 26, 25, 30,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ + 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,SYM,SYM,SYM, /* 8X */ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,SYM,SYM,ILL, /* 9X */ - SYM,ILL,SYM,SYM,SYM,ILL,SYM,SYM, 38,SYM,187,SYM,SYM,SYM,SYM, 47, /* AX */ - SYM,SYM,SYM,SYM,SYM, 50,SYM,SYM, 38,SYM,188,SYM,SYM,SYM,SYM, 47, /* BX */ - 189,190, 37,191, 18, 44,192, 45,193, 33,194,195,196,197, 39,198, /* CX */ - 29,199,200, 42, 31, 20, 24,SYM,201, 51,202, 34, 21, 49, 32,203, /* DX */ - 204,205, 37,206, 18, 44,207, 45,208, 33,209,210,211,212, 39,213, /* EX */ - 29,214,215, 42, 31, 20, 24,SYM,216, 51,217, 34, 21, 49, 32,SYM, /* FX */ + SYM,ILL,SYM,SYM,SYM,ILL,SYM,SYM, 39,SYM,187,SYM,SYM,SYM,SYM, 45, /* AX */ + SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM, 39,SYM,188,SYM,SYM,SYM,SYM, 45, /* BX */ + 189,190, 37,191, 18, 43,192, 47,193, 31,194,195,196,197, 41,198, /* CX */ + 30,199,200, 42, 32, 19, 24,SYM,201, 54,202, 34, 21, 51, 33,203, /* DX */ + 204,205, 37,206, 18, 43,207, 47,208, 31,209,210,211,212, 41,213, /* EX */ + 30,214,215, 42, 32, 19, 24,SYM,216, 54,217, 34, 21, 51, 33,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 68; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 20, 67, 23, 68, 10, 69, 2, 70, 22, 71, 15, 72, 16, + 73, 1, 74, 17, 75, 8, 76, 5, 77, 12, 78, 7, 79, 9, 80, 14, + 81, 29, 82, 11, 83, 3, 84, 4, 85, 6, 86, 13, 87, 27, 88, 26, + 89, 25, 90, 28, 97, 0, 98, 20, 99, 23, 100, 10, 101, 2,102, 22, + 103, 15, 104, 16, 105, 1, 106, 17, 107, 8, 108, 5, 109, 12,110, 7, + 111, 9, 112, 14, 113, 29, 114, 11, 115, 3, 116, 4, 117, 6,118, 13, + 119, 27, 120, 26, 121, 25, 122, 28, 196, 18, 201, 31, 213, 19,214, 24, + 220, 21, 228, 18, 233, 31, 245, 19, 246, 24, 252, 21, 332, 32,333, 32, + 352, 30, 353, 30, 381, 33, 382, 33, +}; + /* Model Table: - * Total sequences: 853 - * First 512 sequences: 0.9972721312183132 - * Next 512 sequences (512-1024): 0.0027278687816868537 - * Rest: -5.204170427930421e-18 + * Total sequences: 869 + * First 512 sequences: 0.9973685549586747 + * Next 512 sequences (512-1024): 0.002631445041325318 + * Rest: -3.122502256758253e-17 * Negative sequences: TODO */ static const PRUint8 EstonianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,0,3,3,3,3,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,0,3,3,3,2,0,2,0,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,0,3,3,2,3,3,3,2,2,0,3,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,0,0,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,0,0,2,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,2,3,3,3,3,2,3,3,0,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,3,2,2,3,3,0,2,0,0,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,2,3,3,0,3,3,3,2,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,3,0,2,0,3,0,0,0,2,2,2,0,0,0,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,3,3,3,3,3,2,3,3,0,2,0,2,2,0,0, - 3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,2,2,2,3,0,3,2,0,2,3,2,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,2,3,0,3,3,0,2,3,3,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,2,2,2,2,2,0,3,2,0,2,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,0,3,3,3,0,3,3,3,2,0,3,0,2,0,0,0,2,0, - 3,3,3,2,3,0,3,3,0,3,0,2,3,0,3,0,0,0,3,0,3,3,0,0,2,0,0,0,0,0,0,0,0, - 2,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,0,0,0,0,0,2,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,2,3,0,3,2,0,0,0,2,3,0,2,0,2,0,2,0,2,2,0,0,0,0,0,0, - 0,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,3,3,0,3,3,0,0,0,0,0,0,0,0,0,2,0,0, - 3,0,2,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,0,3,0,3,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,2,0,3,2,3,0,0,0,2,0,2,2,0,0,3,3,3,2,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,0,0,2,0,0,2,3,0,3,0,0,2,0,0,0,0, - 2,3,3,3,3,3,0,3,3,2,3,3,2,3,3,3,2,2,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,2,3,2,3,2,0,3,3,0,0,0,0,0,0,0,3,2,0,2,0,0,0,2,3,0, - 3,3,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0, - 3,3,3,2,2,2,2,2,2,3,0,2,0,0,0,2,2,0,0,0,0,0,2,0,0,2,0,2,0,0,0,0,0, - 3,3,2,0,0,0,3,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, - 2,3,3,0,0,2,3,2,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0, - 2,3,2,2,0,2,2,2,2,3,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 0,0,0,2,2,2,2,2,2,0,0,0,2,0,0,2,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,3,3,3,3,3,2,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,3,2,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,2,3,3,2,2,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,0,2,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,3,2,3,3,3,2,2,2,0,3,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,2,0,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,2,2,3,0,0,2,2,0,2,2,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,3,2,2,3,0,3,2,2,3,0,2, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,0,2,0,0,0,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,2,3,3,0,3,3,3,2,2,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,0,2,0,3,0,0,0,2,2,2,0,0,0,0,2,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,2,2,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,3,3,3,3,3,2,3,3,0,2,2,0,2,2,0,0, + 3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,2,2,2,3,3,0,2,0,2,3,2,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,2,3,3,0,3,0,2,3,2,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,2,2,2,2,2,0,2,2,0,2,2,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,0,3,3,3,3,0,3,3,2,0,3,0,2,0,0,0,2,2,0, + 3,3,3,2,3,0,3,3,2,3,2,0,3,0,2,0,0,0,3,3,0,3,0,0,2,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,0,0,0,0,2,0,0,0,0,0,0,0, + 0,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,3,3,0,3,3,0,0,0,0,0,0,0,2,0,0,0,0,0, + 3,3,3,3,2,3,3,3,3,3,0,3,2,0,0,0,2,3,0,2,2,2,0,2,0,2,2,0,0,0,0,0,0,0, + 3,0,2,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,2,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,2,0,3,2,3,0,0,0,2,0,2,2,0,0,3,3,3,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,0,0,0,2,0,2,3,0,3,0,0,2,2,0,0,0,0, + 2,2,3,3,3,3,0,3,3,2,3,3,3,3,3,3,2,2,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,3,3,2,3,2,3,2,0,3,2,0,0,0,0,2,0,0,3,2,0,2,0,2,0,0,0,2,0, + 2,3,2,2,2,0,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0, + 3,3,3,2,2,2,2,2,2,3,0,2,0,0,0,2,3,0,0,0,0,0,2,0,0,2,0,2,0,0,0,0,0,0, + 2,3,3,2,0,2,2,2,2,3,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0, + 3,3,2,0,0,0,3,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, + 2,3,3,0,0,2,3,2,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0, + 0,0,0,2,0,2,0,2,2,2,2,2,2,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,2,2,2,2,2,2,0,0,0,2,0,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,2,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -216,21 +232,21 @@ const SequenceModel Iso_8859_4EstonianModel = { Iso_8859_4_CharToOrderMap, EstonianLangModel, - 33, - (float)0.9972721312183132, + 34, + (float)0.9973685549586747, PR_TRUE, "ISO-8859-4", "et" }; -const SequenceModel Windows_1252EstonianModel = +const SequenceModel Iso_8859_13EstonianModel = { - Windows_1252_CharToOrderMap, + Iso_8859_13_CharToOrderMap, EstonianLangModel, - 33, - (float)0.9972721312183132, + 34, + (float)0.9973685549586747, PR_TRUE, - "WINDOWS-1252", + "ISO-8859-13", "et" }; @@ -238,21 +254,21 @@ const SequenceModel Iso_8859_15EstonianModel = { Iso_8859_15_CharToOrderMap, EstonianLangModel, - 33, - (float)0.9972721312183132, + 34, + (float)0.9973685549586747, PR_TRUE, "ISO-8859-15", "et" }; -const SequenceModel Iso_8859_13EstonianModel = +const SequenceModel Windows_1252EstonianModel = { - Iso_8859_13_CharToOrderMap, + Windows_1252_CharToOrderMap, EstonianLangModel, - 33, - (float)0.9972721312183132, + 34, + (float)0.9973685549586747, PR_TRUE, - "ISO-8859-13", + "WINDOWS-1252", "et" }; @@ -260,9 +276,19 @@ const SequenceModel Windows_1257EstonianModel = { Windows_1257_CharToOrderMap, EstonianLangModel, - 33, - (float)0.9972721312183132, + 34, + (float)0.9973685549586747, PR_TRUE, "WINDOWS-1257", "et" }; + +const LanguageModel EstonianModel = +{ + "et", + Unicode_CharOrder, + 68, + EstonianLangModel, + 34, + (float)0.9973685549586747, +}; diff --git a/src/LangModels/LangFinnishModel.cpp b/src/LangModels/LangFinnishModel.cpp index cbc9528..23f7c58 100644 --- a/src/LangModels/LangFinnishModel.cpp +++ b/src/LangModels/LangFinnishModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Finnish *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-21 18:15:05.189948 + * On: 2021-03-16 19:06:31.129345 **/ /* Character Mapping Table: @@ -61,66 +62,66 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Iso_8859_15_CharToOrderMap[] = +static const unsigned char Iso_8859_1_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 27,SYM, 27,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 28, 61,SYM,SYM, 28,SYM,SYM,SYM, 62, 63, 64,SYM, /* BX */ - 49, 35, 65, 46, 11, 56, 39, 37, 40, 30, 51, 31, 66, 36, 67, 57, /* CX */ - 68, 58, 52, 33, 34, 59, 22,SYM, 69, 70, 38, 71, 32, 72, 73, 55, /* DX */ - 49, 35, 74, 46, 11, 56, 39, 37, 40, 30, 51, 31, 75, 36, 76, 57, /* EX */ - 77, 58, 52, 33, 34, 59, 22,SYM, 78, 79, 38, 80, 32, 81, 82, 83, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32, 66, 59, /* CX */ + 67, 58, 50, 33, 35, 53, 22,SYM, 68, 69, 41, 70, 34, 71, 72, 56, /* DX */ + 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32, 73, 59, /* EX */ + 74, 58, 50, 33, 35, 53, 22,SYM, 75, 76, 41, 77, 34, 78, 79, 80, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Windows_1252_CharToOrderMap[] = +static const unsigned char Iso_8859_4_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM, 27,SYM, 85,ILL, 28,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 27,SYM, 86,ILL, 28, 87, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 88,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 49, 35, 89, 46, 11, 56, 39, 37, 40, 30, 51, 31, 90, 36, 91, 57, /* CX */ - 92, 58, 52, 33, 34, 59, 22,SYM, 93, 94, 38, 95, 32, 96, 97, 55, /* DX */ - 49, 35, 98, 46, 11, 56, 39, 37, 40, 30, 51, 31, 99, 36,100, 57, /* EX */ - 101, 58, 52, 33, 34, 59, 22,SYM,102,103, 38,104, 32,105,106,107, /* FX */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM, 81, 82, 48,SYM, 83, 84,SYM,SYM, 28, 85, 86, 87,SYM, 27,SYM, /* AX */ + SYM, 88,SYM, 48,SYM, 89, 90,SYM,SYM, 28, 91, 92, 93, 42, 27, 42, /* BX */ + 63, 36, 62, 47, 12, 54, 37, 94, 46, 30, 95, 31, 96, 32, 97, 98, /* CX */ + 99, 64,100,101, 35, 53, 22,SYM,102,103, 41,104, 34,105, 55, 56, /* DX */ + 63, 36, 62, 47, 12, 54, 37,106, 46, 30,107, 31,108, 32,109,110, /* EX */ + 111, 64,112,113, 35, 53, 22,SYM,114,115, 41,116, 34,117, 55,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_4_CharToOrderMap[] = +static const unsigned char Iso_8859_9_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,108,109, 47,SYM,110,111,SYM,SYM, 27,112,113,114,SYM, 28,SYM, /* AX */ - SYM,115,SYM, 47,SYM,116,117,SYM,SYM, 27,118,119,120, 45, 28, 45, /* BX */ - 53, 35,121, 46, 11, 56, 39,122, 43, 30,123, 31,124, 36,125,126, /* CX */ - 127, 54,128,129, 34, 59, 22,SYM,130,131, 38,132, 32,133,134, 55, /* DX */ - 53, 35,135, 46, 11, 56, 39,136, 43, 30,137, 31,138, 36,139,140, /* EX */ - 141, 54,142,143, 34, 59, 22,SYM,144,145, 38,146, 32,147,148,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM,118,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,119, 59, /* CX */ + 51, 58, 50, 33, 35, 53, 22,SYM,120,121, 41,122, 34, 49, 43, 56, /* DX */ + 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,123, 59, /* EX */ + 51, 58, 50, 33, 35, 53, 22,SYM,124,125, 41,126, 34, 45, 43,127, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -130,136 +131,149 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,149,SYM, 47,SYM,SYM,SYM,SYM, 39, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,150,SYM, 47,SYM,SYM,SYM,SYM, 39, /* BX */ - 151,152, 53, 41, 11, 56,153,154, 43, 30,155,156,157,158,159,160, /* CX */ - 27,161, 54, 33,162, 59, 22,SYM,163,164,165,166, 32, 60, 28, 55, /* DX */ - 167,168, 53, 41, 11, 56,169,170, 43, 30,171,172,173,174,175,176, /* EX */ - 27,177, 54, 33,178, 59, 22,SYM,179,180,181,182, 32, 60, 28,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,128,SYM, 48,SYM,SYM,SYM,SYM, 37, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,129,SYM, 48,SYM,SYM,SYM,SYM, 37, /* BX */ + 130,131, 63, 40, 12, 54,132,133, 46, 30, 61,134,135,136,137,138, /* CX */ + 28,139, 64, 33,140, 53, 22,SYM,141,142,143, 55, 34, 57, 27, 56, /* DX */ + 144,145, 63, 40, 12, 54,146,147, 46, 30, 61,148,149,150,151,152, /* EX */ + 28,153, 64, 33,154, 53, 22,SYM,155,156,157, 55, 34, 57, 27,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_9_CharToOrderMap[] = +static const unsigned char Iso_8859_15_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,183,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 49, 35,184, 46, 11, 56, 39, 37, 40, 30, 51, 31,185, 36,186, 57, /* CX */ - 50, 58, 52, 33, 34, 59, 22,SYM,187,188, 38,189, 32, 48, 42, 55, /* DX */ - 49, 35,190, 46, 11, 56, 39, 37, 40, 30, 51, 31,191, 36,192, 57, /* EX */ - 50, 58, 52, 33, 34, 59, 22,SYM,193,194, 38,195, 32, 44, 42,196, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM, 28,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 27,158,SYM,SYM, 27,SYM,SYM,SYM,159,160,161,SYM, /* BX */ + 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,162, 59, /* CX */ + 163, 58, 50, 33, 35, 53, 22,SYM,164,165, 41,166, 34,167,168, 56, /* DX */ + 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,169, 59, /* EX */ + 170, 58, 50, 33, 35, 53, 22,SYM,171,172, 41,173, 34,174,175,176, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_1_CharToOrderMap[] = +static const unsigned char Windows_1252_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 4X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 14, 1, 15, 9, 6, 12, 2, 7, /* 6X */ + SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM,ILL,SYM,177,SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM,178,ILL, 27,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM,179,ILL, 27,180, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,197,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 49, 35,198, 46, 11, 56, 39, 37, 40, 30, 51, 31,199, 36,200, 57, /* CX */ - 201, 58, 52, 33, 34, 59, 22,SYM,202,203, 38,204, 32,205,206, 55, /* DX */ - 49, 35,207, 46, 11, 56, 39, 37, 40, 30, 51, 31,208, 36,209, 57, /* EX */ - 210, 58, 52, 33, 34, 59, 22,SYM,211,212, 38,213, 32,214,215,216, /* FX */ + SYM,SYM,SYM,SYM,SYM,181,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,182, 59, /* CX */ + 183, 58, 50, 33, 35, 53, 22,SYM,184,185, 41,186, 34,187,188, 56, /* DX */ + 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,189, 59, /* EX */ + 190, 58, 50, 33, 35, 53, 22,SYM,191,192, 41,193, 34,194,195,196, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 60; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 19, 67, 21, 68, 18, 69, 4, 70, 23, 71, 20, 72, 15, + 73, 1, 74, 14, 75, 9, 76, 6, 77, 11, 78, 2, 79, 7, 80, 16, + 81, 29, 82, 10, 83, 5, 84, 3, 85, 8, 86, 13, 87, 24, 88, 26, + 89, 17, 90, 25, 97, 0, 98, 19, 99, 21, 100, 18, 101, 4,102, 23, + 103, 20, 104, 15, 105, 1, 106, 14, 107, 9, 108, 6, 109, 11,110, 2, + 111, 7, 112, 16, 113, 29, 114, 10, 115, 5, 116, 3, 117, 8,118, 13, + 119, 24, 120, 26, 121, 17, 122, 25, 196, 12, 214, 22, 228, 12,246, 22, + 352, 28, 353, 28, 381, 27, 382, 27, +}; + /* Model Table: - * Total sequences: 919 - * First 512 sequences: 0.9985378147555799 - * Next 512 sequences (512-1024): 0.0014621852444200612 - * Rest: 3.881443777498106e-17 + * Total sequences: 940 + * First 512 sequences: 0.9985812031154878 + * Next 512 sequences (512-1024): 0.0014187968845121583 + * Rest: 2.7321894746634712e-17 * Negative sequences: TODO */ static const PRUint8 FinnishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,3,3,0,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,3,3,2,3,3,2,0,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,2,3,2,3,2,2,0,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,2,3,2,3,2,2,0,2,0,0,0, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,2,2,0,0,0,0,0,0,0, 3,3,2,2,3,3,2,3,3,2,3,3,3,2,2,2,3,3,2,3,3,3,3,2,2,2,2,0,0,0, - 3,3,2,2,3,2,2,3,3,3,2,3,0,2,2,2,2,3,2,2,0,0,2,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,3,3,2,2,0,0,0,0,2, - 3,3,3,2,3,2,2,3,3,2,2,3,2,0,2,0,2,3,0,2,0,0,3,2,0,0,0,0,0,0, - 3,3,2,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,2,0,2,2,3,2,3,0,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,0,0, - 3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,0,3,2, - 3,3,3,3,3,3,3,3,3,3,3,2,2,0,3,2,0,3,3,3,2,3,2,0,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,3,3,3,3,3,2,3,2,0,0,0,0, - 3,3,2,3,3,3,3,3,3,3,3,0,2,0,3,0,2,3,3,2,2,3,0,0,0,2,0,0,0,2, - 2,3,3,3,2,3,3,2,0,3,3,3,3,3,3,3,3,3,3,2,0,0,3,2,0,0,0,0,0,0, - 3,3,2,3,3,3,3,3,3,2,3,2,0,2,0,2,2,3,0,2,2,2,0,3,0,2,0,0,0,0, - 3,3,3,2,3,3,2,3,2,2,3,0,2,0,3,0,0,2,2,2,2,2,0,2,2,0,0,0,0,0, - 3,3,3,2,3,2,2,3,2,2,2,2,2,2,2,0,2,3,2,2,2,0,0,2,2,3,0,0,0,0, - 3,3,0,2,2,2,3,2,0,0,0,0,2,2,3,0,2,0,0,2,0,2,0,3,2,0,2,0,0,0, - 3,3,2,2,3,0,0,2,2,2,2,0,2,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,0,0,0,0,0,0, + 3,3,2,2,3,2,2,3,3,2,2,0,3,2,2,2,2,3,2,0,0,0,2,0,0,0,0,0,0,0, + 3,3,2,0,3,2,2,3,3,2,2,2,3,2,0,2,2,3,0,2,0,2,3,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,0,3,3,2,2,0,0,0,0,2, + 3,3,2,3,3,3,3,3,3,2,3,3,3,2,0,3,3,3,3,2,2,2,3,2,3,0,0,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,0,2,0, + 3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,0,3,3,3,2,3,2,0,2,2,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,3,3,3,3,3,3,2,3,2,0,0,0,2, + 3,3,2,3,3,3,3,3,3,3,3,2,0,0,0,3,2,3,3,2,3,3,0,0,2,2,0,0,0,2, + 3,3,3,3,2,3,3,2,0,3,3,3,3,3,3,3,3,3,3,0,2,0,3,2,0,0,0,0,0,0, + 3,3,2,3,3,3,3,3,3,2,3,2,2,2,2,0,2,3,0,2,2,3,0,3,2,2,0,0,0,0, + 3,3,3,2,3,3,2,3,2,2,3,2,0,0,0,3,0,2,2,2,0,2,0,2,0,0,0,0,0,0, + 3,3,3,2,3,0,2,3,2,2,2,2,2,2,0,2,2,3,2,2,2,0,0,2,2,3,0,0,0,0, + 3,3,0,2,2,2,3,2,2,0,0,2,0,2,0,2,2,0,0,2,0,2,0,3,2,0,2,0,0,0, 3,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,2,3,0,0,2,2,2,2,2,0,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0, 2,2,0,0,0,2,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; -const SequenceModel Iso_8859_15FinnishModel = +const SequenceModel Iso_8859_1FinnishModel = { - Iso_8859_15_CharToOrderMap, + Iso_8859_1_CharToOrderMap, FinnishLangModel, 30, - (float)0.9985378147555799, + (float)0.9985812031154878, PR_TRUE, - "ISO-8859-15", + "ISO-8859-1", "fi" }; -const SequenceModel Windows_1252FinnishModel = +const SequenceModel Iso_8859_4FinnishModel = { - Windows_1252_CharToOrderMap, + Iso_8859_4_CharToOrderMap, FinnishLangModel, 30, - (float)0.9985378147555799, + (float)0.9985812031154878, PR_TRUE, - "WINDOWS-1252", + "ISO-8859-4", "fi" }; -const SequenceModel Iso_8859_4FinnishModel = +const SequenceModel Iso_8859_9FinnishModel = { - Iso_8859_4_CharToOrderMap, + Iso_8859_9_CharToOrderMap, FinnishLangModel, 30, - (float)0.9985378147555799, + (float)0.9985812031154878, PR_TRUE, - "ISO-8859-4", + "ISO-8859-9", "fi" }; @@ -268,30 +282,40 @@ const SequenceModel Iso_8859_13FinnishModel = Iso_8859_13_CharToOrderMap, FinnishLangModel, 30, - (float)0.9985378147555799, + (float)0.9985812031154878, PR_TRUE, "ISO-8859-13", "fi" }; -const SequenceModel Iso_8859_9FinnishModel = +const SequenceModel Iso_8859_15FinnishModel = { - Iso_8859_9_CharToOrderMap, + Iso_8859_15_CharToOrderMap, FinnishLangModel, 30, - (float)0.9985378147555799, + (float)0.9985812031154878, PR_TRUE, - "ISO-8859-9", + "ISO-8859-15", "fi" }; -const SequenceModel Iso_8859_1FinnishModel = +const SequenceModel Windows_1252FinnishModel = { - Iso_8859_1_CharToOrderMap, + Windows_1252_CharToOrderMap, FinnishLangModel, 30, - (float)0.9985378147555799, + (float)0.9985812031154878, PR_TRUE, - "ISO-8859-1", + "WINDOWS-1252", "fi" }; + +const LanguageModel FinnishModel = +{ + "fi", + Unicode_CharOrder, + 60, + FinnishLangModel, + 30, + (float)0.9985812031154878, +}; diff --git a/src/LangModels/LangGreekModel.cpp b/src/LangModels/LangGreekModel.cpp index 28951e6..4038450 100644 --- a/src/LangModels/LangGreekModel.cpp +++ b/src/LangModels/LangGreekModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Greek *********/ /** * Generated by BuildLangModel.py - * On: 2016-05-25 15:21:50.073117 + * On: 2021-03-16 18:58:31.005768 **/ /* Character Mapping Table: @@ -61,171 +62,200 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Windows_1253_CharToOrderMap[] = +static const unsigned char Iso_8859_7_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */ - 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */ - 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */ - SYM,SYM, 17,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 62,SYM,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */ - 55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */ - 11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */ - 61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */ - 11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */ + SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 4X */ + 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 6X */ + 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM, 18,SYM, 19, 23, 15,SYM, 16,SYM, 25, 28, /* BX */ + 45, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* CX */ + 13, 6,ILL, 9, 2, 10, 26, 24, 46, 22, 50, 59, 18, 19, 23, 15, /* DX */ + 61, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* EX */ + 13, 6, 7, 9, 2, 10, 26, 24, 46, 22, 50, 59, 16, 25, 28,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_7_CharToOrderMap[] = +static const unsigned char Windows_1253_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */ - 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */ - 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM, 17,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */ - 55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */ - 11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */ - 61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */ - 11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */ + SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 4X */ + 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 6X */ + 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */ + SYM,SYM, 18,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 62,SYM,SYM, 19, 23, 15,SYM, 16,SYM, 25, 28, /* BX */ + 45, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* CX */ + 13, 6,ILL, 9, 2, 10, 26, 24, 46, 22, 50, 59, 18, 19, 23, 15, /* DX */ + 61, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* EX */ + 13, 6, 7, 9, 2, 10, 26, 24, 46, 22, 50, 59, 16, 25, 28,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 93; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 33, 67, 41, 68, 40, 69, 30, 72, 42, 73, 32, 76, 39, 77, 44, + 78, 34, 79, 36, 82, 35, 83, 37, 84, 38, 85, 43, 97, 33, 99, 41, + 100, 40, 101, 30, 104, 42, 105, 32, 108, 39, 109, 44, 110, 34,111, 36, + 114, 35, 115, 37, 116, 38, 117, 43, 902, 18, 904, 19, 905, 23,906, 15, + 908, 16, 910, 25, 911, 28, 912, 45, 913, 0, 914, 21, 915, 17,916, 20, + 917, 5, 918, 29, 919, 11, 920, 27, 921, 3, 922, 8, 923, 12,924, 14, + 925, 4, 926, 31, 927, 1, 928, 13, 929, 6, 931, 7, 931, 9,932, 2, + 933, 10, 934, 26, 935, 24, 936, 46, 937, 22, 940, 18, 941, 19,942, 23, + 943, 15, 945, 0, 946, 21, 947, 17, 948, 20, 949, 5, 950, 29,951, 11, + 952, 27, 953, 3, 954, 8, 955, 12, 956, 14, 957, 4, 958, 31,959, 1, + 960, 13, 961, 6, 962, 7, 963, 9, 964, 2, 965, 10, 966, 26,967, 24, + 968, 46, 969, 22, 972, 16, 973, 25, 974, 28, +}; + /* Model Table: - * Total sequences: 1579 - * First 512 sequences: 0.958419074626211 - * Next 512 sequences (512-1024): 0.03968891876305471 - * Rest: 0.0018920066107342773 + * Total sequences: 1390 + * First 512 sequences: 0.9624941725288916 + * Next 512 sequences (512-1024): 0.035897222027766316 + * Rest: 0.0016086054433421051 * Negative sequences: TODO */ static const PRUint8 GreekLangModel[] = { - 1,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,1,2, - 3,3,3,3,3,1,3,0,3,0,0,0,0,0,0,1,0,0,1,0,0,0,2, - 2,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,2,3,2,3,1,2, - 3,3,3,3,3,2,2,0,2,0,0,0,0,0,0,0,0,1,0,0,1,0,2, - 3,3,2,3,2,3,3,3,2,3,3,1,3,2,2,3,3,3,2,3,0,3,3, - 2,2,2,2,2,3,3,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,1,3,3,3,3,3,3,2, - 3,1,3,3,2,3,3,0,2,0,0,1,0,0,0,1,0,0,0,0,0,0,2, - 3,3,3,3,3,3,2,3,2,2,3,1,2,2,2,3,3,3,3,3,3,3,3, - 2,2,1,3,2,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,2,3,1,3,3,1, - 3,3,3,3,3,2,2,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,2, - 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3, - 3,3,2,3,2,3,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,1, - 3,3,3,3,2,3,2,3,3,0,3,3,3,3,2,3,3,3,2,3,2,3,3, - 3,3,2,2,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,2,3,2,3,2,3,2,3,3,3,3,1,3,3,3,3, - 2,3,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0, - 1,1,0,1,1,1,0,1,1,0,2,1,0,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, - 1,1,3,0,3,2,3,3,3,3,0,3,0,3,3,1,0,0,3,1,2,0,0, - 2,1,1,3,2,0,0,0,2,0,0,1,0,0,0,0,0,0,1,0,0,0,2, - 3,3,3,3,2,3,3,2,1,1,3,2,3,1,3,3,3,3,1,3,0,3,3, - 1,2,1,1,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,2,3,2,3,3,3,3,2,3,0,3,3,2,2,3,3,2,3,1,2, - 3,0,3,3,2,1,3,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,2, - 3,3,1,3,2,3,1,2,1,2,3,3,2,3,1,3,3,3,1,3,1,3,3, - 1,2,3,0,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,3,3,2,3,1,2,2,2,3,2,3,3,3,3,3,3,2,3,2,3,3, - 2,3,2,2,2,3,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1, - 3,3,3,1,3,3,3,3,3,3,2,3,0,3,3,0,0,0,3,0,3,3,0, - 3,0,2,3,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 2,2,3,2,3,3,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0, - 3,1,2,2,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 2,2,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0, - 3,0,3,3,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,0,3,3,3,3,0,3,0,3,0,2,3,3,3,3,3,3,3,2,3,3, - 2,2,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,3,0, - 3,0,3,3,3,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,1,3,2,3,3,1,0,0,3,0,3,1,0,3,3,3,0,3,0,3,3, - 0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,3,2,3,1,3,3,2,3,1,3,1,3,2,2,1,2,3,1,2,0,2, - 2,0,3,3,2,1,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,3,1,3,1,3,3,3,3,1,2,0,3,3,0,0,0,2,0,2,1,0, - 2,0,1,3,2,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,3,3,3,3,3,1,0,1,3,1,2,2,2,3,2,3,0,3,0,3,3, - 0,2,1,3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,2,3,3,3,3,2,3,2,3,0,3,3,0,0,0,3,0,2,1,0, - 2,0,2,3,2,0,2,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,2, - 3,3,1,3,2,3,3,1,1,1,2,1,2,0,3,3,3,3,2,3,2,2,2, - 0,2,2,0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,3,3,3,1,1,0,3,0,3,3,3,2,2,3,1,3,0,2,3, - 0,2,0,0,1,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,1,3,3,3,2,0,3,1,3,1,2,3,3,3,2,3,0,3,3, - 0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,3,2,3,0,3,3,2,3,2,3,0,3,2,0,0,0,1,0,2,1,0, - 1,0,2,2,1,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,1,3,1,3,1,1,1,0,2,0,2,2,1,2,2,2,1,2,0,3,2, - 0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,1,2,2,2,3,3,2,3,2,2,2,2,2,2,0, - 3,3,1,3,1,3,0,0,1,0,3,1,2,1,1,2,2,3,1,2,0,2,2, - 0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0, - 0,0,0,1,1,0,0,2,0,2,2,1,3,3,3,2,3,2,2,2,2,2,0, - 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 0,0,1,0,0,0,0,2,0,3,2,3,2,3,3,3,2,2,3,1,2,2,0, - 0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0, - 0,1,0,1,0,0,0,2,0,2,2,2,3,3,2,2,2,2,2,2,2,2,0, - 0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,1,0,0,0,3,0,3,3,3,2,2,2,2,2,2,2,1,2,2,0, - 0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,3,2,2,1,2,2,2,2,3,2,1,2,1,0, - 1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0, - 0,0,0,0,0,0,1,3,0,3,3,3,2,1,2,2,2,1,1,3,2,2,0, - 0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,2,1,1,3,2,2,1,2,2,2,2,0, + 1,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,1, + 2,3,3,3,3,1,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2, + 2,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,1, + 2,3,3,3,3,1,2,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,2,2, + 3,3,2,3,2,3,3,3,2,3,3,3,3,1,2,3,3,2,3,3,0,2,3, + 3,2,2,2,1,3,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,1,3,3,3,3,3,3,3, + 2,3,0,2,3,3,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,3,3,3,2,3,2,3,2,3,2,1,2,3,3,3,3,3,3,2,3, + 3,2,2,2,3,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,2,3,3,3,3,3,1,3,3,3,3,2,3,2,1,3,3,3, + 1,3,3,3,3,2,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2, + 3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3, + 3,3,3,2,3,3,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, + 0,0,0,1,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, + 3,3,3,3,2,3,3,2,3,2,3,3,3,2,2,3,3,2,3,3,3,2,3, + 3,2,3,2,2,3,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0, + 3,3,3,3,2,3,2,0,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3, + 3,3,3,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,2,3,2,3,3,3,3,0,2,3,3,3,3,2,3,3,2,3,3,1, + 2,3,0,2,3,2,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2, + 1,1,3,1,3,1,3,3,3,3,0,0,3,2,3,0,1,3,0,1,2,1,0, + 0,2,1,2,3,1,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,3,3,3,2,2,2,2,3,3,3,2,3,3,3,3,3,3,2,3,3, + 3,2,3,3,2,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,3,2,2,1,3,3,3,2,1,3,3,1,3,3,0,1,3, + 3,1,2,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,1,3,2,3,1,2,1,2,2,3,2,3,3,3,3,1,3,3,0,3,3, + 3,1,2,3,0,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1, + 3,3,3,1,3,3,3,3,3,3,1,2,3,3,3,0,0,3,0,0,3,2,3, + 0,3,0,2,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 2,2,3,2,3,3,3,3,3,3,2,2,3,3,3,0,0,3,0,0,3,2,1, + 0,2,0,2,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,0,3,3,3,3,0,3,0,2,3,3,0,3,3,3,3,3,3,2,0,3, + 3,2,2,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,2,3,3,3,3,1,2,3,3,3,0,0,3,0,0,3,3,1, + 0,3,0,3,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,3,3,2,3,3,3,3,2,1,3,3,3,0,0,3,0,0,3,2,2, + 0,3,0,3,2,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,0,3,1,3,3,0,0,2,3,3,0,0,0,3,3,0,3,3,0,0,3, + 3,0,3,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,1,3,1,3,3,1,1,1,2,2,3,0,0,3,3,2,3,3,2,2,2, + 2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,1,3,2,3,1,3,3,2,3,0,1,2,3,3,1,0,3,2,1,2,3,1, + 2,2,0,2,3,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,3,1,3,1,3,3,3,3,0,0,2,2,3,0,0,2,0,0,2,1,1, + 0,2,0,2,3,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,3,3,3,3,1,0,2,2,3,2,1,2,3,3,1,3,3,0,1,3, + 3,0,2,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,2,3,3,3,3,3,3,0,2,3,3,3,0,0,3,0,0,2,2,1, + 0,2,0,2,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,2,3,2,3,3,1,1,2,3,3,2,0,1,3,3,2,2,3,0,1,3, + 3,0,2,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,2,3,3,3,1,1,0,3,3,3,0,3,2,2,0,3,3,0,0,3, + 3,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,3,2,3,0,3,3,2,3,0,2,2,3,2,0,0,1,0,0,2,2,1, + 0,1,0,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,3,2,3,1,0,0,1,2,3,1,0,2,3,3,1,2,3,0,2,3, + 2,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,1,2,2,3,3,2,3,2,2,3,2,2,2,2,0,0, + 3,3,1,3,0,3,0,0,0,0,2,3,1,1,1,2,2,1,2,2,0,0,2, + 2,0,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,2,3,3,2,2,3,3,2,2,3,1,2,2,0,0, + 0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,2,1,3,3,1,2,3,3,2,2,2,2,2,0,0, + 1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,2,2,2,2,2,2,2,1,3,2,1,2,1,0,0, + 0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,3,0,3,3,2,2,2,2,2,2,2,2,2,2,2,0,0, + 0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,0,0,0,0,2,0,2,2,3,3,2,2,2,2,2,2,2,2,2,0,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,2,2,1,1,2,2,3,2,2,2,2,2,2,0,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,3,0,3,2,0,2,2,2,2,2,1,1,3,2,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,3,3,2,1,1,2,2,2,2,1,1,2,2,0, + 0,0,0,0,0,0,0,3,0,3,2,1,1,3,2,2,2,2,1,1,2,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,2,2,2,2,1,1,2,2,1,2,1,2,1,0, + 0,0,0,0,0,0,0,3,0,3,2,1,2,2,2,1,2,2,1,1,2,1,0,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,2,2,1,2,1,2,2,2,3,2,1,0, - 0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,2,2,2,2,2,2,1,2,1,1,1,2,2,0, + 0,0,0,0,0,0,0,2,0,2,2,1,2,2,1,2,2,1,1,3,2,1,0,0, + 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,3,0,2,2,2,2,2,2,2,2,0,0,0,2,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,1,2,2,2,2,2,2,2,1,1,2,0, - 1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0, - 0,0,0,0,0,0,0,3,0,2,2,2,1,1,1,2,2,1,1,1,2,2,0, - 2,2,0,2,0,3,0,0,0,0,3,0,2,0,0,2,1,1,0,1,0,1,2, - 0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,2,2,2,2,1,2,2,2,2,2,1,0,2,0,0, + 1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, + 0,0,0,0,0,0,0,3,0,2,3,1,1,2,2,1,1,0,1,0,2,2,0,0, + 1,3,1,0,2,0,1,1,1,2,0,0,1,0,1,0,0,0,0,0,2,0,0, + 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,0,2,0,2,0,0,0,0,2,3,0,0,0,2,1,0,2,1,0,2,1, + 2,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; +const SequenceModel Iso_8859_7GreekModel = +{ + Iso_8859_7_CharToOrderMap, + GreekLangModel, + 47, + (float)0.9624941725288916, + PR_FALSE, + "ISO-8859-7", + "el" +}; + const SequenceModel Windows_1253GreekModel = { Windows_1253_CharToOrderMap, GreekLangModel, - 46, - (float)0.958419074626211, + 47, + (float)0.9624941725288916, PR_FALSE, "WINDOWS-1253", "el" }; -const SequenceModel Iso_8859_7GreekModel = +const LanguageModel GreekModel = { - Iso_8859_7_CharToOrderMap, + "el", + Unicode_CharOrder, + 93, GreekLangModel, - 46, - (float)0.958419074626211, - PR_FALSE, - "ISO-8859-7", - "el" + 47, + (float)0.9624941725288916, }; diff --git a/src/LangModels/LangHungarianModel.cpp b/src/LangModels/LangHungarianModel.cpp index 22f0de6..2bee180 100644 --- a/src/LangModels/LangHungarianModel.cpp +++ b/src/LangModels/LangHungarianModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Hungarian *********/ /** * Generated by BuildLangModel.py - * On: 2015-12-12 18:02:46.730481 + * On: 2021-03-16 19:23:30.842519 **/ /* Character Mapping Table: @@ -67,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 4X */ - 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 6X */ - 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 4X */ + 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 6X */ + 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 55,SYM, 42,SYM, 56, 46,SYM,SYM, 37, 52, 57, 58,SYM, 48, 59, /* AX */ - SYM, 60,SYM, 42,SYM, 61, 46,SYM,SYM, 37, 52, 62, 63,SYM, 48, 64, /* BX */ - 65, 11, 40, 36, 35, 66, 38, 39, 41, 14, 50, 67, 53, 28, 45, 68, /* CX */ - 49, 43, 54, 26, 69, 27, 25,SYM, 44, 70, 30, 31, 29, 47, 51, 71, /* DX */ - 72, 11, 40, 36, 35, 73, 38, 39, 41, 14, 50, 74, 53, 28, 45, 75, /* EX */ - 49, 43, 54, 26, 76, 27, 25,SYM, 44, 77, 30, 31, 29, 47, 51,SYM, /* FX */ + SYM, 55,SYM, 40,SYM, 56, 51,SYM,SYM, 38, 54, 57, 58,SYM, 43, 59, /* AX */ + SYM, 55,SYM, 40,SYM, 60, 51,SYM,SYM, 38, 54, 61, 62,SYM, 43, 63, /* BX */ + 64, 11, 45, 41, 37, 65, 35, 50, 39, 12, 66, 46, 49, 28, 42, 67, /* CX */ + 36, 52, 68, 26, 44, 27, 24,SYM, 48, 69, 30, 31, 29, 47, 70, 53, /* DX */ + 71, 11, 45, 41, 37, 72, 35, 50, 39, 12, 73, 46, 49, 28, 42, 74, /* EX */ + 36, 52, 75, 26, 44, 27, 24,SYM, 48, 76, 30, 31, 29, 47, 77,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -88,63 +89,76 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 4X */ - 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 6X */ - 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 37,SYM, 46, 78, 48, 79, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 37,SYM, 46, 80, 48, 81, /* 9X */ - SYM,SYM,SYM, 42,SYM, 82,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM, 83, /* AX */ - SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM, 84, 52,SYM, 85,SYM, 86, 87, /* BX */ - 88, 11, 40, 36, 35, 89, 38, 39, 41, 14, 50, 90, 53, 28, 45, 91, /* CX */ - 49, 43, 54, 26, 92, 27, 25,SYM, 44, 93, 30, 31, 29, 47, 51, 94, /* DX */ - 95, 11, 40, 36, 35, 96, 38, 39, 41, 14, 50, 97, 53, 28, 45, 98, /* EX */ - 49, 43, 54, 26, 99, 27, 25,SYM, 44,100, 30, 31, 29, 47, 51,SYM, /* FX */ + SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 4X */ + 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 6X */ + 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 51, 78, 43, 79, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 51, 80, 43, 81, /* 9X */ + SYM,SYM,SYM, 40,SYM, 55,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM, 82, /* AX */ + SYM,SYM,SYM, 40,SYM,SYM,SYM,SYM,SYM, 55, 54,SYM, 83,SYM, 84, 85, /* BX */ + 86, 11, 45, 41, 37, 87, 35, 50, 39, 12, 88, 46, 49, 28, 42, 89, /* CX */ + 36, 52, 90, 26, 44, 27, 24,SYM, 48, 91, 30, 31, 29, 47, 92, 53, /* DX */ + 93, 11, 45, 41, 37, 94, 35, 50, 39, 12, 95, 46, 49, 28, 42, 96, /* EX */ + 36, 52, 97, 26, 44, 27, 24,SYM, 48, 98, 30, 31, 29, 47, 99,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 64; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 1, 66, 15, 67, 23, 68, 16, 69, 0, 70, 25, 71, 14, 72, 20, + 73, 7, 74, 22, 75, 8, 76, 4, 77, 13, 78, 5, 79, 9, 80, 21, + 82, 6, 83, 3, 84, 2, 85, 19, 86, 18, 89, 17, 90, 10, 97, 1, + 98, 15, 99, 23, 100, 16, 101, 0, 102, 25, 103, 14, 104, 20,105, 7, + 106, 22, 107, 8, 108, 4, 109, 13, 110, 5, 111, 9, 112, 21,114, 6, + 115, 3, 116, 2, 117, 19, 118, 18, 121, 17, 122, 10, 193, 11,201, 12, + 205, 28, 211, 26, 214, 24, 218, 30, 220, 29, 225, 11, 233, 12,237, 28, + 243, 26, 246, 24, 250, 30, 252, 29, 336, 27, 337, 27, 368, 31,369, 31, +}; + /* Model Table: - * Total sequences: 1084 - * First 512 sequences: 0.9748272224933486 - * Next 512 sequences (512-1024): 0.024983863604162403 - * Rest: 0.0001889139024889644 + * Total sequences: 1122 + * First 512 sequences: 0.9736098834669349 + * Next 512 sequences (512-1024): 0.026285470450181352 + * Rest: 0.00010464608288375879 * Negative sequences: TODO */ static const PRUint8 HungarianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,1,0,2,2,0,0, - 3,2,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,2,2,1,2,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,2,2,3,3,3,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,2,3,2,2,3,3,3,3,3,2, - 3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,2, - 3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,3,3,3,2,3,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,3,3,2,3,3,2,3,0,2,2,2,2, - 3,2,3,3,3,3,3,2,2,3,3,2,3,3,0,3,3,3,2,3,3,3,2,3,3,0,2,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,2,2,2,3,2,2,2,2,2,3,3,2,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,3,3,3,3,3,2,2, - 1,2,3,3,3,3,3,3,2,3,3,0,3,3,2,3,3,3,2,2,2,3,3,3,2,0,0,0,2,0,0,0, - 3,3,3,2,3,2,2,3,3,2,3,3,3,2,3,3,2,2,2,3,2,3,2,2,2,2,3,2,2,2,2,3, - 3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,3,3,3,2,3,2,2,3,3,2,3,2,2,2, - 0,1,3,3,3,3,3,2,2,3,3,0,3,3,2,3,3,3,0,0,2,3,2,3,0,0,0,0,0,2,0,0, - 3,3,2,3,3,3,2,3,3,2,2,3,2,1,3,3,3,2,2,3,1,2,2,2,2,2,3,3,3,2,2,2, - 3,3,3,3,2,3,3,3,3,2,2,3,3,2,3,2,2,3,2,3,2,2,3,2,2,3,3,3,3,2,2,2, - 3,3,2,2,2,2,2,3,3,2,0,3,0,2,3,2,2,2,1,2,2,0,2,1,2,3,2,3,3,2,2,2, - 3,3,3,3,2,2,3,3,3,2,3,3,3,2,3,3,2,3,1,3,3,2,2,2,2,2,2,2,2,2,2,3, - 3,2,3,3,3,3,3,2,2,3,2,3,3,3,0,3,3,2,2,2,2,2,2,3,2,0,0,0,1,0,0,0, - 3,3,2,2,2,2,2,3,3,2,0,3,2,2,2,2,2,2,2,3,2,0,2,2,2,2,2,2,3,2,2,2, - 3,3,3,3,3,3,2,3,3,2,2,3,1,2,3,2,2,2,2,3,2,3,3,3,2,2,2,2,3,3,2,0, - 3,3,3,2,2,2,3,2,3,2,2,3,2,2,3,2,3,2,0,3,2,2,2,2,2,2,3,0,2,2,3,2, - 3,3,2,3,2,2,2,3,3,3,3,2,2,2,3,2,2,2,2,2,3,0,0,2,2,2,2,0,3,0,0,0, - 3,3,2,2,2,3,2,3,3,0,0,2,2,2,3,2,2,2,2,3,0,2,2,2,2,3,2,3,2,3,2,2, - 2,0,3,3,3,3,3,0,0,3,3,0,2,3,0,3,3,3,0,0,2,2,2,2,1,0,0,0,0,0,0,0, - 2,2,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,0,0,2,3,3,2,2,2,0,0,1,2,2,0, - 2,2,3,3,3,3,2,3,2,3,3,2,2,2,2,3,3,2,0,0,2,2,3,2,2,1,0,0,1,2,1,0, - 0,2,3,2,2,3,3,2,2,2,3,0,3,3,0,2,2,3,0,2,1,2,3,2,2,0,0,0,0,0,0,0, - 0,0,3,2,3,2,3,0,0,3,2,0,2,3,0,0,2,2,0,0,1,0,2,0,0,0,0,0,0,0,0,0, - 2,2,3,3,3,2,3,0,0,2,2,0,0,3,0,2,2,2,0,0,2,2,3,2,1,0,0,0,0,0,0,0, - 2,2,2,2,3,2,2,2,0,3,2,0,2,2,0,2,2,3,0,2,2,0,2,2,2,0,0,0,0,0,0,0, + 2,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3,3,3,3,3,3,2,3,2,1,2,2,0,0, + 2,2,3,3,3,3,3,3,3,2,3,2,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,0,2,2,2,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,3,2,3,2,3,2,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,3,3,2,3,2,2,3,3,3,3,3,2, + 3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2, + 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,3,1,3,3,1,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,2,2,3,2,2,2,2,3,2,3,2,3,3,2,2, + 2,2,3,3,3,3,3,2,3,2,3,2,2,3,3,3,3,2,3,3,3,3,2,3,1,3,2,0,0,0,0,0, + 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,3,3,3,3,2,2, + 2,2,3,3,3,3,3,2,3,2,3,2,2,3,3,3,3,0,3,2,2,3,3,3,0,2,0,0,2,0,0,0, + 2,2,3,3,3,3,3,2,3,2,3,0,2,3,3,3,3,0,3,0,3,3,2,2,0,2,0,0,2,2,0,0, + 3,3,3,3,3,2,2,3,2,3,3,3,3,3,2,3,2,2,2,3,2,3,2,2,2,2,3,2,3,2,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,2,3,3,2,2, + 3,3,2,3,3,2,3,3,2,3,2,3,3,2,0,3,2,2,2,3,2,2,2,2,3,2,3,3,2,2,2,2, + 3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,2,3,2,3,3,2,2,3,2,3,2,3,3,3,2,2,2, + 3,3,3,3,3,3,2,3,2,3,3,3,3,2,2,3,2,0,3,3,2,2,2,2,2,2,3,3,3,3,2,3, + 3,3,2,2,2,2,2,3,2,3,1,3,3,2,2,2,1,2,2,2,2,2,2,2,2,2,2,3,3,2,2,2, + 2,2,3,3,3,3,3,2,3,2,2,3,0,3,3,2,3,2,2,2,2,2,2,3,0,2,2,1,1,0,0,0, + 3,3,2,2,2,2,2,3,2,3,2,3,3,2,2,2,2,2,2,3,2,2,2,2,2,2,2,2,3,2,2,2, + 3,3,3,3,3,2,3,3,2,3,2,3,3,2,2,2,2,2,2,3,2,3,3,3,2,2,2,2,3,3,2,0, + 3,3,3,2,3,3,2,2,2,3,2,3,3,2,2,2,3,2,2,3,2,2,2,2,2,2,3,1,2,2,3,2, + 3,3,2,3,2,2,2,3,3,3,2,2,3,2,2,2,2,2,2,2,3,2,2,3,2,2,2,0,3,0,0,1, + 0,0,3,3,3,3,3,0,3,0,3,0,0,2,2,3,3,0,3,0,2,2,1,2,0,2,0,0,0,0,0,0, + 3,3,2,2,2,2,3,3,2,3,0,2,3,2,2,2,2,2,1,2,1,2,2,2,3,2,2,3,1,3,2,2, + 2,2,3,3,3,3,3,3,3,2,3,2,2,2,3,3,3,1,2,2,2,3,3,3,1,2,0,0,1,2,2,0, + 3,2,3,3,3,2,3,3,3,2,3,2,2,2,2,3,3,0,3,1,2,2,3,2,2,2,0,1,2,2,2,0, + 2,2,3,2,2,3,3,0,2,0,3,0,0,3,3,2,2,0,3,0,0,2,3,2,0,0,0,0,0,0,0,0, + 0,2,3,2,3,3,2,0,3,0,3,0,0,2,3,2,2,0,2,0,2,0,2,2,0,0,0,0,0,0,0,0, + 0,2,3,2,3,3,3,2,2,0,2,0,1,1,3,2,2,0,2,0,2,2,3,2,1,2,0,0,0,0,0,0, + 2,2,2,2,2,2,3,2,2,0,2,0,0,2,2,2,2,0,3,0,2,0,2,2,0,2,0,0,0,0,0,0, }; @@ -153,7 +167,7 @@ const SequenceModel Iso_8859_2HungarianModel = Iso_8859_2_CharToOrderMap, HungarianLangModel, 32, - (float)0.9748272224933486, + (float)0.9736098834669349, PR_FALSE, "ISO-8859-2", "hu" @@ -164,8 +178,18 @@ const SequenceModel Windows_1250HungarianModel = Windows_1250_CharToOrderMap, HungarianLangModel, 32, - (float)0.9748272224933486, + (float)0.9736098834669349, PR_FALSE, "WINDOWS-1250", "hu" }; + +const LanguageModel HungarianModel = +{ + "hu", + Unicode_CharOrder, + 64, + HungarianLangModel, + 32, + (float)0.9736098834669349, +}; diff --git a/src/LangModels/LangIrishModel.cpp b/src/LangModels/LangIrishModel.cpp index bbd9500..a9d814f 100644 --- a/src/LangModels/LangIrishModel.cpp +++ b/src/LangModels/LangIrishModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Irish *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-27 00:33:40.158624 + * On: 2021-03-16 19:09:36.532691 **/ /* Character Mapping Table: @@ -61,174 +62,197 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Iso_8859_1_CharToOrderMap[] = +static const unsigned char Iso_8859_15_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */ - 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */ - 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 44,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 45, 14, 46, 47, 33, 48, 49, 39, 35, 18, 42, 37, 50, 17, 51, 40, /* CX */ - 52, 32, 43, 22, 53, 54, 38,SYM, 36, 55, 20, 56, 31, 57, 58, 59, /* DX */ - 60, 14, 61, 62, 33, 63, 64, 39, 35, 18, 42, 37, 65, 17, 66, 40, /* EX */ - 67, 32, 43, 22, 68, 69, 38,SYM, 36, 70, 20, 71, 31, 72, 73, 74, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 35,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 42, 43,SYM,SYM, 44,SYM,SYM,SYM, 45, 46, 47,SYM, /* BX */ + 48, 14, 49, 50, 34, 51, 52, 36, 32, 18, 40, 53, 54, 17, 55, 39, /* CX */ + 56, 37, 57, 21, 58, 59, 33,SYM, 38, 60, 20, 61, 31, 62, 63, 64, /* DX */ + 65, 14, 66, 67, 34, 68, 69, 36, 32, 18, 40, 70, 71, 17, 72, 39, /* EX */ + 73, 37, 74, 21, 75, 76, 33,SYM, 38, 77, 20, 78, 31, 79, 80, 81, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Windows_1252_CharToOrderMap[] = +static const unsigned char Iso_8859_1_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */ - 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */ - 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 75,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 76,ILL, 77,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 78,ILL, 79, 80, /* 9X */ + 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 82, 14, 83, 84, 33, 85, 86, 39, 35, 18, 42, 37, 87, 17, 88, 40, /* CX */ - 89, 32, 43, 22, 90, 91, 38,SYM, 36, 92, 20, 93, 31, 94, 95, 96, /* DX */ - 97, 14, 98, 99, 33,100,101, 39, 35, 18, 42, 37,102, 17,103, 40, /* EX */ - 104, 32, 43, 22,105,106, 38,SYM, 36,107, 20,108, 31,109,110,111, /* FX */ + SYM,SYM,SYM,SYM,SYM, 82,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 83, 14, 84, 85, 34, 86, 87, 36, 32, 18, 40, 88, 89, 17, 90, 39, /* CX */ + 91, 37, 92, 21, 93, 94, 33,SYM, 38, 95, 20, 96, 31, 97, 98, 99, /* DX */ + 100, 14,101,102, 34,103,104, 36, 32, 18, 40,105,106, 17,107, 39, /* EX */ + 108, 37,109, 21,110,111, 33,SYM, 38,112, 20,113, 31,114,115,116, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_15_CharToOrderMap[] = +static const unsigned char Iso_8859_9_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */ - 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */ - 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,112,113,SYM,SYM,114,SYM,SYM,SYM,115,116,117,SYM, /* BX */ - 118, 14,119,120, 33,121,122, 39, 35, 18, 42, 37,123, 17,124, 40, /* CX */ - 125, 32, 43, 22,126,127, 38,SYM, 36,128, 20,129, 31,130,131,132, /* DX */ - 133, 14,134,135, 33,136,137, 39, 35, 18, 42, 37,138, 17,139, 40, /* EX */ - 140, 32, 43, 22,141,142, 38,SYM, 36,143, 20,144, 31,145,146,147, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM,117,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 118, 14,119,120, 34,121,122, 36, 32, 18, 40,123,124, 17,125, 39, /* CX */ + 126, 37,127, 21,128,129, 33,SYM, 38,130, 20,131, 31,132,133,134, /* DX */ + 135, 14,136,137, 34,138,139, 36, 32, 18, 40,140,141, 17,142, 39, /* EX */ + 143, 37,144, 21,145,146, 33,SYM, 38,147, 20,148, 31, 41,149,150, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_9_CharToOrderMap[] = +static const unsigned char Windows_1252_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */ - 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */ - 21, 30, 4, 6, 7, 15, 23, 26, 29, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,151,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,152,ILL,153,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,154,ILL,155,156, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,148,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 149, 14,150,151, 33,152,153, 39, 35, 18, 42, 37,154, 17,155, 40, /* CX */ - 156, 32, 43, 22,157,158, 38,SYM, 36,159, 20,160, 31,161,162,163, /* DX */ - 164, 14,165,166, 33,167,168, 39, 35, 18, 42, 37,169, 17,170, 40, /* EX */ - 171, 32, 43, 22,172,173, 38,SYM, 36,174, 20,175, 31, 41,176,177, /* FX */ + SYM,SYM,SYM,SYM,SYM,157,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 158, 14,159,160, 34,161,162, 36, 32, 18, 40,163,164, 17,165, 39, /* CX */ + 166, 37,167, 21,168,169, 33,SYM, 38,170, 20,171, 31,172,173,174, /* DX */ + 175, 14,176,177, 34,178,179, 36, 32, 18, 40,180,181, 17,182, 39, /* EX */ + 183, 37,184, 21,185,186, 33,SYM, 38,187, 20,188, 31,189,190,191, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 62; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 16, 67, 8, 68, 11, 69, 5, 70, 19, 71, 12, 72, 3, + 73, 1, 74, 27, 75, 25, 76, 9, 77, 13, 78, 2, 79, 10, 80, 22, + 81, 30, 82, 4, 83, 6, 84, 7, 85, 15, 86, 24, 87, 26, 88, 29, + 89, 23, 90, 28, 97, 0, 98, 16, 99, 8, 100, 11, 101, 5,102, 19, + 103, 12, 104, 3, 105, 1, 106, 27, 107, 25, 108, 9, 109, 13,110, 2, + 111, 10, 112, 22, 113, 30, 114, 4, 115, 6, 116, 7, 117, 15,118, 24, + 119, 26, 120, 29, 121, 23, 122, 28, 193, 14, 201, 18, 205, 17,211, 21, + 218, 20, 225, 14, 233, 18, 237, 17, 243, 21, 250, 20, +}; + /* Model Table: - * Total sequences: 701 - * First 512 sequences: 0.9974076651249096 - * Next 512 sequences (512-1024): 0.0025923348750903907 - * Rest: -2.7755575615628914e-17 + * Total sequences: 707 + * First 512 sequences: 0.9976732191628278 + * Next 512 sequences (512-1024): 0.0023267808371722288 + * Rest: -3.5561831257524545e-17 * Negative sequences: TODO */ static const PRUint8 IrishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,0,3,0,3,3,3,3,2,3,3,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,3,3,3,3,0,3,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,3,0,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,3,3,3,3,3,3,2,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,3,2,3,0,3,3,3,3,2,2,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,2,3,3,3,3,2,3,0,3,3,2,0,3,0,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,0, - 2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,3,3,0,3,3,3,3,3,2,3,2, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,3,0,3,2,3,2,3,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,2,3,0,3,0,2,0,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,2,3,3,3,0,3,0,0,0,2,2,0, - 0,3,3,0,3,2,3,3,3,3,0,3,3,3,0,0,3,3,0,3,0,3,0,2,0,0,0,0,2,0,0, - 3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,0,3,3,2,2,0,3,0,2,2,2,0,2,3,2,0, - 3,3,3,3,3,3,3,2,2,3,3,2,0,0,3,3,3,3,3,2,3,3,3,0,2,0,0,2,0,0,0, - 2,0,3,0,3,0,3,3,3,3,3,3,3,2,0,0,3,0,0,0,3,0,0,2,0,0,0,0,0,0,0, - 3,3,3,0,2,2,3,3,0,2,3,2,0,2,0,0,2,0,0,2,2,2,0,2,0,0,0,0,0,0,0, - 3,3,0,3,3,3,2,3,2,3,3,0,3,2,3,3,2,3,3,3,0,0,3,2,2,0,0,0,0,0,0, - 2,3,3,0,3,0,3,3,3,3,0,3,2,2,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0, - 3,3,0,3,3,3,3,3,2,3,3,0,0,2,3,3,0,3,3,0,2,3,3,0,2,0,0,0,0,0,0, - 0,3,3,0,3,0,3,3,3,3,0,3,3,3,0,0,3,0,0,2,3,3,0,2,0,0,0,0,2,0,0, - 3,3,2,0,3,3,3,2,0,2,3,0,2,0,3,2,0,3,3,0,0,0,3,2,2,0,0,0,0,0,0, - 3,0,3,0,2,3,3,2,3,3,3,2,0,3,0,3,2,0,0,2,0,0,0,0,2,0,3,0,0,0,0, - 3,3,3,3,3,3,3,0,0,3,3,0,0,2,2,3,2,0,2,0,0,2,0,2,3,2,2,0,0,0,0, - 3,3,3,3,3,3,3,2,0,2,3,2,0,2,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0, - 3,3,2,0,2,3,0,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, - 3,3,2,3,0,3,2,0,0,0,3,2,2,2,0,2,2,0,0,0,0,0,0,0,2,0,0,0,2,0,2, - 3,3,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,3,0,3,0,2,2,0,0,0,0,0,0, - 2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,0,0,3,3,3,3,3,2,3,3,2, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,3,3,3,3,0,2,3,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,2,3,0,2, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,0,0,0,0, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,3,3,3,3,3,3,2,3,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,0,0, + 3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,3,3,3,2,3,0,3,2,0,3,2,2, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,3,0,3,3,3,3,3,3,2,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,3,2,0,3,0,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,2,3,3,0,2,0,2,2,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,2,2,3,3,3,0,0,2,0,2,0,0, + 0,3,3,0,3,2,3,3,3,3,0,3,3,3,0,0,3,3,0,3,0,0,3,0,2,0,0,0,2,0,0, + 3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,0,2,0,0,3,2,2,2,0,0,3,2,0, + 3,3,3,3,3,3,3,2,2,3,3,2,0,0,3,3,3,3,3,2,3,3,3,3,0,0,0,2,0,0,0, + 2,0,3,0,3,0,3,3,3,3,3,3,3,2,0,0,3,0,0,2,3,2,2,0,2,0,0,0,0,0,0, + 3,3,2,0,2,2,2,3,0,2,2,2,0,2,0,0,2,0,0,0,2,0,2,0,2,0,0,0,0,0,0, + 3,3,0,3,3,3,2,3,2,3,3,0,3,2,3,3,2,3,3,3,0,3,0,2,0,0,0,0,0,0,0, + 0,3,3,0,3,0,3,3,3,3,0,3,3,2,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0, + 0,3,3,0,3,0,3,3,3,3,0,3,3,3,0,0,3,0,0,2,3,0,3,0,2,0,0,0,2,0,0, + 3,3,2,3,3,3,3,3,3,3,3,2,0,2,3,3,0,3,3,0,2,3,3,2,0,0,0,2,0,2,0, + 3,2,3,0,2,3,3,2,3,3,3,2,0,3,0,3,2,0,0,2,0,0,0,2,0,0,3,0,0,0,0, + 3,3,2,0,3,3,3,2,0,2,3,0,2,0,3,2,0,3,3,0,0,3,0,2,2,0,0,0,0,0,0, + 3,3,3,3,2,3,3,0,0,3,3,0,0,3,2,3,2,0,2,0,0,0,2,3,2,2,2,0,0,0,0, + 3,3,3,3,2,3,3,2,0,3,3,2,0,0,0,2,0,0,0,0,0,0,2,2,0,2,2,0,0,0,0, + 3,3,2,0,0,3,0,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, + 3,3,2,3,0,3,2,2,0,0,3,2,2,2,0,3,0,0,0,0,0,2,0,2,0,0,0,0,2,0,2, + 3,3,0,0,0,2,0,2,2,2,3,0,0,0,0,0,0,0,0,3,0,0,2,2,2,0,0,0,0,2,0, + 3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; +const SequenceModel Iso_8859_15IrishModel = +{ + Iso_8859_15_CharToOrderMap, + IrishLangModel, + 31, + (float)0.9976732191628278, + PR_TRUE, + "ISO-8859-15", + "ga" +}; + const SequenceModel Iso_8859_1IrishModel = { Iso_8859_1_CharToOrderMap, IrishLangModel, 31, - (float)0.9974076651249096, + (float)0.9976732191628278, PR_TRUE, "ISO-8859-1", "ga" }; -const SequenceModel Windows_1252IrishModel = +const SequenceModel Iso_8859_9IrishModel = { - Windows_1252_CharToOrderMap, + Iso_8859_9_CharToOrderMap, IrishLangModel, 31, - (float)0.9974076651249096, + (float)0.9976732191628278, PR_TRUE, - "WINDOWS-1252", + "ISO-8859-9", "ga" }; -const SequenceModel Iso_8859_15IrishModel = +const SequenceModel Windows_1252IrishModel = { - Iso_8859_15_CharToOrderMap, + Windows_1252_CharToOrderMap, IrishLangModel, 31, - (float)0.9974076651249096, + (float)0.9976732191628278, PR_TRUE, - "ISO-8859-15", + "WINDOWS-1252", "ga" }; -const SequenceModel Iso_8859_9IrishModel = +const LanguageModel IrishModel = { - Iso_8859_9_CharToOrderMap, + "ga", + Unicode_CharOrder, + 62, IrishLangModel, 31, - (float)0.9974076651249096, - PR_TRUE, - "ISO-8859-9", - "ga" + (float)0.9976732191628278, }; diff --git a/src/LangModels/LangLatvianModel.cpp b/src/LangModels/LangLatvianModel.cpp index fcccc82..3b47d21 100644 --- a/src/LangModels/LangLatvianModel.cpp +++ b/src/LangModels/LangLatvianModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Latvian *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-21 00:19:18.362275 + * On: 2021-03-16 19:30:28.293047 **/ /* Character Mapping Table: @@ -67,18 +68,18 @@ static const unsigned char Iso_8859_4_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 4X */ - 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 6X */ - 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 4X */ + 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 6X */ + 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM, 55, 56, 57,SYM, 58, 26,SYM,SYM, 23, 21, 31, 59,SYM, 29,SYM, /* AX */ - SYM, 60,SYM, 61,SYM, 62, 26,SYM,SYM, 23, 21, 31, 63, 48, 29, 48, /* BX */ - 8, 42, 64, 65, 40, 52, 53, 66, 32, 37, 67, 43, 46, 45, 49, 18, /* CX */ - 68, 24, 51, 30, 69, 70, 36,SYM, 71, 72, 73, 74, 39, 75, 27, 44, /* DX */ - 8, 42, 76, 77, 40, 52, 53, 78, 32, 37, 79, 43, 46, 45, 49, 18, /* EX */ - 80, 24, 51, 30, 81, 82, 36,SYM, 83, 84, 85, 86, 39, 87, 27,SYM, /* FX */ + SYM, 60,SYM, 61,SYM, 62, 26,SYM,SYM, 23, 21, 31, 63, 49, 29, 49, /* BX */ + 8, 40, 64, 65, 41, 54, 42, 66, 32, 36, 67, 43, 46, 47, 44, 18, /* CX */ + 68, 24, 53, 30, 69, 70, 37,SYM, 71, 72, 73, 74, 38, 75, 27, 48, /* DX */ + 8, 40, 76, 77, 41, 54, 42, 78, 32, 36, 79, 43, 46, 47, 44, 18, /* EX */ + 80, 24, 53, 30, 81, 82, 37,SYM, 83, 84, 85, 86, 38, 87, 27,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -88,18 +89,18 @@ static const unsigned char Iso_8859_10_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 4X */ - 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 6X */ - 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 4X */ + 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 6X */ + 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 88, 21, 31, 18, 89, 30,SYM, 26, 90, 23, 91, 29,SYM, 27, 48, /* AX */ - SYM, 92, 21, 31, 18, 93, 30,SYM, 26, 94, 23, 95, 29, 96, 27, 48, /* BX */ - 8, 42, 97, 98, 40, 52, 53, 99, 32, 37,100, 43, 46, 45, 49,101, /* CX */ - 50, 24, 51, 47,102,103, 36,104,105,106,107,108, 39,109, 54, 44, /* DX */ - 8, 42,110,111, 40, 52, 53,112, 32, 37,113, 43, 46, 45, 49,114, /* EX */ - 50, 24, 51, 47,115,116, 36,117,118,119,120,121, 39,122, 54,123, /* FX */ + SYM, 88, 21, 31, 18, 89, 30,SYM, 26, 90, 23, 91, 29,SYM, 27, 49, /* AX */ + SYM, 92, 21, 31, 18, 93, 30,SYM, 26, 94, 23, 95, 29, 96, 27, 49, /* BX */ + 8, 40, 97, 98, 41, 54, 42, 99, 32, 36,100, 43, 46, 47, 44,101, /* CX */ + 52, 24, 53, 45,102,103, 37,104,105,106,107,108, 38,109, 51, 48, /* DX */ + 8, 40,110,111, 41, 54, 42,112, 32, 36,113, 43, 46, 47, 44,114, /* EX */ + 52, 24, 53, 45,115,116, 37,117,118,119,120,121, 38,122, 51,123, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,70 +110,86 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 4X */ - 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 6X */ - 15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 4X */ + 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 6X */ + 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,124,SYM,125,SYM,SYM,SYM,SYM, 53, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,126,SYM,127,SYM,SYM,SYM,SYM, 53, /* BX */ - 128,129, 8,130, 40, 52,131, 21, 32, 37,132, 46, 31, 30, 18, 26, /* CX */ - 23,133, 24, 47, 51,134, 36,SYM,135, 41,136, 27, 39,137, 29, 44, /* DX */ - 138,139, 8,140, 40, 52,141, 21, 32, 37,142, 46, 31, 30, 18, 26, /* EX */ - 23,143, 24, 47, 51,144, 36,SYM,145, 41,146, 27, 39,147, 29,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,124,SYM,125,SYM,SYM,SYM,SYM, 42, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,126,SYM,127,SYM,SYM,SYM,SYM, 42, /* BX */ + 128,129, 8,130, 41, 54,131, 21, 32, 36,132, 46, 31, 30, 18, 26, /* CX */ + 23,133, 24, 45, 53,134, 37,SYM,135, 50,136, 27, 38,137, 29, 48, /* DX */ + 138,139, 8,140, 41, 54,141, 21, 32, 36,142, 46, 31, 30, 18, 26, /* EX */ + 23,143, 24, 45, 53,144, 37,SYM,145, 50,146, 27, 38,147, 29,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 80; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 17, 67, 22, 68, 13, 69, 3, 70, 25, 71, 19, 72, 28, + 73, 1, 74, 15, 75, 11, 76, 9, 77, 12, 78, 7, 79, 10, 80, 16, + 81, 39, 82, 5, 83, 2, 84, 4, 85, 6, 86, 14, 87, 34, 88, 35, + 89, 33, 90, 20, 97, 0, 98, 17, 99, 22, 100, 13, 101, 3,102, 25, + 103, 19, 104, 28, 105, 1, 106, 15, 107, 11, 108, 9, 109, 12,110, 7, + 111, 10, 112, 16, 113, 39, 114, 5, 115, 2, 116, 4, 117, 6,118, 14, + 119, 34, 120, 35, 121, 33, 122, 20, 201, 36, 214, 37, 220, 38,233, 36, + 246, 37, 252, 38, 256, 8, 257, 8, 268, 32, 269, 32, 274, 21,275, 21, + 290, 31, 291, 31, 298, 18, 299, 18, 310, 30, 311, 30, 315, 26,316, 26, + 325, 24, 326, 24, 352, 23, 353, 23, 362, 27, 363, 27, 381, 29,382, 29, +}; + /* Model Table: - * Total sequences: 970 - * First 512 sequences: 0.9904102202220861 - * Next 512 sequences (512-1024): 0.009589779777913882 - * Rest: -1.734723475976807e-17 + * Total sequences: 982 + * First 512 sequences: 0.9904642991017133 + * Next 512 sequences (512-1024): 0.009535700898286757 + * Rest: -5.377642775528102e-17 * Negative sequences: TODO */ static const PRUint8 LatvianLangModel[] = { - 2,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,2,3,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,2,3,3,3,2,3,0,0,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,0,2,2,2,3,2,2,0,0,0,2,2,0,2,2,2, - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,3,2,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,2,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,0,0,2,0,2,2,0,0,0,0, - 3,3,3,2,3,3,2,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,3,0,2,2,2,2,2,0,2,0,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,0,3,0,2,2,2,0,0,3,0,2,0,0,0,2, - 2,2,3,2,3,3,2,3,0,3,0,3,3,3,3,3,3,3,0,2,3,0,3,3,3,3,3,0,0,2,0,2,2,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,2,0,2,2,0,2,2,0,2,0, - 3,2,3,2,3,3,3,3,2,3,2,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,0,2,3,2,3,2,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,3,3,2,0,3,2,2,0,0,0,0,0,2,0,2,0,0, - 3,3,3,3,0,3,3,3,3,2,3,3,2,2,2,3,3,3,3,2,0,3,2,2,0,2,0,3,0,0,0,2,0,0,2,2,0,2,0, - 3,3,3,3,3,2,3,3,3,2,3,2,3,2,3,2,2,2,3,2,3,3,2,2,2,0,0,2,0,3,0,0,0,2,2,0,0,2,0, - 3,3,3,3,2,2,3,2,3,2,3,2,2,2,2,3,3,2,3,2,2,3,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,3,2,3,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0, - 3,3,3,3,2,0,3,3,3,2,3,2,2,2,2,2,0,0,2,2,0,3,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,0,3,2,3,3,3,2,2,2,2,2,2,2,3,0,0,3,2,2,0,0,2,3,2,0,0,0,2,0,2,0,2,0,0, - 0,0,3,0,3,3,0,3,0,3,0,3,3,3,3,3,3,3,0,3,3,0,3,3,3,2,2,0,0,2,2,0,2,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,0,0,0,0,2,2,2,0,3,0,2,3,3,2,2,0,0,0,0,2,0,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,0,0,2,2,0,0,0,0,2,2,0,0,0,0, - 2,0,3,0,3,3,0,3,0,3,0,3,3,3,3,2,3,2,0,3,3,0,3,3,2,2,3,0,0,2,2,3,0,0,0,0,0,0,0, - 3,3,3,3,2,2,3,2,3,2,3,3,2,2,2,2,0,2,3,0,2,3,2,2,0,0,0,2,3,0,0,2,0,0,2,0,0,0,0, - 3,3,3,3,2,3,3,3,3,3,3,2,2,2,3,2,2,2,3,2,2,2,0,0,2,0,2,2,0,0,3,0,0,0,0,0,0,0,0, - 3,3,2,3,0,0,3,2,3,0,3,0,2,2,2,2,2,2,0,2,0,3,2,3,0,0,0,2,0,0,3,2,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,2,2,3,3,2,2,0,0,0,0,0,2,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,2,2,0,3,2,3,2,3,2,2,0,2,2,2,0,2,2,0,2,0,2,2,0,2,2,0,0,2,3,0,0,0,0,0,0,0, - 0,2,3,0,3,3,0,3,0,3,2,3,2,3,3,3,2,0,0,2,3,0,3,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0, - 3,3,2,3,2,2,2,3,2,2,3,2,2,2,0,0,2,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,2,0,2,0,0, - 3,3,2,3,2,0,3,2,3,2,3,2,2,0,2,0,0,0,2,0,2,2,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0, - 3,3,2,3,0,2,3,0,2,0,2,0,0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,0,3,0,0,2,0,0,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,3,0,0,3,2,2,0,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,0,2,0,0,0,2,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,3,0,0,0,0,0, - 2,0,2,2,2,0,0,2,0,2,2,0,2,2,0,0,0,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0, - 2,2,0,0,0,0,2,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0, - 0,0,2,0,0,2,0,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,2,2,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,2,3,2,2,2,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,2,3,3,3,2,3,0,0,2,2,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,0,2,2,2,3,2,2,0,0,0,2,2,0,0,0,2,2, + 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,3,2,2,2,2,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,0,0,2,0,2,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,2,0,2,2,2,2, + 3,3,3,2,3,3,2,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,0,3,0,2,2,2,2,3,2,0,0,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,0,3,0,2,2,2,0,0,3,2,0,0,2,0,0,2, + 2,2,3,2,3,3,2,3,0,3,0,3,3,3,3,3,3,3,0,2,3,0,3,3,3,3,3,0,0,2,0,2,2,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,2,2,0,2,2,2,2,2,2,0,0,0, + 3,2,3,2,3,3,3,3,2,3,2,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,0,2,3,2,3,2,2,2,2,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,3,3,2,0,3,2,2,0,0,0,0,2,0,0,2,2,2,0, + 3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,3,3,3,3,2,0,3,2,2,0,2,0,3,0,0,0,2,0,2,0,2,2,0,2,0, + 3,3,3,3,2,3,3,3,3,2,3,2,3,0,3,2,2,2,3,2,3,3,2,2,2,0,0,3,0,3,0,0,0,0,2,0,2,0,2,0, + 3,3,3,3,2,2,3,2,3,2,3,2,2,2,2,3,3,2,3,2,2,3,2,0,2,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0, + 3,3,3,3,0,2,3,3,3,2,3,2,2,2,2,0,2,0,2,2,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,3,2,3,2,2,2,2,2,0,2,2,0,0,0,0,2,0,0,0, + 3,3,3,3,2,3,3,2,3,3,3,2,2,2,2,2,2,2,3,0,2,3,2,2,0,0,2,3,2,0,0,0,2,2,0,0,0,2,2,0, + 0,0,3,0,3,3,0,3,0,3,0,3,3,3,3,3,3,3,0,3,3,0,3,3,3,2,2,0,0,2,2,0,2,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,0,2,0,0,2,2,2,0,3,0,2,3,3,2,2,0,0,0,2,0,0,2,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,0,0,2,2,0,0,0,0,2,2,0,0,0,0,0, + 2,0,3,0,3,3,2,3,0,3,0,3,3,3,3,3,2,2,0,3,2,0,3,3,2,2,3,0,0,2,2,3,0,0,0,0,0,0,0,0, + 3,3,3,3,2,2,3,2,3,2,3,3,2,2,2,0,2,2,3,0,2,3,2,2,0,0,0,2,3,0,0,2,0,2,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,2,2,2,3,2,2,2,3,2,2,3,0,0,2,0,2,2,0,0,3,0,2,0,0,0,0,0,0,0, + 3,3,2,3,0,0,3,2,3,0,3,0,2,2,2,2,2,2,0,2,0,3,2,3,0,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,3,2,2,3,3,2,0,0,0,0,0,0,2,2,0,2,0,2,0,2,0,2,0,0,0,0,0,0,0,0,2,0,2,0, + 3,3,2,3,0,2,3,2,3,2,3,2,2,2,2,2,2,0,2,0,0,2,2,2,2,0,2,2,0,0,2,3,0,0,0,0,0,0,0,0, + 0,2,3,0,3,3,0,3,0,3,2,3,2,3,3,2,3,0,0,2,3,0,3,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,2,2,2,3,2,2,3,2,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,2,0,0,0,0,0, + 3,3,2,3,0,2,3,2,3,2,3,2,2,0,2,0,0,0,2,0,2,2,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0, + 3,3,2,3,2,0,2,0,2,0,2,0,0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,0,3,0,0,2,0,0,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,2,3,0,0,3,2,2,0,2,2,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,2,0,2,2,2,0,2,2,2,2,2,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0, + 2,2,2,2,2,0,0,0,0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0, + 2,2,0,0,0,0,2,0,0,0,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, + 0,0,2,0,2,2,0,2,0,2,2,0,0,2,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,2,0,0,0,0,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, + 0,0,2,0,0,2,0,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -180,8 +197,8 @@ const SequenceModel Iso_8859_4LatvianModel = { Iso_8859_4_CharToOrderMap, LatvianLangModel, - 39, - (float)0.9904102202220861, + 40, + (float)0.9904642991017133, PR_TRUE, "ISO-8859-4", "lv" @@ -191,8 +208,8 @@ const SequenceModel Iso_8859_10LatvianModel = { Iso_8859_10_CharToOrderMap, LatvianLangModel, - 39, - (float)0.9904102202220861, + 40, + (float)0.9904642991017133, PR_TRUE, "ISO-8859-10", "lv" @@ -202,9 +219,19 @@ const SequenceModel Iso_8859_13LatvianModel = { Iso_8859_13_CharToOrderMap, LatvianLangModel, - 39, - (float)0.9904102202220861, + 40, + (float)0.9904642991017133, PR_TRUE, "ISO-8859-13", "lv" }; + +const LanguageModel LatvianModel = +{ + "lv", + Unicode_CharOrder, + 80, + LatvianLangModel, + 40, + (float)0.9904642991017133, +}; diff --git a/src/LangModels/LangLithuanianModel.cpp b/src/LangModels/LangLithuanianModel.cpp index 686014a..d2fa554 100644 --- a/src/LangModels/LangLithuanianModel.cpp +++ b/src/LangModels/LangLithuanianModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Lithuanian *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-21 00:25:34.775158 + * On: 2021-03-16 19:26:36.950339 **/ /* Character Mapping Table: @@ -61,45 +62,45 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Iso_8859_10_CharToOrderMap[] = +static const unsigned char Iso_8859_4_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 4X */ - 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 6X */ - 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 4X */ + 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 6X */ + 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 29, 50, 60, 47, 61, 62,SYM, 56, 55, 21, 63, 22,SYM, 28, 64, /* AX */ - SYM, 29, 50, 65, 47, 66, 67,SYM, 56, 55, 21, 68, 22, 69, 28, 70, /* BX */ - 41, 39, 71, 53, 38, 43, 72, 30, 24, 36, 31, 73, 17, 40, 74, 46, /* CX */ - 75, 57, 34, 44, 59, 76, 35, 77, 48, 20, 54, 78, 45, 79, 80, 52, /* DX */ - 41, 39, 81, 53, 38, 43, 82, 30, 24, 36, 31, 83, 17, 40, 84, 46, /* EX */ - 85, 57, 34, 44, 59, 86, 35, 87, 48, 20, 54, 88, 45, 89, 90, 91, /* FX */ + SYM, 27, 68, 69,SYM, 40, 60,SYM,SYM, 21, 56, 70, 71,SYM, 22,SYM, /* AX */ + SYM, 27,SYM, 72,SYM, 40, 60,SYM,SYM, 21, 56, 73, 74, 67, 22, 67, /* BX */ + 45, 35, 75, 37, 41, 49, 54, 30, 24, 36, 31, 76, 17, 43, 77, 50, /* CX */ + 63, 61, 42, 78, 62, 46, 38,SYM, 55, 20, 52, 79, 51, 44, 26, 59, /* DX */ + 45, 35, 80, 37, 41, 49, 54, 30, 24, 36, 31, 81, 17, 43, 82, 50, /* EX */ + 63, 61, 42, 83, 62, 46, 38,SYM, 55, 20, 52, 84, 51, 44, 26,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_4_CharToOrderMap[] = +static const unsigned char Iso_8859_10_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 4X */ - 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 6X */ - 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 4X */ + 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 6X */ + 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 29, 92, 93,SYM, 94, 56,SYM,SYM, 21, 50, 95, 96,SYM, 22,SYM, /* AX */ - SYM, 29,SYM, 97,SYM, 98, 56,SYM,SYM, 21, 50, 99,100,101, 22,102, /* BX */ - 41, 39,103, 53, 38, 43,104, 30, 24, 36, 31,105, 17, 40,106, 47, /* CX */ - 55, 57, 34,107, 59,108, 35,SYM, 48, 20, 54,109, 45,110, 28, 52, /* DX */ - 41, 39,111, 53, 38, 43,112, 30, 24, 36, 31,113, 17, 40,114, 47, /* EX */ - 55, 57, 34,115, 59,116, 35,SYM, 48, 20, 54,117, 45,118, 28,SYM, /* FX */ + SYM, 27, 56, 85, 50, 40, 86,SYM, 60, 63, 21, 87, 22,SYM, 26, 67, /* AX */ + SYM, 27, 56, 88, 50, 40, 89,SYM, 60, 63, 21, 90, 22, 91, 26, 67, /* BX */ + 45, 35, 92, 37, 41, 49, 54, 30, 24, 36, 31, 93, 17, 43, 94, 58, /* CX */ + 65, 61, 42, 34, 62, 46, 38, 44, 55, 20, 52, 95, 51, 48, 96, 59, /* DX */ + 45, 35, 97, 37, 41, 49, 54, 30, 24, 36, 31, 98, 17, 43, 99, 58, /* EX */ + 65, 61, 42, 34, 62, 46, 38, 44, 55, 20, 52,100, 51, 48,101,102, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,91 +110,108 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 4X */ - 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 6X */ - 15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 4X */ + 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 6X */ + 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM,119,SYM,SYM,SYM,SYM,120, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM,121,SYM,SYM,SYM,SYM,122, /* BX */ - 29, 30, 41, 49, 38, 43, 31, 50, 24, 36,123, 17,124,125, 47, 56, /* CX */ - 21, 51, 57, 44, 34,126, 35,SYM, 20, 42, 58, 28, 45,127, 22, 52, /* DX */ - 29, 30, 41, 49, 38, 43, 31, 50, 24, 36,128, 17,129,130, 47, 56, /* EX */ - 21, 51, 57, 44, 34,131, 35,SYM, 20, 42, 58, 28, 45,132, 22,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 55,SYM,103,SYM,SYM,SYM,SYM, 54, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 55,SYM,104,SYM,SYM,SYM,SYM, 54, /* BX */ + 27, 30, 45, 53, 41, 49, 31, 56, 24, 36,105, 17,106,107, 50, 60, /* CX */ + 21, 57, 61, 34, 42, 46, 38,SYM, 20, 47, 64, 26, 51, 66, 22, 59, /* DX */ + 27, 30, 45, 53, 41, 49, 31, 56, 24, 36,108, 17,109,110, 50, 60, /* EX */ + 21, 57, 61, 34, 42, 46, 38,SYM, 20, 47, 64, 26, 51, 66, 22,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 80; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 1, 66, 18, 67, 23, 68, 12, 69, 4, 70, 25, 71, 16, 72, 29, + 73, 0, 74, 14, 75, 9, 76, 10, 77, 11, 78, 7, 79, 3, 80, 15, + 81, 39, 82, 5, 83, 2, 84, 6, 85, 8, 86, 13, 87, 33, 88, 32, + 89, 19, 90, 28, 97, 1, 98, 18, 99, 23, 100, 12, 101, 4,102, 25, + 103, 16, 104, 29, 105, 0, 106, 14, 107, 9, 108, 10, 109, 11,110, 7, + 111, 3, 112, 15, 113, 39, 114, 5, 115, 2, 116, 6, 117, 8,118, 13, + 119, 33, 120, 32, 121, 19, 122, 28, 193, 35, 195, 37, 201, 36,211, 34, + 214, 38, 225, 35, 227, 37, 233, 36, 243, 34, 246, 38, 260, 27,261, 27, + 268, 24, 269, 24, 278, 17, 279, 17, 280, 31, 281, 31, 302, 30,303, 30, + 352, 21, 353, 21, 362, 26, 363, 26, 370, 20, 371, 20, 381, 22,382, 22, +}; + /* Model Table: - * Total sequences: 1016 - * First 512 sequences: 0.9928710196247589 - * Next 512 sequences (512-1024): 0.0071289803752411715 - * Rest: -4.85722573273506e-17 + * Total sequences: 1138 + * First 512 sequences: 0.9919219576954762 + * Next 512 sequences (512-1024): 0.007740222486946524 + * Rest: 0.00033781981757727893 * Negative sequences: TODO */ static const PRUint8 LithuanianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,0,2,3,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,3,3,3,3,0,0,0,0,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,3,3,2,3,2,3,3,2,3,0,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,2,3,3,3,0,0,0,0,2,3,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,2,3,0,0,2,0,2,3,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,3,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,2,2,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,3,3,3,3,2,0,2,0,2,3,2,3,3,3,3,0,2,2,2,2,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,3,3,2,3,0,0,0,0,0,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,3,3,0,3,2,2,3,2,3,3,2,3,0,2,2,0,2,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,2,3,3,3,3,0,2,0,2,2,0, - 3,3,3,3,3,2,2,3,3,2,2,3,2,2,2,3,2,3,3,3,3,2,3,2,0,2,0,2,3,3,0,3,0,2,2,2,2,0, - 3,3,3,3,3,3,2,2,3,3,2,3,2,3,2,2,2,3,2,3,3,2,3,2,0,2,2,2,2,3,2,3,0,2,2,2,2,2, - 3,3,3,3,3,2,2,2,3,2,3,0,2,0,2,2,0,3,0,3,3,2,0,2,0,0,0,3,2,3,0,3,0,0,0,0,0,0, - 3,3,2,3,3,2,2,2,3,2,0,0,0,0,0,2,2,3,0,2,3,0,0,0,0,0,0,0,3,3,3,3,0,0,2,2,0,0, - 3,3,3,3,3,3,2,3,3,3,3,2,2,3,3,2,2,3,0,3,2,3,2,2,2,2,3,0,2,2,2,2,0,0,2,0,2,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,3,3,2,2,2,0,0,3,3,3,3,2,2,0,2,2,2,0,0, - 2,0,3,0,0,3,3,3,2,3,3,3,3,3,3,0,3,0,2,0,0,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,2,0,3,2,2,3,2,2,2,0,0,2,2,3,3,2,3,0,2,2,2,0,0, - 2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,2,2,2,3,2,0,3,2,0,0,0,0,0,2,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,3,2,3,2,3,2,2,2,2,3,2,0,0,2,2,2,2,0,0,2,0,0,0, - 3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,2,3,2,2,0,0,0,2,0,0,2,2,2,2,0,0,2,0,0,0, - 3,3,2,3,3,2,0,2,3,3,3,2,2,2,0,0,2,2,2,2,0,0,0,2,0,2,3,2,3,2,0,0,0,0,0,0,2,2, - 3,3,0,2,3,0,0,0,2,2,0,0,2,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0, - 3,3,2,3,3,3,0,2,3,2,3,2,0,0,2,0,2,2,2,2,2,0,0,2,0,2,0,0,2,2,0,0,0,0,0,2,0,0, - 3,3,2,3,3,3,3,3,3,2,2,3,2,0,2,0,0,0,2,2,2,0,0,0,0,2,0,0,2,2,0,0,0,2,2,0,0,0, - 3,3,2,3,3,2,2,2,3,2,3,3,3,2,0,2,2,2,2,3,3,0,0,2,0,0,2,2,2,2,0,2,0,2,2,0,2,0, - 2,0,3,0,0,3,3,3,0,3,2,3,3,2,0,2,3,0,2,0,0,2,2,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0, - 0,0,3,0,0,2,0,0,0,2,2,2,0,2,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,3,0,0,3,0,3,0,3,3,2,2,3,2,3,3,2,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,0,0,2,2,0,2,2,0,0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0, - 3,3,2,2,3,2,2,0,2,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0, - 2,0,2,0,2,0,2,0,0,2,0,2,2,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,0,2,2,2,0,2,2,2,2,0,0,0,2,0,0,0,0,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,0,2,2,0,0,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,2,0,2,3,2,2,2,2,2,2,2, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,0,3,3,3,3,3,0,0,3,3,0,0,1,3,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,0,3,3,2,3,3,2,3,2,2,0,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,2,3,0,0,3,3,0,0,2,2,0,0,0,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,0,0,3,2,2,0,2,3,0,0,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,1,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,3,3,3,3,2,2,2,0,2,3,3,2,3,3,3,0,2,0,2,2,1,2,0, + 3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,1,2,0,2,2,0,1, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,3,2,0,0,3,2,0,0,0,2,0,2,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,1,3,3,3,0,3,1,2,3,3,2,3,2,3,0,2,1,2,1,2,1,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,2,3,3,3,2,3,3,3,0,2,1,2,2,0,2,0, + 3,3,3,3,3,2,3,2,3,2,2,3,2,2,2,3,2,3,3,3,3,2,3,2,1,2,3,3,2,0,0,3,0,2,3,2,2,2,1,0, + 3,3,3,3,3,3,2,2,3,3,2,3,2,3,2,2,2,3,2,3,3,1,3,2,0,2,2,3,2,2,2,3,0,2,2,2,2,0,2,2, + 3,3,3,3,3,2,2,2,3,2,3,2,2,0,2,2,2,3,0,3,3,2,0,2,0,0,2,3,3,0,2,3,0,0,2,2,2,0,0,0, + 3,3,2,3,3,2,2,2,3,2,0,0,0,0,0,2,2,3,0,2,3,1,0,0,0,0,3,3,0,0,3,3,0,0,2,2,2,0,2,0, + 3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,3,0,3,2,3,2,2,2,2,2,2,0,3,2,2,0,1,0,1,2,1,0,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,3,3,2,2,2,0,0,3,3,3,3,2,2,0,2,0,0,0,2,2,0, + 2,0,3,0,0,3,3,3,2,3,3,3,3,3,3,2,3,0,2,0,0,2,3,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,2,3,0,2,2,2,0,0,3,2,2,3,2,2,2,0,0,3,3,2,2,1,3,0,2,0,1,0,0,1,0, + 2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,3,2,2,3,2,2,0,3,0,0,0,0,0,0,2,0,0,0,0, + 0,0,2,0,0,0,0,0,0,0,0,0,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,3,2,1,2,2,3,1,2,2,0,0,3,2,0,0,0,0,0,1,0,0, + 3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,2,3,2,2,0,0,0,2,2,2,0,0,2,2,0,0,0,0,0,1,0,0, + 3,3,2,3,3,2,3,0,3,3,3,2,2,2,0,0,2,2,2,2,0,0,0,2,0,2,3,2,2,3,0,0,0,0,0,2,2,0,0,2, + 3,3,0,2,3,0,0,0,2,2,0,0,1,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, + 3,3,2,3,3,3,2,0,3,2,3,2,0,0,2,0,2,2,2,2,2,0,0,2,0,2,2,1,0,0,0,0,0,0,0,1,0,0,2,0, + 1,0,3,0,0,3,3,3,0,3,2,3,3,2,0,2,2,0,2,0,0,3,2,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0, + 0,0,3,0,0,2,0,0,0,2,2,2,0,2,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,3,2,2,2,3,2,3,3,3,2,0,2,2,2,2,3,3,0,0,2,0,0,2,2,2,2,0,2,0,2,0,2,2,0,0,0, + 3,3,2,3,3,3,3,3,3,2,2,2,2,2,2,0,0,1,2,3,2,0,0,0,0,2,2,2,0,0,0,0,0,2,0,2,0,0,0,0, + 0,2,3,0,2,3,3,2,0,3,3,2,2,3,0,3,3,2,2,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, + 3,2,0,0,2,2,2,0,2,0,0,0,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0, + 3,3,2,2,3,2,0,2,2,0,2,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,2,0,0,0,2,0,0,0,0,1,0, + 1,0,0,0,0,2,2,2,0,3,2,0,2,0,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, + 2,0,2,0,2,2,0,2,2,0,2,2,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 2,0,2,0,0,2,1,2,0,0,1,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, + 0,0,1,2,0,1,2,2,0,2,2,2,2,2,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,2,0,0,2,2,2,0,2,2,2,2,0,0,0,2,0,0,0,0,0,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; -const SequenceModel Iso_8859_10LithuanianModel = +const SequenceModel Iso_8859_4LithuanianModel = { - Iso_8859_10_CharToOrderMap, + Iso_8859_4_CharToOrderMap, LithuanianLangModel, - 38, - (float)0.9928710196247589, + 40, + (float)0.9919219576954762, PR_TRUE, - "ISO-8859-10", + "ISO-8859-4", "lt" }; -const SequenceModel Iso_8859_4LithuanianModel = +const SequenceModel Iso_8859_10LithuanianModel = { - Iso_8859_4_CharToOrderMap, + Iso_8859_10_CharToOrderMap, LithuanianLangModel, - 38, - (float)0.9928710196247589, + 40, + (float)0.9919219576954762, PR_TRUE, - "ISO-8859-4", + "ISO-8859-10", "lt" }; @@ -201,9 +219,19 @@ const SequenceModel Iso_8859_13LithuanianModel = { Iso_8859_13_CharToOrderMap, LithuanianLangModel, - 38, - (float)0.9928710196247589, + 40, + (float)0.9919219576954762, PR_TRUE, "ISO-8859-13", "lt" }; + +const LanguageModel LithuanianModel = +{ + "lt", + Unicode_CharOrder, + 80, + LithuanianLangModel, + 40, + (float)0.9919219576954762, +}; diff --git a/src/LangModels/LangMalteseModel.cpp b/src/LangModels/LangMalteseModel.cpp index e253539..e0bdf42 100644 --- a/src/LangModels/LangMalteseModel.cpp +++ b/src/LangModels/LangMalteseModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Maltese *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-21 02:07:45.509404 + * On: 2021-03-16 19:33:28.446672 **/ /* Character Mapping Table: @@ -67,62 +68,75 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 15, 28, 13, 4, 16, 19, 22, 1, 9, 12, 3, 10, 5, 8, /* 4X */ - 14, 27, 6, 11, 2, 7, 26, 18, 25, 30, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 15, 28, 13, 4, 16, 19, 22, 1, 9, 12, 3, 10, 5, 8, /* 6X */ - 14, 27, 6, 11, 2, 7, 26, 18, 25, 30, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 15, 28, 13, 4, 16, 18, 22, 0, 9, 12, 3, 10, 5, 8, /* 4X */ + 14, 27, 6, 11, 2, 7, 25, 19, 26, 30, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 15, 28, 13, 4, 16, 18, 22, 0, 9, 12, 3, 10, 5, 8, /* 6X */ + 14, 27, 6, 11, 2, 7, 25, 19, 26, 30, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 17,SYM,SYM,SYM,ILL, 48,SYM,SYM, 49, 50, 51, 52,SYM,ILL, 21, /* AX */ - SYM, 17,SYM,SYM,SYM,SYM, 53,SYM,SYM, 54, 55, 56, 57,SYM,ILL, 21, /* BX */ - 29, 36, 47,ILL, 58, 24, 59, 40, 33, 31, 60, 39, 45, 35, 61, 62, /* CX */ - ILL, 37, 32, 34, 44, 23, 38,SYM, 63, 43, 42, 64, 46, 65, 66, 41, /* DX */ - 29, 36, 47,ILL, 67, 24, 68, 40, 33, 31, 69, 39, 45, 35, 70, 71, /* EX */ - ILL, 37, 32, 34, 44, 23, 38,SYM, 72, 43, 42, 73, 46, 74, 75,SYM, /* FX */ + SYM, 17,SYM,SYM,SYM,ILL, 49,SYM,SYM, 50, 51, 52, 53,SYM,ILL, 21, /* AX */ + SYM, 17,SYM,SYM,SYM,SYM, 54,SYM,SYM, 55, 56, 57, 58,SYM,ILL, 21, /* BX */ + 29, 36, 48,ILL, 41, 24, 59, 40, 33, 31, 60, 39, 46, 35, 61, 62, /* CX */ + ILL, 38, 32, 34, 43, 23, 37,SYM, 63, 47, 44, 64, 45, 65, 66, 42, /* DX */ + 29, 36, 48,ILL, 41, 24, 67, 40, 33, 31, 68, 39, 46, 35, 69, 70, /* EX */ + ILL, 38, 32, 34, 43, 23, 37,SYM, 71, 47, 44, 72, 45, 73, 74,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 62; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 1, 66, 15, 67, 28, 68, 13, 69, 4, 70, 16, 71, 18, 72, 22, + 73, 0, 74, 9, 75, 12, 76, 3, 77, 10, 78, 5, 79, 8, 80, 14, + 81, 27, 82, 6, 83, 11, 84, 2, 85, 7, 86, 25, 87, 19, 88, 26, + 89, 30, 90, 20, 97, 1, 98, 15, 99, 28, 100, 13, 101, 4,102, 16, + 103, 18, 104, 22, 105, 0, 106, 9, 107, 12, 108, 3, 109, 10,110, 5, + 111, 8, 112, 14, 113, 27, 114, 6, 115, 11, 116, 2, 117, 7,118, 25, + 119, 19, 120, 26, 121, 30, 122, 20, 192, 29, 224, 29, 266, 24,267, 24, + 288, 23, 289, 23, 294, 17, 295, 17, 379, 21, 380, 21, +}; + /* Model Table: - * Total sequences: 870 - * First 512 sequences: 0.9959115850692665 - * Next 512 sequences (512-1024): 0.004088414930733575 - * Rest: -4.423544863740858e-17 + * Total sequences: 888 + * First 512 sequences: 0.9960434044151966 + * Next 512 sequences (512-1024): 0.0039565955848034195 + * Rest: 1.5612511283791264e-17 * Negative sequences: TODO */ static const PRUint8 MalteseLangModel[] = { + 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,2, 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,2,0,3,0,0,3,3,3,2,3,3, - 3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,2,3,3,2,0,3,3,0,3,3,3,2,0,2, + 3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,3,3,3,2,0,3,3,0,3,3,3,2,0,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3, - 3,3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,3,3,3,2,3,0,3, - 3,3,3,3,3,3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2, - 3,3,2,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,0,3,3,2,2,2,2,2,0,0,0, - 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,2,3,2,2,3,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,0,3,2,0,0,3,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,3,0,0,0,2,0,3,2,0,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,3,2,2,0,3,0,0,2,2,0,2,2,2, - 3,3,2,3,3,2,3,3,3,3,2,3,2,2,3,0,0,0,2,3,0,0,3,0,2,0,2,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,3,3,3,0,3,2,0,0,2,0,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,0,3,2,2,0,2,3,0,0,2,0,0,2,0,0,0,0,2,2,0,2, - 3,3,3,3,3,2,3,3,3,3,3,3,2,3,0,3,2,3,2,0,0,2,3,2,0,2,0,3,0,0,0, - 3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,2,2,3,2,0,2,2,3,2,3,2,2,0,0,2, - 3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,2,0,3,3,3,2,3,3,0,0,0,3,0,2,2,3, - 3,3,2,2,3,2,2,3,2,3,2,0,0,0,2,0,0,0,2,2,3,0,0,0,0,0,2,2,0,0,0, - 3,3,2,3,3,2,0,3,3,3,3,0,0,3,0,2,2,0,2,3,0,3,0,0,0,0,3,0,0,0,0, - 3,3,3,2,3,2,3,3,3,0,3,2,2,2,2,2,0,0,2,0,2,0,2,0,0,0,0,2,0,0,2, - 3,3,2,2,3,3,3,3,3,3,2,0,0,3,0,2,0,2,2,3,2,2,0,3,0,0,2,0,0,2,0, + 3,3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,3,3,3,2,3,0,3, + 3,2,3,3,3,3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,3,3,2,2,2,2,3,2,0,0, + 3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,0,2,3,2,2,2,3,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,3,2,0,3,0,3,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,0,3,0,0,2,0,2,0,2,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,2,3,2,0,3,2,0,2,2,0,0,2,2, + 3,3,2,3,3,3,3,3,3,3,2,3,2,2,3,0,2,0,2,0,0,0,3,2,2,2,0,0,2,0,0, + 3,3,3,3,3,3,3,3,3,3,0,3,2,3,0,3,2,3,3,3,0,3,2,0,0,0,2,3,3,0,2, + 3,3,3,3,3,3,3,3,3,3,0,3,2,2,0,2,3,0,2,0,0,0,2,0,0,0,0,3,2,0,2, + 3,3,3,3,3,2,3,3,3,3,3,3,2,3,0,3,2,3,0,2,0,3,3,2,0,0,2,3,0,0,0, + 3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,3,0,3,3,3,2,3,3,0,0,3,0,0,2,2,2, + 3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,2,2,2,3,0,2,2,3,2,2,2,2,0,0,2, + 3,3,2,2,3,2,2,3,2,3,2,0,0,0,2,0,0,0,2,2,3,0,0,0,0,2,0,2,0,0,0, + 3,3,2,3,3,2,0,3,3,3,3,0,0,3,0,2,2,0,3,2,0,2,2,0,0,3,0,0,0,0,0, + 3,3,3,2,3,2,3,3,3,0,2,2,2,2,2,2,0,0,0,2,2,0,2,0,0,2,0,2,0,0,2, + 3,3,2,3,3,3,3,3,3,3,2,0,0,3,0,2,0,2,3,2,2,2,0,3,0,2,0,0,0,2,0, 3,3,2,2,3,0,2,2,0,3,0,0,2,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,0,2,2,0,3,2,0,2,0,0,0,3,0,0,3,2,0,2,0,0, - 3,3,0,2,3,2,3,3,3,3,0,2,0,3,2,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2, - 3,3,3,2,3,0,3,3,3,3,2,3,2,3,0,3,3,0,3,3,0,0,2,2,2,2,0,3,0,2,0, - 3,3,3,3,3,0,2,2,3,2,0,3,3,3,0,2,3,0,0,0,2,0,3,0,0,0,0,2,2,0,2, - 0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,2,0,2,0,2, + 3,3,0,2,3,2,3,3,3,3,0,2,0,3,2,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,2, + 3,3,3,3,3,2,3,3,3,3,3,0,2,2,0,2,2,0,2,2,0,0,3,0,0,2,3,0,2,0,0, + 3,3,3,2,3,0,3,3,3,3,2,3,2,3,0,3,3,0,3,3,0,0,2,2,2,0,2,3,0,0,0, + 3,3,3,3,3,0,2,3,3,2,0,3,3,3,2,2,2,0,0,0,2,0,3,0,0,0,0,2,2,0,2, + 2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,0,0,2,2,0,0,0,0,0,2,0,0,2,0,2, }; @@ -131,8 +145,18 @@ const SequenceModel Iso_8859_3MalteseModel = Iso_8859_3_CharToOrderMap, MalteseLangModel, 31, - (float)0.9959115850692665, + (float)0.9960434044151966, PR_TRUE, "ISO-8859-3", "mt" }; + +const LanguageModel MalteseModel = +{ + "mt", + Unicode_CharOrder, + 62, + MalteseLangModel, + 31, + (float)0.9960434044151966, +}; diff --git a/src/LangModels/LangPolishModel.cpp b/src/LangModels/LangPolishModel.cpp index 38791de..690738f 100644 --- a/src/LangModels/LangPolishModel.cpp +++ b/src/LangModels/LangPolishModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Polish *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-21 17:21:04.405363 + * On: 2021-03-16 19:54:55.178474 **/ /* Character Mapping Table: @@ -61,190 +62,217 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Ibm852_CharToOrderMap[] = +static const unsigned char Iso_8859_2_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 47, 39, 34, 54, 40, 78, 30, 47, 19, 58, 49, 49, 77, 32, 40, 30, /* 8X */ - 34, 79, 80, 55, 38, 74, 74, 28, 28, 38, 39, 76, 76, 19,SYM, 44, /* 9X */ - 35, 37, 24, 51, 25, 25, 45, 45, 23, 23,SYM, 32, 44, 56,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 35, 54, 46, 56,SYM,SYM,SYM,SYM, 27, 27,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM, 53, 53,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 70, 70, 69, 58, 69, 81, 37, 77, 46,SYM,SYM,SYM,SYM, 65, 82,SYM, /* DX */ - 24, 57, 55, 29, 29, 83, 41, 41, 84, 51, 85, 86, 60, 60, 65,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 87, 50, 50,SYM,SYM, /* FX */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM, 24,SYM, 19,SYM, 68, 29,SYM,SYM, 40, 67, 74, 32,SYM, 43, 28, /* AX */ + SYM, 24,SYM, 19,SYM, 68, 29,SYM,SYM, 40, 67, 74, 32,SYM, 43, 28, /* BX */ + 86, 35, 61, 53, 41, 87, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* CX */ + 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78, 54, /* DX */ + 88, 35, 61, 53, 41, 89, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* EX */ + 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_16_CharToOrderMap[] = +static const unsigned char Iso_8859_13_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 25, 25, 19,SYM,SYM, 41,SYM, 41,SYM, 62,SYM, 32,SYM, 32, 27, /* AX */ - SYM,SYM, 44, 19, 45,SYM,SYM,SYM, 45, 44, 62,SYM, 75, 75, 88, 27, /* BX */ - 61, 35, 54, 53, 40, 30, 89, 47, 43, 34, 64, 58, 90, 37, 77, 91, /* CX */ - 70, 29, 66, 24, 55, 49, 38, 28, 92, 68, 51, 93, 39, 23, 72, 57, /* DX */ - 61, 35, 54, 53, 40, 30, 94, 47, 43, 34, 64, 58, 95, 37, 77, 96, /* EX */ - 70, 29, 66, 24, 55, 49, 38, 28, 97, 68, 51, 98, 39, 23, 72, 99, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 90,SYM,SYM,SYM,SYM, 76, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 91,SYM,SYM,SYM,SYM, 76, /* BX */ + 24, 92, 69, 31, 41, 55, 23, 65, 42, 34, 32, 63, 93, 94, 72, 95, /* CX */ + 40, 27, 73, 25, 44, 66, 38,SYM, 96, 19, 29, 56, 39, 28, 43, 54, /* DX */ + 24, 97, 69, 31, 41, 55, 23, 65, 42, 34, 32, 63, 98, 99, 72,100, /* EX */ + 40, 27, 73, 25, 44, 66, 38,SYM,101, 19, 29, 56, 39, 28, 43,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_2_CharToOrderMap[] = +static const unsigned char Iso_8859_16_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 25,SYM, 19,SYM, 74, 28,SYM,SYM, 41, 56, 76, 32,SYM, 45, 27, /* AX */ - SYM, 25,SYM, 19,SYM, 74, 28,SYM,SYM, 41, 56, 76, 32,SYM, 45, 27, /* BX */ - 100, 35, 54, 53, 40,101, 30, 47, 44, 34, 23, 58, 46, 37, 77, 69, /* CX */ - 70, 29,102, 24, 55, 49, 38,SYM, 50,103, 51,104, 39, 60, 65, 57, /* DX */ - 105, 35, 54, 53, 40,106, 30, 47, 44, 34, 23, 58, 46, 37, 77, 69, /* EX */ - 70, 29,107, 24, 55, 49, 38,SYM, 50,108, 51,109, 39, 60, 65,SYM, /* FX */ + SYM, 24, 24, 19,SYM,SYM, 40,SYM, 40,SYM, 62,SYM, 32,SYM, 32, 28, /* AX */ + SYM,SYM, 42, 19, 43,SYM,SYM,SYM, 43, 42, 62,SYM, 84, 84,102, 28, /* BX */ + 70, 35, 61, 53, 41, 31, 76, 45, 46, 34, 60, 52, 83, 36, 80, 71, /* CX */ + 58, 27, 64, 25, 59, 57, 38, 29, 79, 85, 49,103, 39, 23, 81, 54, /* DX */ + 70, 35, 61, 53, 41, 31, 76, 45, 46, 34, 60, 52, 83, 36, 80, 71, /* EX */ + 58, 27, 64, 25, 59, 57, 38, 29, 79, 85, 49,104, 39, 23, 81,105, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Mac_Centraleurope_CharToOrderMap[] = +static const unsigned char Windows_1250_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 40, 63, 63, 34, 25, 38, 39, 35, 25, 44, 40, 44, 30, 30, 34, 32, /* 8X */ - 32, 69, 37, 69,110,111, 71, 24, 71, 55, 38, 67, 51, 46, 46, 39, /* 9X */ - SYM,SYM, 23,SYM,SYM,SYM,SYM, 57,SYM,SYM,SYM, 23,SYM,SYM,112,113, /* AX */ - 114, 73,SYM,SYM, 73,115,SYM,SYM, 19,116,117, 74, 74,118,119,120, /* BX */ - 121, 29,SYM,SYM, 29,122,SYM,SYM,SYM,SYM,SYM,123, 49, 67, 49, 42, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 42,124,125, 50,SYM,SYM, 50,126, /* DX */ - 127, 41,SYM,SYM, 41, 28, 28, 35, 76, 76, 37, 45, 45, 59, 24, 55, /* EX */ - 59,128, 51,129,130,131,132,133, 60, 60,134, 27, 19, 27,135,SYM, /* FX */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 40,SYM, 29, 74, 43, 32, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 40,SYM, 29, 74, 43, 32, /* 9X */ + SYM,SYM,SYM, 19,SYM, 24,SYM,SYM,SYM,SYM, 67,SYM,SYM,SYM,SYM, 28, /* AX */ + SYM,SYM,SYM, 19,SYM,SYM,SYM,SYM,SYM, 24, 67,SYM, 68,SYM, 68, 28, /* BX */ + 106, 35, 61, 53, 41,107, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* CX */ + 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78, 54, /* DX */ + 108, 35, 61, 53, 41,109, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* EX */ + 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_13_CharToOrderMap[] = +static const unsigned char Ibm852_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM,136,SYM,SYM,SYM,SYM,137, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM,138,SYM,SYM,SYM,SYM,139, /* BX */ - 25,140, 63, 30, 40, 52, 23,141, 44, 34, 32, 71,142,143, 73,144, /* CX */ - 41, 29,145, 24, 42, 67, 38,SYM,146, 19, 28, 59, 39, 27, 45, 57, /* DX */ - 25,147, 63, 30, 40, 52, 23,148, 44, 34, 32, 71,149,150, 73,151, /* EX */ - 41, 29,152, 24, 42, 67, 38,SYM,153, 19, 28, 59, 39, 27, 45,SYM, /* FX */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 45, 39, 34, 61, 41, 75, 31, 45, 19, 52, 57, 57, 80, 32, 41, 31, /* 8X */ + 34,110,111, 59, 38, 68, 68, 29, 29, 38, 39, 74, 74, 19,SYM, 42, /* 9X */ + 35, 36, 25, 49, 24, 24, 43, 43, 23, 23,SYM, 32, 42, 67,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 35, 61, 48, 67,SYM,SYM,SYM,SYM, 28, 28,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM, 53, 53,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 58, 58, 77, 52, 77, 82, 36, 80, 48,SYM,SYM,SYM,SYM, 78, 75,SYM, /* DX */ + 25, 54, 59, 27, 27, 82, 40, 40,112, 49,113, 79, 51, 51, 78,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 79, 50, 50,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Windows_1250_CharToOrderMap[] = +static const unsigned char Mac_Centraleurope_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 4X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 11, 14, 3, 26, 21, 22, 1, 18, 7, 15, 16, 5, 2, /* 6X */ - 13, 36, 4, 6, 10, 17, 31, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 41,SYM, 28, 76, 45, 32, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 41,SYM, 28, 76, 45, 32, /* 9X */ - SYM,SYM,SYM, 19,SYM, 25,SYM,SYM,SYM,SYM, 56,SYM,SYM,SYM,SYM, 27, /* AX */ - SYM,SYM,SYM, 19,SYM,SYM,SYM,SYM,SYM, 25, 56,SYM, 74,SYM, 74, 27, /* BX */ - 154, 35, 54, 53, 40,155, 30, 47, 44, 34, 23, 58, 46, 37, 77, 69, /* CX */ - 70, 29,156, 24, 55, 49, 38,SYM, 50,157, 51,158, 39, 60, 65, 57, /* DX */ - 159, 35, 54, 53, 40,160, 30, 47, 44, 34, 23, 58, 46, 37, 77, 69, /* EX */ - 70, 29,161, 24, 55, 49, 38,SYM, 50,162, 51,163, 39, 60, 65,SYM, /* FX */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ + 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 41, 69, 69, 34, 24, 38, 39, 35, 24, 42, 41, 42, 31, 31, 34, 32, /* 8X */ + 32, 77, 36, 77, 65, 65, 63, 25, 63, 59, 38, 66, 49, 48, 48, 39, /* 9X */ + SYM,SYM, 23,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM, 23,SYM,SYM,114,115, /* AX */ + 116, 72,SYM,SYM, 72,117,SYM,SYM, 19,118,119, 68, 68,120,121, 73, /* BX */ + 73, 27,SYM,SYM, 27, 82,SYM,SYM,SYM,SYM,SYM, 82, 57, 66, 57, 44, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 44,122,123, 50,SYM,SYM, 50,124, /* DX */ + 125, 40,SYM,SYM, 40, 29, 29, 35, 74, 74, 36, 43, 43, 56, 25, 59, /* EX */ + 56, 75, 49, 75, 79, 79,126,127, 51, 51,128, 28, 19, 28,129,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 76; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 21, 67, 11, 68, 15, 69, 3, 70, 26, 71, 20, 72, 22, + 73, 1, 74, 18, 75, 6, 76, 14, 77, 16, 78, 5, 79, 2, 80, 13, + 81, 37, 82, 4, 83, 7, 84, 10, 85, 17, 86, 30, 87, 9, 88, 33, + 89, 12, 90, 8, 97, 0, 98, 21, 99, 11, 100, 15, 101, 3,102, 26, + 103, 20, 104, 22, 105, 1, 106, 18, 107, 6, 108, 14, 109, 16,110, 5, + 111, 2, 112, 13, 113, 37, 114, 4, 115, 7, 116, 10, 117, 17,118, 30, + 119, 9, 120, 33, 121, 12, 122, 8, 193, 35, 201, 34, 205, 36,211, 25, + 225, 35, 233, 34, 237, 36, 243, 25, 260, 24, 261, 24, 262, 31,263, 31, + 280, 23, 281, 23, 321, 19, 322, 19, 323, 27, 324, 27, 346, 29,347, 29, + 377, 32, 378, 32, 379, 28, 380, 28, +}; + /* Model Table: - * Total sequences: 1321 - * First 512 sequences: 0.9894531815946438 - * Next 512 sequences (512-1024): 0.010193795364991133 - * Rest: 0.0003530230403650733 + * Total sequences: 1547 + * First 512 sequences: 0.9881622113600178 + * Next 512 sequences (512-1024): 0.011288903649768277 + * Rest: 0.0005488849902139173 * Negative sequences: TODO */ static const PRUint8 PolishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,3,3,2,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,2,3,2,2,0,0,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,3,3,3,3,2,3,2,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,2,2,2,0,2,2,0,1,2,2,2, - 3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,2,3,2,1,2,3,2,3,3,3,3,2,0,0,0,2,0,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,3,2,0,0,0,0,2,0,0,2,2,2, - 3,3,3,3,3,2,3,3,1,3,3,3,2,2,2,3,3,3,2,3,2,2,2,3,3,3,2,3,2,0,0,2,0,1,2,2,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,1,2,0,0,0,2,0,0,2,2,2, - 3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,3,0,3,2,2,2,3,3,3,1,0,2,0,0,0,0,0,1,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,2,2,3,3,3,3,2,1,0,0,0,2,0,0,2,2,0, - 3,3,3,3,2,3,2,3,3,2,3,2,3,1,2,3,2,3,3,2,1,2,3,2,3,3,0,0,0,0,0,2,0,0,2,0,2, - 3,2,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,3,3,2,0,0,0,3,3,3,3,3,2,2,0,0,0,0, - 3,3,3,3,3,3,3,2,2,2,2,3,3,3,2,3,2,3,1,3,2,2,3,2,3,2,2,0,0,0,0,2,0,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,3,2,2,1,0,0,2,2,0,2,2,1, - 3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,3,2,0,3,3,2,3,2,3,3,2,0,0,0,2,0,1,2,2,0, - 3,3,3,3,2,3,3,2,1,2,2,3,3,3,2,2,3,3,2,2,3,2,1,3,3,2,2,1,1,0,0,1,0,0,2,2,0, - 3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,0,1,0,3,3,2,3,2,2,2,2,2,2,2, - 3,3,3,3,2,3,3,2,2,3,2,3,0,2,3,2,3,3,2,2,2,2,1,3,3,3,1,1,3,1,0,1,0,0,0,2,0, - 3,0,3,3,1,3,2,3,2,2,3,2,3,2,2,0,2,3,0,2,2,3,1,3,3,3,0,2,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,2,2,1,2,3,2,2,3,2,3,2,3,2,1,2,3,2,2,2,0,0,0,0,0,0,0,2,2,0, - 3,3,3,3,3,3,2,2,2,3,2,1,2,2,3,3,2,3,2,3,2,2,3,2,3,2,2,1,0,0,0,2,0,0,2,2,2, - 3,3,3,3,3,3,2,2,1,2,3,2,3,2,2,3,3,3,2,2,2,1,1,2,2,2,2,0,2,0,0,2,0,0,2,2,0, - 0,0,0,0,0,0,3,3,3,1,3,3,0,3,3,2,0,0,0,3,3,3,0,0,0,0,0,3,3,0,2,0,2,0,0,0,0, - 0,0,0,0,3,2,2,2,3,3,2,3,1,2,3,3,2,0,3,3,3,2,0,0,0,0,2,3,1,0,0,1,3,0,0,0,0, - 0,0,0,0,0,0,2,2,3,2,3,3,0,3,3,0,0,0,0,3,2,3,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0, - 3,3,3,3,3,2,2,2,1,2,2,2,2,1,1,3,2,3,2,2,2,2,2,2,2,2,3,0,0,0,0,1,0,0,2,1,1, - 3,2,3,3,0,3,2,2,0,2,0,2,3,0,2,3,2,3,0,0,3,1,0,2,2,3,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,3,3,0,2,0,3,0,3,0,2,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,1,3,0,0,0,0,0,0, - 1,0,0,0,0,0,3,2,0,0,0,3,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,1,0,0,0,2,0,2,0,0,0,0,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,2,2,2,1,0,0,0,2,2,1,2,0,2,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,2,2,0, - 0,0,0,0,2,3,2,0,0,2,0,2,0,0,3,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 2,3,2,2,0,1,0,1,0,1,1,0,1,2,1,2,1,2,1,0,2,0,2,0,0,0,2,0,0,0,0,2,0,2,0,0,0, - 2,1,2,2,2,2,2,0,1,0,2,2,1,1,2,2,2,1,1,0,1,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 0,1,0,1,2,2,2,2,2,0,2,2,0,2,1,2,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,3,3,2,0,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,1,2,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,2,2,2,1,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,2,3,3,3,3,3,2,2,2,0,1,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,2,0,2,2,2,2,0,1,2,2,2,2, + 3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,3,2,1,3,2,2,3,3,3,3,0,3,1,2,0,1,2,2,2,2,2, + 3,3,3,3,3,2,3,3,1,3,3,3,2,2,3,2,3,3,2,3,2,2,2,3,3,3,2,0,3,1,1,0,0,1,2,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,3,2,0,0,0,2,0,0,0,2,2,2,2, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,0,1,0,2,0,0,0,2,2,1,2, + 3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,2,3,2,3,2,2,2,3,3,3,1,0,0,2,0,0,0,0,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,3,2,2,3,3,3,3,2,0,1,1,2,0,0,1,2,2,2,0, + 3,3,3,3,2,3,3,2,3,2,3,2,3,1,3,2,2,3,3,2,2,2,3,2,3,3,1,0,0,0,1,0,0,0,2,2,2,2, + 3,2,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,3,3,2,0,0,0,3,3,3,3,2,3,2,0,1,1,0,0, + 3,3,3,3,3,3,2,3,2,2,2,3,3,3,3,2,2,3,1,3,2,2,3,2,2,3,2,0,0,0,1,0,0,0,2,2,1,0, + 3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,3,2,0,3,3,2,3,3,2,3,1,2,1,2,0,0,1,2,2,2,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,2,2,3,3,2,0,3,1,2,0,2,0,2,2,2,2, + 3,3,3,3,2,3,3,3,1,2,2,3,3,3,2,2,3,3,2,2,2,3,2,3,2,3,2,0,2,1,1,0,0,0,2,2,2,0, + 3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,0,0,0,3,3,3,2,2,1,2,2,2,2,2,2, + 3,3,3,3,2,3,2,3,2,3,2,3,0,2,2,3,3,3,2,2,2,2,1,3,3,3,1,1,1,3,1,1,0,0,1,2,1,0, + 3,1,3,3,2,3,3,2,2,2,3,2,3,2,1,2,2,3,0,2,3,2,1,3,3,3,1,0,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,2,2,3,2,1,2,2,3,3,2,3,2,3,2,2,3,2,2,3,2,0,1,0,2,0,0,1,2,2,1,1, + 3,3,3,3,3,3,2,3,2,2,1,2,3,2,3,2,2,3,2,3,1,2,2,3,2,2,2,0,0,0,0,0,0,0,2,2,1,0, + 3,3,3,3,3,3,2,2,2,2,2,2,3,2,3,2,3,3,2,2,1,2,2,2,2,2,2,0,1,1,2,0,0,0,2,2,0,1, + 0,0,0,0,0,0,3,3,3,1,3,3,0,3,2,3,0,0,0,3,3,3,0,0,0,0,0,0,3,3,0,2,2,0,0,0,0,0, + 0,0,0,0,0,0,2,2,3,2,3,3,0,3,0,3,0,0,0,3,3,2,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0, + 1,0,0,0,3,2,2,2,3,3,2,3,1,2,3,3,2,0,3,3,2,3,1,0,0,0,1,0,3,1,1,0,3,0,0,0,1,0, + 3,3,3,3,3,2,2,2,1,2,2,1,2,1,3,2,2,3,2,2,2,2,1,2,2,1,3,0,0,0,2,0,0,0,2,1,0,1, + 0,0,0,0,0,0,2,3,0,0,1,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,3,0,3,2,2,0,2,0,2,3,0,3,2,2,3,0,1,1,3,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,3,3,2,1,0,3,0,3,0,2,3,0,3,0,0,0,0,2,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0, + 3,3,3,3,2,2,2,2,1,0,1,0,2,1,2,0,0,2,2,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,2,2,2,0, + 1,0,1,1,0,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,3,0,2,0,2,0,2,1,0,2,3,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 2,3,2,2,0,0,1,2,0,1,2,0,1,2,1,0,1,2,0,0,0,1,2,0,0,0,2,0,0,0,2,0,0,2,1,0,0,0, + 2,1,2,2,2,2,1,2,1,0,2,2,0,1,2,2,2,1,1,0,2,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, + 0,1,1,1,2,2,2,2,2,0,2,2,0,1,2,2,1,1,1,0,1,2,1,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, + 2,1,2,1,2,2,2,2,1,0,1,2,0,0,2,1,1,0,0,0,2,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 1,2,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, }; -const SequenceModel Ibm852PolishModel = +const SequenceModel Iso_8859_2PolishModel = { - Ibm852_CharToOrderMap, + Iso_8859_2_CharToOrderMap, PolishLangModel, - 37, - (float)0.9894531815946438, + 38, + (float)0.9881622113600178, PR_TRUE, - "IBM852", + "ISO-8859-2", + "pl" +}; + +const SequenceModel Iso_8859_13PolishModel = +{ + Iso_8859_13_CharToOrderMap, + PolishLangModel, + 38, + (float)0.9881622113600178, + PR_TRUE, + "ISO-8859-13", "pl" }; @@ -252,53 +280,52 @@ const SequenceModel Iso_8859_16PolishModel = { Iso_8859_16_CharToOrderMap, PolishLangModel, - 37, - (float)0.9894531815946438, + 38, + (float)0.9881622113600178, PR_TRUE, "ISO-8859-16", "pl" }; -const SequenceModel Iso_8859_2PolishModel = +const SequenceModel Windows_1250PolishModel = { - Iso_8859_2_CharToOrderMap, + Windows_1250_CharToOrderMap, PolishLangModel, - 37, - (float)0.9894531815946438, + 38, + (float)0.9881622113600178, PR_TRUE, - "ISO-8859-2", + "WINDOWS-1250", "pl" }; -const SequenceModel Mac_CentraleuropePolishModel = +const SequenceModel Ibm852PolishModel = { - Mac_Centraleurope_CharToOrderMap, + Ibm852_CharToOrderMap, PolishLangModel, - 37, - (float)0.9894531815946438, + 38, + (float)0.9881622113600178, PR_TRUE, - "MAC-CENTRALEUROPE", + "IBM852", "pl" }; -const SequenceModel Iso_8859_13PolishModel = +const SequenceModel Mac_CentraleuropePolishModel = { - Iso_8859_13_CharToOrderMap, + Mac_Centraleurope_CharToOrderMap, PolishLangModel, - 37, - (float)0.9894531815946438, + 38, + (float)0.9881622113600178, PR_TRUE, - "ISO-8859-13", + "MAC-CENTRALEUROPE", "pl" }; -const SequenceModel Windows_1250PolishModel = +const LanguageModel PolishModel = { - Windows_1250_CharToOrderMap, + "pl", + Unicode_CharOrder, + 76, PolishLangModel, - 37, - (float)0.9894531815946438, - PR_TRUE, - "WINDOWS-1250", - "pl" + 38, + (float)0.9881622113600178, }; diff --git a/src/LangModels/LangPortugueseModel.cpp b/src/LangModels/LangPortugueseModel.cpp index 0b2dd1b..72eae0b 100644 --- a/src/LangModels/LangPortugueseModel.cpp +++ b/src/LangModels/LangPortugueseModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Portuguese *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-20 23:47:27.348423 + * On: 2021-03-16 19:59:19.803482 **/ /* Character Mapping Table: @@ -61,181 +62,206 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Iso_8859_1_CharToOrderMap[] = +static const unsigned char Iso_8859_15_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 4X */ - 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 6X */ - 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */ + 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */ + 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 52, 23, 45, 47, /* CX */ - 48, 53, 46, 27, 37, 30, 38,SYM, 54, 55, 33, 56, 40, 57, 58, 49, /* DX */ - 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 59, 23, 45, 47, /* EX */ - 48, 60, 46, 27, 37, 30, 38,SYM, 61, 62, 33, 63, 40, 64, 65, 50, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 53, 54,SYM,SYM, 55,SYM,SYM,SYM, 56, 57, 50,SYM, /* BX */ + 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 58, 23, 45, 46, /* CX */ + 47, 59, 49, 27, 37, 30, 40,SYM, 60, 61, 32, 62, 39, 63, 64, 48, /* DX */ + 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 65, 23, 45, 46, /* EX */ + 47, 66, 49, 27, 37, 30, 40,SYM, 67, 68, 32, 69, 39, 70, 71, 50, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_9_CharToOrderMap[] = +static const unsigned char Iso_8859_1_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 4X */ - 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 6X */ - 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */ + 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */ + 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 66,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 67, 23, 45, 47, /* CX */ - 68, 69, 46, 27, 37, 30, 38,SYM, 70, 71, 33, 72, 40, 73, 74, 49, /* DX */ - 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 75, 23, 45, 47, /* EX */ - 76, 77, 46, 27, 37, 30, 38,SYM, 78, 79, 33, 80, 40, 81, 82, 50, /* FX */ + SYM,SYM,SYM,SYM,SYM, 72,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 73, 23, 45, 46, /* CX */ + 47, 74, 49, 27, 37, 30, 40,SYM, 75, 76, 32, 77, 39, 78, 79, 48, /* DX */ + 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 80, 23, 45, 46, /* EX */ + 47, 81, 49, 27, 37, 30, 40,SYM, 82, 83, 32, 84, 39, 85, 86, 50, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_15_CharToOrderMap[] = +static const unsigned char Windows_1252_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 4X */ - 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 6X */ - 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 83,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 85, 86,SYM,SYM, 87,SYM,SYM,SYM, 88, 89, 50,SYM, /* BX */ - 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 90, 23, 45, 47, /* CX */ - 48, 91, 46, 27, 37, 30, 38,SYM, 92, 93, 33, 94, 40, 95, 96, 49, /* DX */ - 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44, 97, 23, 45, 47, /* EX */ - 48, 98, 46, 27, 37, 30, 38,SYM, 99,100, 33,101, 40,102,103, 50, /* FX */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */ + 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */ + 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 87,SYM,SYM,SYM,SYM,SYM,SYM, 88,SYM, 89,ILL, 90,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 91,SYM, 92,ILL, 93, 50, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 94,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 95, 23, 45, 46, /* CX */ + 47, 96, 49, 27, 37, 30, 40,SYM, 97, 98, 32, 99, 39,100,101, 48, /* DX */ + 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44,102, 23, 45, 46, /* EX */ + 47,103, 49, 27, 37, 30, 40,SYM,104,105, 32,106, 39,107,108, 50, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Windows_1252_CharToOrderMap[] = +static const unsigned char Iso_8859_9_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 4X */ - 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 28, 34, 12, 9, 6, 2, /* 6X */ - 13, 21, 5, 3, 8, 11, 15, 32, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,104,SYM,SYM,SYM,SYM,SYM,SYM,105,SYM,106,ILL,107,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,108,SYM,109,ILL,110, 50, /* 9X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */ + 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */ + 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,111,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44,112, 23, 45, 47, /* CX */ - 48,113, 46, 27, 37, 30, 38,SYM,114,115, 33,116, 40,117,118, 49, /* DX */ - 36, 25, 35, 20, 41, 42, 43, 22, 39, 19, 29, 44,119, 23, 45, 47, /* EX */ - 48,120, 46, 27, 37, 30, 38,SYM,121,122, 33,123, 40,124,125, 50, /* FX */ + SYM,SYM,SYM,SYM,SYM,109,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44,110, 23, 45, 46, /* CX */ + 111,112, 49, 27, 37, 30, 40,SYM,113,114, 32,115, 39,116,117, 48, /* DX */ + 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44,118, 23, 45, 46, /* EX */ + 119,120, 49, 27, 37, 30, 40,SYM,121,122, 32,123, 39,124,125, 50, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 76; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 17, 67, 10, 68, 7, 69, 1, 70, 16, 71, 14, 72, 18, + 73, 4, 74, 29, 75, 35, 76, 12, 77, 9, 78, 6, 79, 2, 80, 13, + 81, 22, 82, 5, 83, 3, 84, 8, 85, 11, 86, 15, 87, 34, 88, 24, + 89, 31, 90, 26, 97, 0, 98, 17, 99, 10, 100, 7, 101, 1,102, 16, + 103, 14, 104, 18, 105, 4, 106, 29, 107, 35, 108, 12, 109, 9,110, 6, + 111, 2, 112, 13, 113, 22, 114, 5, 115, 3, 116, 8, 117, 11,118, 15, + 119, 34, 120, 24, 121, 31, 122, 26, 192, 36, 193, 25, 194, 33,195, 19, + 199, 21, 201, 20, 202, 28, 205, 23, 211, 27, 212, 37, 213, 30,218, 32, + 224, 36, 225, 25, 226, 33, 227, 19, 231, 21, 233, 20, 234, 28,237, 23, + 243, 27, 244, 37, 245, 30, 250, 32, +}; + /* Model Table: - * Total sequences: 891 - * First 512 sequences: 0.9953179582313172 - * Next 512 sequences (512-1024): 0.0046820417686827855 - * Rest: 2.42861286636753e-17 + * Total sequences: 929 + * First 512 sequences: 0.9952990712503466 + * Next 512 sequences (512-1024): 0.004700928749653451 + * Rest: -7.806255641895632e-18 * Negative sequences: TODO */ static const PRUint8 PortugueseLangModel[] = { - 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,3,0,3,2,3,0,0,3,2,2,3,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,3,2,3,2,3,2,3,0,2,3,3,2,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,3,2,3,2,3,0,2,3,3,0,3,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,0,3,0,3,2,3,0,2,3,3,2,2,3,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,0,3,3,3,3,2,3,3,2,2,2,3,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,3,3,3,2,2,3,3,0,3, - 3,3,3,3,3,2,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,0,3,2,3,3,2,0,3, - 3,3,3,3,3,3,2,3,2,3,2,3,3,2,3,2,2,2,2,3,3,2,0,3,0,3,0,3,2,3,2,3,3,3,0,2,0,2, - 3,3,3,3,3,3,3,0,3,3,3,3,3,2,2,2,2,2,3,3,3,0,0,3,0,3,2,3,0,3,2,3,2,2,2,3,0,3, - 3,3,3,3,3,2,3,2,2,3,2,3,2,3,2,0,2,3,0,3,3,2,0,3,0,3,2,3,0,2,2,3,2,3,0,3,0,3, - 3,3,3,2,3,3,3,2,3,3,3,3,3,2,2,0,2,2,3,3,2,2,3,3,0,3,2,3,0,3,2,3,0,2,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,0,3,3,0,2,2,0,2,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,2,3,0,3,0,3,2,2,2,3,0,3, - 3,3,3,3,3,3,2,2,3,0,2,3,3,3,0,0,0,2,3,3,2,2,3,3,0,3,2,3,0,2,2,2,0,3,0,2,0,2, - 3,3,3,3,3,3,3,2,2,3,2,3,3,2,2,2,0,2,3,3,2,0,0,2,0,3,0,2,0,3,2,3,2,2,0,2,0,0, - 3,3,3,0,3,3,0,2,0,0,0,3,0,0,0,2,0,0,0,3,2,0,0,3,0,3,0,2,0,3,2,0,0,0,0,2,0,2, - 3,3,3,2,3,3,0,2,2,2,2,3,3,2,2,0,3,2,0,3,0,0,0,3,0,2,0,3,0,3,0,2,0,2,0,0,0,2, - 3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,2,3,2,2,3,2,0,0,2,0,2,2,2,3,2,0,2,2,2,0,0,0,0, - 3,3,3,3,3,3,3,2,3,2,0,3,3,0,0,0,2,2,2,2,3,0,0,2,0,3,0,2,0,0,3,3,2,0,2,0,0,0, - 2,2,2,3,2,3,3,3,3,3,3,2,3,3,2,2,2,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,0,3,2,0,3,0,3,2,0,2,3,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,2,3,2,3,2,0,3,2,3,2,2,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,2,3,2,0,3,2,3,0,0,3,3,0,0, + 3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,0,3,2,3,3,0,3,3,2,2,2,3,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,3,2,3,2,2,2,2,3,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,3,3,2,3,3,3,0,2, + 3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,2,3,0,3,3,2,2,3,0,3, + 3,3,3,3,3,3,2,2,2,3,2,3,2,2,3,2,2,2,2,3,3,0,2,3,0,3,2,2,3,3,2,3,3,2,3,0,0,2, + 3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,3,3,3,0,0,3,0,3,2,3,3,2,2,3,2,3,2,2,0,3, + 3,3,3,3,3,2,3,2,2,3,2,3,2,3,2,0,2,3,0,3,3,0,2,3,2,3,2,3,2,0,2,3,3,3,2,0,0,3, + 3,3,3,2,3,3,3,2,3,3,3,3,3,2,2,0,2,2,3,3,3,3,3,3,0,3,2,3,3,0,2,3,2,3,0,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,3,2,3,3,3,3,2,3,3,0,2,0,0,2,2,0,0, + 3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,0,3,2,3,3,0,0,3,2,3,2,2,0,3, + 3,3,3,3,3,3,2,2,3,2,2,3,3,3,0,0,2,2,3,2,3,3,2,3,0,3,2,3,2,0,2,2,3,2,0,0,0,2, + 3,3,3,2,3,3,3,2,2,3,2,3,3,2,2,2,2,2,3,2,3,0,0,2,0,3,0,2,3,0,2,3,2,2,2,0,0,0, + 3,3,3,0,3,3,0,2,0,0,0,3,2,0,0,2,0,0,0,2,3,0,0,3,0,3,0,0,2,0,2,0,0,2,0,0,0,2, + 3,3,3,2,3,3,0,2,2,2,2,3,3,2,2,0,3,2,0,0,3,0,0,3,0,2,0,3,3,0,0,2,2,2,0,0,0,2, + 3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,2,3,2,2,2,3,0,0,2,0,2,2,2,2,3,0,2,2,0,2,0,0,0, + 3,3,3,3,3,3,3,2,3,2,0,3,2,0,0,0,2,2,2,3,2,0,0,2,0,3,0,2,0,0,3,3,2,2,2,2,0,0, 0,2,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,3,0,2,0,0,0,0,0, 2,0,0,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0, 3,0,3,3,0,3,3,3,3,3,3,0,3,3,3,3,3,3,0,0,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,0,0,0,3,0,3,3,2,3,0,3,2,0,2,2,2,0,0,2,3,2,0,2,2,0,2,0,0,0,0,0,0,2, + 3,3,3,2,3,0,0,2,3,0,3,3,2,3,0,3,2,0,2,2,2,0,0,2,3,2,0,2,0,2,2,0,0,0,0,0,0,2, 0,0,0,3,0,3,2,2,3,0,3,2,3,3,3,3,3,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,0,3,2,2,0,0,2,2,3,0,0,0,0,0,2,2,2,2,0,0,0,0,2,2,2,0,2,2,0,2,0,0,2,0,0, - 0,0,0,3,2,3,3,3,3,3,3,0,3,3,3,2,3,2,0,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,2,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,0,0,0,0,3,0,0,0,0,0,0,0,2,2,0,0,0, + 3,3,3,0,3,2,2,0,0,2,2,3,2,2,2,0,0,2,2,2,2,0,0,0,0,2,2,2,2,0,2,2,0,2,2,2,0,0, + 0,0,0,3,3,3,3,3,3,3,3,0,3,3,3,2,2,2,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,3,0,0,3,0,2,3,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,2,0,2,0,0,0,2,3,0,2,0,0,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,2,0,2, 0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,2,2,3,2,3,2,3,2,2,0,2,2,2,0,0,0,0,0,3,0,2,0,2,0,0,0,2,0,2,0,0,0, - 3,3,3,2,3,2,2,2,3,2,2,2,2,0,0,2,0,2,3,0,0,0,0,0,0,0,2,0,0,0,0,2,2,0,2,0,0,0, - 0,0,0,3,0,2,3,3,2,3,2,0,3,2,0,2,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,2,2,0,0,3,2,2,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,2,0,0,0, - 0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,2,0,0,3,2,2,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,3,2,3,3,2,3,2,3,2,3,2,2,0,2,2,2,0,0,0,0,0,2,0,2,0,0,2,0,0,0,0,2,2,0,0, + 0,0,0,3,0,2,3,3,2,3,3,0,3,2,0,2,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,3,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,2,2,2,3,2,2,2,2,0,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,2,2,0,0, + 3,3,3,2,3,2,2,0,0,3,2,2,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0, + 0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,2,0,2,3,2,2,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; +const SequenceModel Iso_8859_15PortugueseModel = +{ + Iso_8859_15_CharToOrderMap, + PortugueseLangModel, + 38, + (float)0.9952990712503466, + PR_TRUE, + "ISO-8859-15", + "pt" +}; + const SequenceModel Iso_8859_1PortugueseModel = { Iso_8859_1_CharToOrderMap, PortugueseLangModel, 38, - (float)0.9953179582313172, + (float)0.9952990712503466, PR_TRUE, "ISO-8859-1", "pt" }; -const SequenceModel Iso_8859_9PortugueseModel = +const SequenceModel Windows_1252PortugueseModel = { - Iso_8859_9_CharToOrderMap, + Windows_1252_CharToOrderMap, PortugueseLangModel, 38, - (float)0.9953179582313172, + (float)0.9952990712503466, PR_TRUE, - "ISO-8859-9", + "WINDOWS-1252", "pt" }; -const SequenceModel Iso_8859_15PortugueseModel = +const SequenceModel Iso_8859_9PortugueseModel = { - Iso_8859_15_CharToOrderMap, + Iso_8859_9_CharToOrderMap, PortugueseLangModel, 38, - (float)0.9953179582313172, + (float)0.9952990712503466, PR_TRUE, - "ISO-8859-15", + "ISO-8859-9", "pt" }; -const SequenceModel Windows_1252PortugueseModel = +const LanguageModel PortugueseModel = { - Windows_1252_CharToOrderMap, + "pt", + Unicode_CharOrder, + 76, PortugueseLangModel, 38, - (float)0.9953179582313172, - PR_TRUE, - "WINDOWS-1252", - "pt" + (float)0.9952990712503466, }; diff --git a/src/LangModels/LangRomanianModel.cpp b/src/LangModels/LangRomanianModel.cpp index cfb1b8d..430f51d 100644 --- a/src/LangModels/LangRomanianModel.cpp +++ b/src/LangModels/LangRomanianModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Romanian *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-28 18:58:13.757152 + * On: 2021-03-16 20:04:01.199893 **/ /* Character Mapping Table: @@ -61,45 +62,45 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Iso_8859_16_CharToOrderMap[] = +static const unsigned char Iso_8859_2_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */ - 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ + 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */ - 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 60, 61, 46,SYM,SYM, 38,SYM, 38,SYM, 19,SYM, 62,SYM, 63, 64, /* AX */ - SYM,SYM, 41, 46, 40,SYM,SYM,SYM, 40, 41, 19,SYM, 65, 66, 67, 68, /* BX */ - 69, 30, 24, 14, 33, 35, 53, 42, 45, 31, 58, 49, 70, 37, 20, 48, /* CX */ - 43, 52, 59, 34, 71, 44, 36, 56, 50, 72, 47, 73, 39, 74, 18, 57, /* DX */ - 75, 30, 24, 14, 33, 35, 53, 42, 45, 31, 58, 49, 76, 37, 20, 48, /* EX */ - 43, 52, 59, 34, 77, 44, 36, 56, 50, 78, 47, 79, 39, 80, 18, 81, /* FX */ + SYM, 63,SYM, 45,SYM, 64, 58,SYM,SYM, 34, 65, 66, 67,SYM, 35, 59, /* AX */ + SYM, 68,SYM, 45,SYM, 69, 58,SYM,SYM, 34, 70, 71, 72,SYM, 35, 59, /* BX */ + 73, 30, 24, 14, 36, 74, 37, 42, 33, 31, 75, 54, 50, 39, 21, 76, /* CX */ + 46, 51, 77, 38, 53, 47, 40,SYM, 52, 78, 44, 62, 41, 55, 79, 57, /* DX */ + 80, 30, 24, 14, 36, 81, 37, 42, 33, 31, 82, 54, 50, 39, 21, 83, /* EX */ + 46, 51, 84, 38, 53, 47, 40,SYM, 52, 85, 44, 62, 41, 55, 86,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_2_CharToOrderMap[] = +static const unsigned char Iso_8859_16_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */ - 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ + 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */ - 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 82,SYM, 46,SYM, 83, 56,SYM,SYM, 38, 84, 85, 86,SYM, 40, 87, /* AX */ - SYM, 88,SYM, 46,SYM, 89, 56,SYM,SYM, 38, 90, 91, 92,SYM, 40, 93, /* BX */ - 94, 30, 24, 14, 33, 95, 35, 42, 41, 31, 96, 49, 51, 37, 20, 97, /* CX */ - 43, 52, 98, 34, 99, 44, 36,SYM, 55,100, 47, 50, 39, 54,101, 57, /* DX */ - 102, 30, 24, 14, 33,103, 35, 42, 41, 31,104, 49, 51, 37, 20,105, /* EX */ - 43, 52,106, 34,107, 44, 36,SYM, 55,108, 47, 50, 39, 54,109,SYM, /* FX */ + SYM, 87, 88, 45,SYM,SYM, 34,SYM, 34,SYM, 19,SYM, 89,SYM, 90, 59, /* AX */ + SYM,SYM, 33, 45, 35,SYM,SYM,SYM, 35, 33, 19,SYM, 91, 92, 93, 59, /* BX */ + 60, 30, 24, 14, 36, 37, 56, 42, 43, 31, 94, 54, 48, 39, 21, 49, /* CX */ + 46, 51, 61, 38, 53, 47, 40, 58, 62, 95, 44, 96, 41, 97, 18, 57, /* DX */ + 60, 30, 24, 14, 36, 37, 56, 42, 43, 31, 98, 54, 48, 39, 21, 49, /* EX */ + 46, 51, 61, 38, 53, 47, 40, 58, 62, 99, 44,100, 41,101, 18,102, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,17 +111,17 @@ static const unsigned char Windows_1250_CharToOrderMap[] = SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */ - 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ + 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */ - 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 56,110, 40,111, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 56,112, 40,113, /* 9X */ - SYM,SYM,SYM, 46,SYM,114,SYM,SYM,SYM,SYM,115,SYM,SYM,SYM,SYM,116, /* AX */ - SYM,SYM,SYM, 46,SYM,SYM,SYM,SYM,SYM,117,118,SYM,119,SYM,120,121, /* BX */ - 122, 30, 24, 14, 33,123, 35, 42, 41, 31,124, 49, 51, 37, 20,125, /* CX */ - 43, 52,126, 34,127, 44, 36,SYM, 55,128, 47, 50, 39, 54,129, 57, /* DX */ - 130, 30, 24, 14, 33,131, 35, 42, 41, 31,132, 49, 51, 37, 20,133, /* EX */ - 43, 52,134, 34,135, 44, 36,SYM, 55,136, 47, 50, 39, 54,137,SYM, /* FX */ + 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 34,SYM, 58,103, 35,104, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 34,SYM, 58,105, 35,106, /* 9X */ + SYM,SYM,SYM, 45,SYM,107,SYM,SYM,SYM,SYM,108,SYM,SYM,SYM,SYM, 59, /* AX */ + SYM,SYM,SYM, 45,SYM,SYM,SYM,SYM,SYM,109,110,SYM,111,SYM,112, 59, /* BX */ + 113, 30, 24, 14, 36,114, 37, 42, 33, 31,115, 54, 50, 39, 21,116, /* CX */ + 46, 51,117, 38, 53, 47, 40,SYM, 52,118, 44, 62, 41, 55,119, 57, /* DX */ + 120, 30, 24, 14, 36,121, 37, 42, 33, 31,122, 54, 50, 39, 21,123, /* EX */ + 46, 51,124, 38, 53, 47, 40,SYM, 52,125, 44, 62, 41, 55,126,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,85 +132,99 @@ static const unsigned char Ibm852_CharToOrderMap[] = SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */ - 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ + 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */ - 13, 32, 3, 10, 5, 7, 21, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 42, 39, 31, 24, 33,138, 35, 42, 46, 49, 44, 44, 20,139, 33, 35, /* 8X */ - 31,140,141,142, 36,143,144, 56, 56, 36, 39,145,146, 46,SYM, 41, /* 9X */ - 30, 37, 34, 47,147,148, 40, 40,149,150,SYM,151, 41,152,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 30, 24, 51,153,SYM,SYM,SYM,SYM,154,155,SYM, /* BX */ + 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 42, 41, 31, 24, 36,127, 37, 42, 45, 54, 47, 47, 21,128, 36, 37, /* 8X */ + 31,129,130, 53, 40,131,132, 58, 58, 40, 41,133,134, 45,SYM, 33, /* 9X */ + 30, 39, 38, 44,135,136, 35, 35,137,138,SYM,139, 33,140,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 30, 24, 50,141,SYM,SYM,SYM,SYM, 59, 59,SYM, /* BX */ SYM,SYM,SYM,SYM,SYM,SYM, 14, 14,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 43, 43,156, 49,157,158, 37, 20, 51,SYM,SYM,SYM,SYM,159,160,SYM, /* DX */ - 34, 57,161, 52, 52,162, 38, 38,163, 47,164, 50, 54, 54,165,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 50, 55, 55,SYM,SYM, /* FX */ + 46, 46,142, 54,143,144, 39, 21, 50,SYM,SYM,SYM,SYM,145,146,SYM, /* DX */ + 38, 57, 53, 51, 51,147, 34, 34,148, 44,149, 62, 55, 55,150,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 62, 52, 52,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 66; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 2, 66, 17, 67, 9, 68, 11, 69, 0, 70, 16, 71, 15, 72, 23, + 73, 1, 74, 26, 75, 27, 76, 6, 77, 12, 78, 4, 79, 8, 80, 13, + 81, 32, 82, 3, 83, 10, 84, 5, 85, 7, 86, 20, 87, 29, 88, 25, + 89, 28, 90, 22, 97, 2, 98, 17, 99, 9, 100, 11, 101, 0,102, 16, + 103, 15, 104, 23, 105, 1, 106, 26, 107, 27, 108, 6, 109, 12,110, 4, + 111, 8, 112, 13, 113, 32, 114, 3, 115, 10, 116, 5, 117, 7,118, 20, + 119, 29, 120, 25, 121, 28, 122, 22, 193, 30, 194, 24, 201, 31,206, 21, + 225, 30, 226, 24, 233, 31, 238, 21, 258, 14, 259, 14, 536, 19,537, 19, + 538, 18, 539, 18, +}; + /* Model Table: - * Total sequences: 981 - * First 512 sequences: 0.997762564143313 - * Next 512 sequences (512-1024): 0.002237435856687006 - * Rest: 3.0357660829594124e-18 + * Total sequences: 1066 + * First 512 sequences: 0.9975318123681904 + * Next 512 sequences (512-1024): 0.002424831763747681 + * Rest: 4.3355868061878584e-05 * Negative sequences: TODO */ static const PRUint8 RomanianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,2,3,3,3,2,2,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,0,3,3,3,3,3,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,3,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,2,3,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,2,2,3,3,3,3,0,2,2,3,3,2,3,0, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,0,3,3,3,2,2,2,0, - 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,0,3,2,3,3,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,2,0,3,2,3,3,0,3,3,2,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,2,2,3,3,3,0,2,3,3,3,2,2,2, - 3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,2,3,0,0,0,3,2,3,3,0,2,2,3,3,3,2,0, - 3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,2,2,3,3,2,0,0,3,2,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,0,2,3,0,2,0,2,3,3,0,2,2,3,0,2,2,0, - 2,3,0,3,3,3,3,3,0,3,3,3,3,3,0,3,0,3,3,3,0,3,3,0,0,0,2,2,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,0,2,3,3,2,3,3,2,3,0,0,2,3,2,3,3,0,2,0,3,2,2,2,0, - 3,3,3,3,0,3,3,3,3,2,2,2,3,2,3,2,3,0,0,0,0,0,0,2,3,0,0,0,2,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,2,0,2,2,2,3,0,2,2,3,2,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,3,2,3,3,3,2,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,1,3,3,0,3,3,3,3,3,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,2,3,2,3,3,3,0,2,2,3,3,2,3,0, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,3,0,3,3,3,2,2,2,0, + 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,3,2,2,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,2,3,2,2,3,3,2,2,3,3,2,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,3,3,3,0,2,3,3,3,2,2,2, + 3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,0,0,3,0,2,2,3,0,2,2,3,3,3,2,0, + 3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,2,2,2,3,2,2,0,3,2,2,2,0, + 3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,0,0,2,3,0,0,2,0,3,3,0,2,2,2,0,2,2,0, + 2,3,1,3,3,3,3,3,0,3,3,3,3,3,0,3,1,3,3,3,3,2,3,0,0,0,2,2,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,2,3,0,0,3,1,2,3,3,0,1,2,3,2,1,2,0, + 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,3,3,1,0,0,0,0,0,2,3,0,2,1,2,2,2,2,0, + 3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,2,2,3,3,2,2,2,2,2,3,0,2,2,3,2,2,2,0, 3,3,3,0,0,0,0,3,2,2,2,0,0,0,3,0,0,0,0,0,2,2,0,0,2,0,0,2,0,0,0,0,0, - 3,3,3,0,3,3,3,3,3,3,0,2,2,0,3,0,0,0,0,0,0,2,0,0,2,0,0,2,0,0,0,0,0, - 0,3,0,2,3,0,3,0,0,0,0,0,3,0,0,0,0,0,2,3,0,0,2,2,0,0,0,2,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,2,2,3,2,0,3,2,2,2,0,0,0,0,0,0,3,0,2,2,2,0,2,0,0, - 3,3,3,2,2,2,2,3,3,0,2,3,2,2,3,2,0,3,0,0,0,3,3,2,3,0,0,2,2,0,2,2,0, - 3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,0,2,3,0,0,0,2,2,0,2,0,2,2,3,2,2,2,0, - 0,3,0,3,3,3,3,3,0,2,2,2,3,0,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,0,3,0,3,3,3,2,0,0,3,3,0,3,0,0,0,0,3,0,2,2,3,0,0,3,0,0,0,0, - 3,3,3,2,2,2,3,3,3,0,2,2,2,0,2,0,0,2,0,0,0,2,0,0,2,0,0,2,0,0,2,0,0, - 3,3,3,3,2,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,2,2,0,3,0,0,0,2,3,2,2,2,0, - 3,2,3,3,3,2,3,2,3,3,3,3,3,2,0,2,0,2,0,0,0,2,2,2,0,0,2,2,0,2,2,0,0, - 3,3,3,2,3,2,2,2,3,2,3,2,2,2,0,0,2,2,0,0,0,0,0,3,0,0,0,0,2,3,0,0,0, - 2,3,0,3,3,2,2,0,0,2,2,2,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0, - 0,3,2,2,2,2,2,0,0,2,2,2,2,2,0,2,0,2,0,0,0,2,2,0,0,0,2,2,0,0,0,0,0, - 0,0,2,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,0,3,3,2,3,3,3,0,0,2,2,2,0,0,0,0,0,2,0,0,0,2,0,0,2,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,2,2,3,2,1,3,2,0,2,0,0,0,2,2,2,3,0,2,2,2,0,2,0,0, + 0,3,0,2,3,0,3,0,0,2,0,0,3,0,0,2,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,2,2,2,3,3,0,2,3,2,2,3,2,0,3,0,0,3,0,3,2,3,0,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,0,2,3,0,0,2,0,2,0,2,0,2,2,3,2,2,2,0, + 0,3,0,3,3,3,3,3,0,1,2,0,3,0,0,0,0,0,2,3,0,0,2,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,2,3,0,3,3,3,2,2,2,3,3,0,3,2,0,0,3,0,0,0,2,3,0,0,3,2,0,0,0, + 3,3,3,2,2,2,3,3,3,2,2,2,2,0,2,0,0,2,0,0,1,0,2,0,2,0,0,2,0,0,2,0,0, + 3,3,3,3,2,2,3,3,3,2,3,1,3,2,2,2,2,2,1,2,2,0,2,3,2,0,2,2,3,2,2,2,0, + 3,2,3,3,3,2,3,2,3,3,3,3,3,2,0,2,2,2,0,0,2,0,2,2,0,2,2,2,0,2,2,0,0, + 3,3,3,3,3,2,3,2,3,2,3,2,2,2,0,0,2,2,0,0,0,0,0,3,0,0,0,2,2,3,0,0,0, + 2,3,0,2,3,2,2,0,0,2,2,2,2,2,0,2,0,0,0,0,2,0,2,2,0,0,0,2,2,0,0,0,0, + 0,3,2,2,1,2,2,0,2,2,2,2,2,2,0,2,0,2,0,0,2,0,2,0,0,0,2,2,0,0,0,0,0, + 0,2,2,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, }; -const SequenceModel Iso_8859_16RomanianModel = +const SequenceModel Iso_8859_2RomanianModel = { - Iso_8859_16_CharToOrderMap, + Iso_8859_2_CharToOrderMap, RomanianLangModel, 33, - (float)0.997762564143313, + (float)0.9975318123681904, PR_TRUE, - "ISO-8859-16", + "ISO-8859-2", "ro" }; -const SequenceModel Iso_8859_2RomanianModel = +const SequenceModel Iso_8859_16RomanianModel = { - Iso_8859_2_CharToOrderMap, + Iso_8859_16_CharToOrderMap, RomanianLangModel, 33, - (float)0.997762564143313, + (float)0.9975318123681904, PR_TRUE, - "ISO-8859-2", + "ISO-8859-16", "ro" }; @@ -218,7 +233,7 @@ const SequenceModel Windows_1250RomanianModel = Windows_1250_CharToOrderMap, RomanianLangModel, 33, - (float)0.997762564143313, + (float)0.9975318123681904, PR_TRUE, "WINDOWS-1250", "ro" @@ -229,8 +244,18 @@ const SequenceModel Ibm852RomanianModel = Ibm852_CharToOrderMap, RomanianLangModel, 33, - (float)0.997762564143313, + (float)0.9975318123681904, PR_TRUE, "IBM852", "ro" }; + +const LanguageModel RomanianModel = +{ + "ro", + Unicode_CharOrder, + 66, + RomanianLangModel, + 33, + (float)0.9975318123681904, +}; diff --git a/src/LangModels/LangSlovakModel.cpp b/src/LangModels/LangSlovakModel.cpp index 480b4b5..ffc3410 100644 --- a/src/LangModels/LangSlovakModel.cpp +++ b/src/LangModels/LangSlovakModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Slovak *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-21 13:33:10.331339 + * On: 2021-03-16 20:13:09.022988 **/ /* Character Mapping Table: @@ -61,212 +62,242 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Ibm852_CharToOrderMap[] = +static const unsigned char Iso_8859_2_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */ - 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */ - 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 51, 46, 25, 62, 38, 48, 47, 51, 49, 54, 50, 50, 63, 64, 38, 47, /* 8X */ - 25, 42, 42, 32, 43, 33, 33, 65, 66, 43, 46, 31, 31, 49,SYM, 24, /* 9X */ - 21, 23, 35, 27, 67, 68, 26, 26, 69, 70,SYM, 71, 24, 59,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 21, 72, 41, 59,SYM,SYM,SYM,SYM, 61, 61,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM, 56, 56,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 55, 55, 39, 54, 39, 36, 23, 73, 41,SYM,SYM,SYM,SYM, 74, 48,SYM, /* DX */ - 35, 58, 32, 52, 52, 36, 28, 28, 44, 27, 44, 60, 22, 22, 75,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 60, 45, 45,SYM,SYM, /* FX */ + SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */ + 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */ + 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM, 64,SYM, 49,SYM, 33, 65,SYM,SYM, 29, 59, 31, 66,SYM, 26, 61, /* AX */ + SYM, 67,SYM, 49,SYM, 33, 68,SYM,SYM, 29, 59, 31, 69,SYM, 26, 61, /* BX */ + 45, 19, 70, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 71, 39, /* CX */ + 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 72, 60, /* DX */ + 45, 19, 73, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 74, 39, /* EX */ + 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 75,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_2_CharToOrderMap[] = +static const unsigned char Windows_1250_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */ - 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */ - 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 76,SYM, 49,SYM, 33, 77,SYM,SYM, 28, 59, 31, 78,SYM, 26, 61, /* AX */ - SYM, 79,SYM, 49,SYM, 33, 80,SYM,SYM, 28, 59, 31, 81,SYM, 26, 61, /* BX */ - 44, 21, 82, 56, 38, 42, 47, 51, 24, 25, 83, 54, 41, 23, 84, 39, /* CX */ - 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22, 85, 58, /* DX */ - 44, 21, 86, 56, 38, 42, 47, 51, 24, 25, 87, 54, 41, 23, 88, 39, /* EX */ - 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22, 89,SYM, /* FX */ + SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */ + 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */ + 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 76, 31, 26, 77, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 78, 31, 26, 79, /* 9X */ + SYM,SYM,SYM, 49,SYM, 80,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM, 61, /* AX */ + SYM,SYM,SYM, 49,SYM,SYM,SYM,SYM,SYM, 81, 59,SYM, 33,SYM, 33, 61, /* BX */ + 45, 19, 82, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 83, 39, /* CX */ + 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 84, 60, /* DX */ + 45, 19, 85, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 86, 39, /* EX */ + 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 87,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Mac_Centraleurope_CharToOrderMap[] = +static const unsigned char Ibm852_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */ - 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */ - 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 38, 90, 91, 25, 92, 43, 46, 21, 93, 24, 38, 24, 47, 47, 25, 94, /* 8X */ - 95, 39, 23, 39, 96, 97, 98, 35, 99, 32, 43,100, 27, 41, 41, 46, /* 9X */ - SYM,SYM,101,SYM,SYM,SYM,SYM, 58,SYM,SYM,SYM,102,SYM,SYM,103,104, /* AX */ - 105, 57,SYM,SYM, 57,106,SYM,SYM, 49,107,108, 33, 33, 42, 42,109, /* BX */ - 110, 52,SYM,SYM, 52, 36,SYM,SYM,SYM,SYM,SYM, 36, 50,111, 50, 53, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 53, 44, 44, 45,SYM,SYM, 45,112, /* DX */ - 113, 28,SYM,SYM, 28,114,115, 21, 31, 31, 23, 26, 26,116, 35, 32, /* EX */ - 117, 48, 27, 48, 60, 60,118,119, 22, 22,120, 61, 49, 61,121,SYM, /* FX */ + SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */ + 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */ + 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 50, 46, 25, 88, 38, 48, 47, 50, 49, 51, 62, 62, 89, 90, 38, 47, /* 8X */ + 25, 41, 41, 32, 43, 33, 33, 91, 92, 43, 46, 31, 31, 49,SYM, 24, /* 9X */ + 19, 23, 34, 27, 93, 94, 26, 26, 57, 57,SYM, 95, 24, 59,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 19, 96, 42, 59,SYM,SYM,SYM,SYM, 61, 61,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM, 55, 55,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 53, 53, 39, 51, 39, 36, 23, 97, 42,SYM,SYM,SYM,SYM, 98, 48,SYM, /* DX */ + 34, 60, 32, 54, 54, 36, 29, 29, 45, 27, 45, 56, 22, 22, 99,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 56, 44, 44,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Windows_1250_CharToOrderMap[] = +static const unsigned char Mac_Centraleurope_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */ - 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */ - 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 28,SYM,122, 31, 26,123, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 28,SYM,124, 31, 26,125, /* 9X */ - SYM,SYM,SYM, 49,SYM,126,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM, 61, /* AX */ - SYM,SYM,SYM, 49,SYM,SYM,SYM,SYM,SYM,127, 59,SYM, 33,SYM, 33, 61, /* BX */ - 44, 21,128, 56, 38, 42, 47, 51, 24, 25,129, 54, 41, 23,130, 39, /* CX */ - 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22,131, 58, /* DX */ - 44, 21,132, 56, 38, 42, 47, 51, 24, 25,133, 54, 41, 23,134, 39, /* EX */ - 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22,135,SYM, /* FX */ + SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */ + 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */ + 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 38, 63, 63, 25,100, 43, 46, 19,101, 24, 38, 24, 47, 47, 25,102, /* 8X */ + 103, 39, 23, 39,104,105,106, 34,107, 32, 43,108, 27, 42, 42, 46, /* 9X */ + SYM,SYM, 57,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM, 57,SYM,SYM,109,110, /* AX */ + 111, 58,SYM,SYM, 58,112,SYM,SYM, 49,113,114, 33, 33, 41, 41,115, /* BX */ + 116, 54,SYM,SYM, 54, 36,SYM,SYM,SYM,SYM,SYM, 36, 62,117, 62, 52, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 52, 45, 45, 44,SYM,SYM, 44,118, /* DX */ + 119, 29,SYM,SYM, 29,120,121, 19, 31, 31, 23, 26, 26,122, 34, 32, /* EX */ + 123, 48, 27, 48, 56, 56,124,125, 22, 22,126, 61, 49, 61,127,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 92; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 1, 66, 20, 67, 15, 68, 11, 69, 2, 70, 28, 71, 30, 72, 17, + 73, 4, 74, 18, 75, 8, 76, 10, 77, 12, 78, 3, 79, 0, 80, 13, + 81, 40, 82, 6, 83, 7, 84, 5, 85, 14, 86, 9, 87, 37, 88, 35, + 89, 21, 90, 16, 97, 1, 98, 20, 99, 15, 100, 11, 101, 2,102, 28, + 103, 30, 104, 17, 105, 4, 106, 18, 107, 8, 108, 10, 109, 12,110, 3, + 111, 0, 112, 13, 113, 40, 114, 6, 115, 7, 116, 5, 117, 14,118, 9, + 119, 37, 120, 35, 121, 21, 122, 16, 193, 19, 196, 38, 201, 25,205, 23, + 211, 34, 212, 32, 214, 43, 218, 27, 221, 22, 225, 19, 228, 38,233, 25, + 237, 23, 243, 34, 244, 32, 246, 43, 250, 27, 253, 22, 268, 24,269, 24, + 270, 39, 271, 39, 282, 42, 283, 42, 313, 41, 314, 41, 317, 33,318, 33, + 327, 36, 328, 36, 340, 45, 341, 45, 344, 44, 345, 44, 352, 29,353, 29, + 356, 31, 357, 31, 381, 26, 382, 26, +}; + /* Model Table: - * Total sequences: 1181 - * First 512 sequences: 0.9733303573968434 - * Next 512 sequences (512-1024): 0.026317344239265295 - * Rest: 0.0003522983638913346 + * Total sequences: 1198 + * First 512 sequences: 0.9724967373205526 + * Next 512 sequences (512-1024): 0.02707798928941092 + * Rest: 0.00042527339003644096 * Negative sequences: TODO */ static const PRUint8 SlovakLangModel[] = { - 2,2,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2, - 0,0,3,2,3,1,2,3,3,1,0,3,2,0,3,2,0,1,2,0,0,0,0, - 2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0, - 0,0,3,0,3,0,3,3,3,3,0,2,3,1,2,2,0,2,2,0,0,0,0, - 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3, - 0,2,3,0,3,2,3,3,3,2,0,3,3,3,3,2,0,3,2,0,0,1,0, - 3,3,3,3,3,3,2,3,3,2,2,3,2,2,3,3,2,2,2,3,2,3, - 3,3,3,3,2,3,3,2,3,0,2,0,0,2,0,2,0,0,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3, - 0,3,3,2,3,0,3,3,3,3,0,2,2,3,2,2,0,0,2,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,1,3,2,3,2,3,3,2,3,2,3, - 3,3,2,3,0,3,2,2,2,1,0,2,0,3,2,2,2,2,1,2,1,1,2, - 3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,2,2,2,3,3,3, - 3,3,3,3,2,2,2,2,3,2,3,0,2,3,2,2,2,0,2,0,0,1,0, - 3,3,3,2,3,3,3,2,2,3,3,3,3,1,3,3,2,2,2,3,2,3, - 3,2,2,3,2,3,2,2,2,0,3,2,0,2,2,2,0,0,0,0,0,2,1, - 3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,2,2,3,2,3, - 2,3,2,2,1,3,0,2,2,3,2,2,0,2,2,2,0,0,2,0,0,0,0, - 3,3,3,3,3,2,3,2,3,0,3,3,2,3,3,2,3,2,0,3,2,3, - 3,3,2,3,2,2,3,1,2,0,2,0,0,0,2,0,3,2,0,2,2,1,2, - 3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,2,2,3,3,3, - 3,3,2,3,2,3,2,2,3,0,1,0,0,3,2,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3, - 3,3,2,2,2,2,2,2,2,0,3,3,2,2,2,2,0,0,0,2,2,0,0, - 3,3,3,3,3,3,2,2,3,1,2,2,3,3,3,2,0,0,2,3,3,3, - 2,3,0,2,2,2,0,0,2,0,3,0,1,2,1,0,3,0,2,0,0,2,2, - 3,3,3,3,3,3,3,2,2,0,3,2,2,2,3,2,1,2,0,2,2,3, - 2,3,1,2,0,2,2,0,1,2,3,1,0,2,2,0,2,0,0,2,2,0,0, - 2,2,2,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3, - 0,3,3,1,3,1,2,2,3,2,0,2,2,0,1,2,0,2,2,0,0,0,0, - 3,3,3,3,3,2,2,3,2,2,2,2,2,0,3,2,2,3,0,2,0,2, - 1,3,0,2,0,3,0,1,2,2,0,0,0,2,2,0,0,0,2,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3, - 3,2,1,2,1,2,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,2,2,3,2,3,1,3,2,0,2,1,3,2,3, - 3,2,2,2,0,2,2,1,0,0,0,2,0,1,2,2,1,0,0,0,2,1,2, - 3,3,3,3,3,3,2,2,3,3,2,2,3,2,3,2,2,2,2,0,2,2, - 0,3,2,0,0,3,3,0,2,1,0,2,0,2,0,0,1,0,0,0,0,0,0, - 2,2,2,3,2,3,3,3,3,3,3,2,3,3,2,3,3,3,2,0,3,0, - 0,0,2,0,2,2,3,1,2,3,0,1,0,1,2,1,0,0,0,0,0,0,0, - 3,3,3,3,3,1,3,2,3,2,3,3,2,0,3,2,2,2,3,3,2,3, - 2,2,2,2,1,2,2,0,2,0,1,0,0,1,2,0,1,0,0,0,0,1,0, - 0,0,0,3,0,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,0, - 0,0,2,0,2,0,3,0,1,0,0,2,0,0,2,0,0,2,0,0,0,0,0, - 0,0,0,2,0,2,3,2,2,3,2,2,3,2,0,3,3,2,2,0,2,0, - 0,0,1,0,2,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 0,2,0,3,0,3,3,3,3,3,3,2,3,3,0,3,2,2,2,0,2,0, - 0,0,2,0,2,0,3,0,1,2,0,2,0,0,1,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,2,0,3,0,2,0,2,2,0,0,0,0,0,2, - 0,3,0,1,1,1,3,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0, - 1,0,0,2,0,3,3,2,2,2,2,2,3,2,0,3,3,3,0,0,1,0, - 0,0,2,0,0,0,2,2,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 3,3,3,3,3,0,1,2,3,1,1,3,1,0,3,0,0,0,2,0,2,0, - 0,3,0,1,0,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, - 1,0,0,3,0,3,3,2,3,3,3,3,2,3,0,3,3,2,0,0,2,0, - 0,0,3,0,2,0,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,0,2,2,2,3,1,2,3,2,0,0,2,0,0,2,1, - 0,3,2,1,0,1,2,0,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0, - 3,3,3,2,3,2,3,0,2,1,2,0,2,0,3,0,0,2,0,3,0,2, - 0,2,0,2,0,1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,2,3,0,2,2,3,2,2,0,3,2,0,2,0,2,0,2, - 0,0,0,2,0,2,0,0,2,0,0,0,0,2,0,1,0,0,0,0,0,2,0, - 3,3,0,0,1,0,2,1,0,0,0,2,0,1,2,0,0,0,0,0,0,1, + 2,2,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,0, + 0,3,2,3,1,3,2,3,1,0,3,0,2,3,2,0,1,2,0,0,0,1,0, + 2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0, + 0,3,0,3,0,3,3,3,3,0,2,1,3,2,2,0,2,2,0,0,0,2,0, + 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0, + 2,3,0,3,2,3,3,3,2,0,3,3,3,3,2,0,3,2,0,0,1,0,0, + 3,3,3,3,3,3,2,3,3,2,2,3,2,2,3,3,2,2,2,3,2,3,3, + 3,3,3,2,3,2,3,3,0,2,0,2,0,0,2,0,0,2,0,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0, + 3,3,2,3,0,3,3,3,3,0,2,3,2,2,2,0,0,2,0,0,1,2,0, + 3,3,3,3,3,2,3,3,3,3,3,1,3,2,3,2,3,3,2,3,3,3,3, + 3,2,3,0,3,2,2,2,1,0,2,3,0,2,1,2,2,1,1,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,3,3,3, + 3,3,3,2,2,2,2,3,2,2,0,3,2,2,2,2,0,2,0,0,2,0,0, + 3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,2, + 3,2,2,1,3,2,0,2,3,2,2,1,0,2,2,0,0,2,0,0,1,0,0, + 3,3,3,2,3,3,3,2,2,3,3,3,2,2,3,3,2,2,2,3,2,3,3, + 2,2,3,2,3,2,2,2,0,3,2,2,0,2,2,0,0,0,0,0,2,2,1, + 3,3,3,3,3,2,3,3,2,0,3,3,1,3,3,2,3,2,0,3,2,3,3, + 3,2,3,2,2,1,3,2,0,2,0,0,0,2,0,3,2,0,2,2,1,2,2, + 3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,2,2,3,2,3,3, + 3,2,3,2,3,2,2,3,0,1,0,3,0,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3, + 3,2,2,2,2,2,2,2,0,3,3,2,2,2,2,0,0,0,2,2,0,2,0, + 3,3,3,3,3,3,2,3,2,0,2,2,3,3,3,2,0,0,2,3,3,3,2, + 3,1,2,2,2,0,0,1,0,3,0,2,1,1,0,3,0,1,0,0,1,0,2, + 3,3,3,3,3,3,3,2,2,0,3,2,2,2,3,2,1,2,0,3,2,2,2, + 3,1,2,0,2,2,2,0,2,3,1,2,0,2,0,2,0,0,2,2,0,2,0, + 2,2,2,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0, + 2,3,1,3,1,2,3,3,2,0,2,0,2,1,2,0,2,2,0,0,0,0,0, + 3,3,3,3,3,2,2,2,3,2,2,2,1,0,3,2,2,3,0,2,0,2,1, + 3,0,2,0,3,1,0,2,2,0,0,2,0,2,0,0,0,2,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,3, + 2,1,2,1,2,0,2,2,2,0,2,2,0,2,2,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,2,2,3,2,3,1,3,2,0,2,1,3,2,3,3, + 2,1,2,0,2,1,2,0,0,0,2,1,0,2,2,1,0,0,2,0,2,1,2, + 3,3,3,3,3,3,2,3,2,3,2,2,3,2,3,2,2,2,2,2,2,0,0, + 3,2,0,0,3,0,3,2,0,0,2,2,0,1,0,1,0,0,0,0,0,0,0, + 0,0,0,3,0,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,0,0, + 0,2,0,2,0,0,3,1,0,0,2,0,0,2,0,0,2,0,0,0,0,2,0, + 3,3,3,3,3,2,3,3,2,2,3,3,2,0,3,2,2,2,3,3,2,3,2, + 2,2,2,1,2,0,2,2,0,1,0,1,0,2,0,1,0,0,0,0,1,2,0, + 2,2,2,3,2,3,3,3,3,3,3,2,3,3,2,3,3,3,2,0,3,0,0, + 0,2,0,2,2,1,3,2,3,0,1,1,0,2,1,0,0,0,0,0,0,0,0, + 0,0,0,2,0,2,3,2,2,3,2,2,3,2,0,3,3,2,2,0,2,0,0, + 0,2,0,1,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 0,2,0,3,0,3,3,3,3,3,3,2,3,3,0,3,2,2,2,0,2,0,0, + 0,2,0,2,0,0,3,1,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,2,3,0,3,0,2,0,2,2,0,0,0,2,0,0,0, + 3,0,1,1,1,0,3,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0, + 2,0,2,2,0,3,3,2,2,2,2,2,3,2,0,3,3,3,0,0,1,0,0, + 0,2,0,0,0,2,2,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 3,3,3,3,3,0,1,2,2,1,2,3,0,0,3,0,0,0,1,0,2,0,0, + 3,0,2,0,0,0,2,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0, + 1,0,0,3,0,3,3,3,2,3,3,3,2,3,0,3,3,2,0,0,2,0,0, + 0,3,0,2,0,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,2,3,1,2,1,2,0,2,0,3,0,0,1,0,2,0,3,0, + 2,0,2,0,1,2,1,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,0,2,2,2,3,1,2,3,2,0,0,2,0,1,2,0,0, + 3,2,1,0,1,0,2,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,2,3,2,0,2,3,2,2,1,3,1,0,2,1,2,0,2,0, + 0,0,2,0,2,0,0,2,0,0,0,2,0,0,1,0,0,0,0,0,2,0,0, + 3,3,0,0,1,0,2,0,1,0,0,2,0,1,2,0,0,0,0,2,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,3,3,3,3,3,2,1,0,0,2,3,0,0,0,2,0, - 0,0,0,0,3,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,0,0,0,3,2,2,0,0,2,0,3,0,1,1,0,0,2,0, - 0,0,1,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,3,2,0,1,0,0,2,0,2,3,2,0,0,0,0,2,0,0, - 0,2,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 0,0,0,3,0,2,3,1,2,1,2,3,3,2,0,2,2,0,0,0,3,0, - 0,0,0,0,1,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,0,0,0,0,0,0,0,0,1,2,0,2,0,0,2,0,0,0,2, + 0,0,0,0,0,2,3,3,3,3,3,2,1,0,0,2,2,0,0,0,2,0,0, + 0,0,0,3,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,3,0,0,0,2,3,2,0,0,2,0,3,0,1,1,0,0,2,0,0, + 0,2,0,0,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,3,0,2,3,2,1,1,2,3,3,3,0,1,2,0,0,0,3,0,0, + 0,0,0,1,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,2,3,3,0,0,1,0,2,0,2,2,2,0,0,1,0,0,0,1,0, + 1,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 3,3,0,0,0,0,0,0,0,0,0,1,2,0,2,0,0,2,0,2,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,1,3,2,0,1,2,0,2,0,2,1,0,0,0,2,0,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 0,0,0,2,0,2,1,1,0,0,0,2,0,0,0,2,2,0,0,0,2,0, - 0,0,3,0,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0, - 2,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,2,0,0,1,0,0,0,0,2,0,0,0,0,0,0,2, + 2,2,2,1,3,2,0,2,1,0,2,0,1,0,0,0,0,2,0,0,1,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, + 0,0,0,2,0,2,1,0,1,0,0,2,0,0,0,2,2,0,0,0,2,0,0, + 0,3,0,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0, + 2,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, + 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,2,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,2,2,2,0,0,1,0,1,0,0,1,2,0,2,0,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, - 0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 0,0,0,2,0,2,2,0,2,0,1,2,1,0,0,0,0,2,0,0,2,0, - 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0, - 0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, + 0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0, + 0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, + 0,0,0,2,0,2,2,0,2,0,2,0,1,0,0,1,2,0,2,0,0,0,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,2,0,2,2,2,0,0,2,2,2,0,0,0,0,2,0,0,1,0,0, + 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0, + 0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, }; -const SequenceModel Ibm852SlovakModel = +const SequenceModel Iso_8859_2SlovakModel = { - Ibm852_CharToOrderMap, + Iso_8859_2_CharToOrderMap, SlovakLangModel, - 45, - (float)0.9733303573968434, + 46, + (float)0.9724967373205526, PR_TRUE, - "IBM852", + "ISO-8859-2", "sk" }; -const SequenceModel Iso_8859_2SlovakModel = +const SequenceModel Windows_1250SlovakModel = { - Iso_8859_2_CharToOrderMap, + Windows_1250_CharToOrderMap, SlovakLangModel, - 45, - (float)0.9733303573968434, + 46, + (float)0.9724967373205526, PR_TRUE, - "ISO-8859-2", + "WINDOWS-1250", + "sk" +}; + +const SequenceModel Ibm852SlovakModel = +{ + Ibm852_CharToOrderMap, + SlovakLangModel, + 46, + (float)0.9724967373205526, + PR_TRUE, + "IBM852", "sk" }; @@ -274,20 +305,19 @@ const SequenceModel Mac_CentraleuropeSlovakModel = { Mac_Centraleurope_CharToOrderMap, SlovakLangModel, - 45, - (float)0.9733303573968434, + 46, + (float)0.9724967373205526, PR_TRUE, "MAC-CENTRALEUROPE", "sk" }; -const SequenceModel Windows_1250SlovakModel = +const LanguageModel SlovakModel = { - Windows_1250_CharToOrderMap, + "sk", + Unicode_CharOrder, + 92, SlovakLangModel, - 45, - (float)0.9733303573968434, - PR_TRUE, - "WINDOWS-1250", - "sk" + 46, + (float)0.9724967373205526, }; diff --git a/src/LangModels/LangSloveneModel.cpp b/src/LangModels/LangSloveneModel.cpp index 160f054..ccb4f7f 100644 --- a/src/LangModels/LangSloveneModel.cpp +++ b/src/LangModels/LangSloveneModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Slovene *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-28 22:06:46.134717 + * On: 2021-03-16 20:20:05.416974 **/ /* Character Mapping Table: @@ -67,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM, 41,SYM, 42,SYM, 43, 44,SYM,SYM, 22, 45, 46, 47,SYM, 23, 48, /* AX */ SYM, 49,SYM, 50,SYM, 51, 52,SYM,SYM, 22, 53, 54, 55,SYM, 23, 56, /* BX */ - 57, 32, 58, 59, 60, 61, 37, 34, 21, 29, 62, 36, 63, 30, 64, 65, /* CX */ - 66, 67, 68, 31, 35, 69, 70,SYM, 71, 72, 39, 73, 74, 40, 75, 76, /* DX */ - 77, 32, 78, 79, 80, 81, 37, 34, 21, 29, 82, 36, 83, 30, 84, 85, /* EX */ - 86, 87, 88, 31, 35, 89, 90,SYM, 91, 92, 39, 93, 94, 40, 95,SYM, /* FX */ + 57, 33, 58, 59, 60, 61, 62, 31, 21, 29, 63, 37, 64, 30, 65, 66, /* CX */ + 67, 68, 69, 32, 36, 70, 71,SYM, 72, 73, 39, 74, 75, 40, 76, 77, /* DX */ + 78, 33, 79, 80, 81, 82, 83, 31, 21, 29, 84, 37, 85, 30, 86, 87, /* EX */ + 88, 89, 90, 32, 36, 91, 92,SYM, 93, 94, 39, 95, 96, 40, 97,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -88,18 +89,18 @@ static const unsigned char Iso_8859_16_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 96, 97, 98,SYM,SYM, 22,SYM, 22,SYM, 99,SYM,100,SYM,101,102, /* AX */ - SYM,SYM, 21,103, 23,SYM,SYM,SYM, 23, 21,104,SYM,105,106,107,108, /* BX */ - 109, 32,110,111,112, 37,113, 34,114, 29, 33, 36,115, 30,116,117, /* CX */ - 118,119,120, 31, 35,121,122,123,124,125, 39,126,127,128,129,130, /* DX */ - 131, 32,132,133,134, 37,135, 34,136, 29, 33, 36,137, 30,138,139, /* EX */ - 140,141,142, 31, 35,143,144,145,146,147, 39,148,149,150,151,152, /* FX */ + SYM, 98, 99,100,SYM,SYM, 22,SYM, 22,SYM,101,SYM,102,SYM,103,104, /* AX */ + SYM,SYM, 21,105, 23,SYM,SYM,SYM, 23, 21,106,SYM,107,108,109,110, /* BX */ + 111, 33,112,113,114,115,116, 31, 35, 29, 34, 37,117, 30,118,119, /* CX */ + 120,121,122, 32, 36,123,124,125,126,127, 39,128,129,130,131,132, /* DX */ + 133, 33,134,135,136,137,138, 31, 35, 29, 34, 37,139, 30,140,141, /* EX */ + 142,143,144, 32, 36,145,146,147,148,149, 39,150,151,152,153,154, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,102 +110,115 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,153,154, 23,155, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,156,157, 23,158, /* 9X */ - SYM,SYM,SYM,159,SYM,160,SYM,SYM,SYM,SYM,161,SYM,SYM,SYM,SYM,162, /* AX */ - SYM,SYM,SYM,163,SYM,SYM,SYM,SYM,SYM,164,165,SYM,166,SYM,167,168, /* BX */ - 169, 32,170,171,172,173, 37, 34, 21, 29,174, 36,175, 30,176,177, /* CX */ - 178,179,180, 31, 35,181,182,SYM,183,184, 39,185,186, 40,187,188, /* DX */ - 189, 32,190,191,192,193, 37, 34, 21, 29,194, 36,195, 30,196,197, /* EX */ - 198,199,200, 31, 35,201,202,SYM,203,204, 39,205,206, 40,207,SYM, /* FX */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,155,156, 23,157, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,158,159, 23,160, /* 9X */ + SYM,SYM,SYM,161,SYM,162,SYM,SYM,SYM,SYM,163,SYM,SYM,SYM,SYM,164, /* AX */ + SYM,SYM,SYM,165,SYM,SYM,SYM,SYM,SYM,166,167,SYM,168,SYM,169,170, /* BX */ + 171, 33,172,173,174,175,176, 31, 21, 29,177, 37,178, 30,179,180, /* CX */ + 181,182,183, 32, 36,184,185,SYM,186,187, 39,188,189, 40,190,191, /* DX */ + 192, 33,193,194,195,196,197, 31, 21, 29,198, 37,199, 30,200,201, /* EX */ + 202,203,204, 32, 36,205,206,SYM,207,208, 39,209,210, 40,211,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Mac_Centraleurope_CharToOrderMap[] = +static const unsigned char Ibm852_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 208,209,210, 29,211,212,213, 32,214, 21,215, 21, 37, 37, 29,216, /* 8X */ - 217,218, 30,219, 38, 38,220, 31,221, 35,222,223, 39,224,225,226, /* 9X */ - SYM,SYM,227,SYM,SYM,SYM,SYM,228,SYM,SYM,SYM,229,SYM,SYM,230,231, /* AX */ - 232,233,SYM,SYM,234,235,SYM,SYM,236,237,238,239,240,241,242,243, /* BX */ - 244,245,SYM,SYM,246,247,SYM,SYM,SYM,SYM,SYM,248,249,249,249,249, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,249,SYM,SYM,249,249, /* DX */ - 249, 22,SYM,SYM, 22,249,249, 32,249,249, 30, 23, 23,249, 31, 35, /* EX */ - 249,249, 39,249,249,249,249,249, 40, 40,249,249,249,249,249,SYM, /* FX */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 31,212, 29,213,214,215,216, 31,217, 37,218,219,220,221,222,223, /* 8X */ + 29,224,225, 36,226,227,228,229,230,231,232,233,234,235,SYM, 21, /* 9X */ + 33, 30, 32, 39,236,237, 23, 23,238,239,SYM,240, 21,241,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 33,242,243,244,SYM,SYM,SYM,SYM,245,246,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM,247,248,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 249,249,249, 37,249,249, 30,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */ + 32,249, 36,249,249,249, 22, 22,249, 39,249,249, 40, 40,249,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Ibm852_CharToOrderMap[] = +static const unsigned char Mac_Centraleurope_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 8, 12, 9, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 27, 25, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 34,249, 29,249,249,249, 37, 34,249, 36,249,249,249,249,249, 37, /* 8X */ - 29,249,249, 35,249,249,249,249,249,249,249,249,249,249,SYM, 21, /* 9X */ - 32, 30, 31, 39,249,249, 23, 23,249,249,SYM,249, 21,249,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 32,249,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 249,249,249, 36,249,249, 30,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */ - 31,249, 35,249,249,249, 22, 22,249, 39,249,249, 40, 40,249,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ + 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 249,249,249, 29,249,249,249, 33,249, 21,249, 21,249,249, 29,249, /* 8X */ + 249,249, 30,249, 38, 38,249, 32,249, 36,249,249, 39,249,249,249, /* 9X */ + SYM,SYM,249,SYM,SYM,SYM,SYM,249,SYM,SYM,SYM,249,SYM,SYM,249,249, /* AX */ + 249,249,SYM,SYM,249,249,SYM,SYM,249,249,249,249,249,249,249,249, /* BX */ + 249,249,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,249,249,249,249,249, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,249,SYM,SYM,249,249, /* DX */ + 249, 22,SYM,SYM, 22,249,249, 33,249,249, 30, 23, 23,249, 32, 36, /* EX */ + 249,249, 39,249,249,249,249,249, 40, 40,249,249,249,249,249,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 58; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 18, 67, 19, 68, 13, 69, 1, 70, 24, 71, 17, 72, 20, + 73, 2, 74, 9, 75, 12, 76, 8, 77, 14, 78, 4, 79, 3, 80, 11, + 81, 28, 82, 5, 83, 6, 84, 7, 85, 16, 86, 10, 87, 25, 88, 27, + 89, 26, 90, 15, 97, 0, 98, 18, 99, 19, 100, 13, 101, 1,102, 24, + 103, 17, 104, 20, 105, 2, 106, 9, 107, 12, 108, 8, 109, 14,110, 4, + 111, 3, 112, 11, 113, 28, 114, 5, 115, 6, 116, 7, 117, 16,118, 10, + 119, 25, 120, 27, 121, 26, 122, 15, 268, 21, 269, 21, 352, 22,353, 22, + 381, 23, 382, 23, +}; + /* Model Table: - * Total sequences: 727 - * First 512 sequences: 0.9983524317161332 - * Next 512 sequences (512-1024): 0.0016475682838668457 - * Rest: -3.859759734048396e-17 + * Total sequences: 698 + * First 512 sequences: 0.998296272473889 + * Next 512 sequences (512-1024): 0.00170372752611106 + * Rest: -2.8189256484623115e-17 * Negative sequences: TODO */ static const PRUint8 SloveneLangModel[] = { - 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,3,3,2,0,0,3,2,3,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,0,0,3,2,3,3,0, - 3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,2,3,2,3,3,3,2,3,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,2,3,2,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,2,0,0, - 3,3,3,3,3,3,3,3,2,3,0,3,3,3,2,2,3,3,3,3,3,2,2,0,0,0,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,3,0,2,3,3,0,3,0,2,0,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,2,3,3,2,2,2,0,2,2,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,0,2,0,0,0, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,0,0, - 3,3,3,3,3,3,3,2,0,3,3,3,2,2,2,0,3,2,3,2,3,0,0,0,2,2,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,3,0,2,2,0,3,3,2,2,0,3,0,0, - 3,3,3,3,3,3,3,3,0,3,2,3,3,3,2,2,3,2,2,3,3,0,0,0,2,2,3,2,2, - 3,3,3,3,3,3,2,3,0,3,3,3,3,2,2,2,3,0,2,0,0,2,0,0,2,0,2,2,0, - 3,3,3,3,3,3,0,0,3,3,2,2,3,2,0,0,3,0,2,2,0,0,2,0,0,0,0,0,0, - 3,3,3,3,3,2,0,3,3,3,2,3,3,0,0,0,3,0,0,0,0,3,0,2,0,0,0,0,0, - 3,3,3,2,3,2,0,2,3,3,2,0,3,0,0,0,3,2,3,2,0,0,0,2,0,0,0,0,0, - 3,3,3,3,2,3,3,3,0,3,0,0,0,2,2,0,3,2,0,2,2,0,0,0,3,2,2,2,0, - 3,3,3,3,2,2,2,3,0,0,2,3,0,2,2,0,3,2,3,3,2,0,0,0,2,2,2,2,0, - 3,3,2,3,3,2,3,3,3,3,0,2,2,2,2,0,2,2,2,3,2,0,0,0,0,2,0,2,0, - 3,3,3,3,3,0,3,0,0,2,0,0,0,0,2,0,2,2,2,0,2,0,0,0,2,0,2,3,0, - 0,0,0,0,2,0,0,2,0,2,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,3,3,3,2,2,0,3,3,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,0,2,0,3,3,3,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,0,3,3,2,3,0,0, + 3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,2,3,3,3,2,3,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,3,3,3,3,2,2,0,2,2,0, + 3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,0,3,3,2,3,3,2,2,0,0,0,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,0,3,2,2,3,3,0,3,0,0,2,3,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,2,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,2,3,3,3,2,2,0,2,3,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,0,0,2,2,0, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,2,3,0, + 3,3,3,3,3,3,3,2,3,0,3,3,0,0,2,2,3,3,3,2,3,0,0,0,0,0,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,3,0,3,3,0,3,2,2,2,0,3,0,0, + 3,3,3,3,2,3,3,3,3,0,2,3,3,3,2,2,3,2,3,3,3,0,0,0,2,2,2,2,2, + 3,3,3,3,3,3,2,3,3,0,3,2,3,3,2,3,3,0,2,0,0,2,0,0,2,2,2,0,0, + 3,3,3,3,3,3,0,2,3,3,2,2,3,2,0,0,3,0,2,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,0,3,3,3,2,3,3,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0, + 3,3,3,2,3,2,0,0,2,3,2,0,3,0,0,0,2,2,3,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,3,3,3,2,0,0,0,0,2,0,3,2,0,0,0,0,0,0,3,2,0,0,0, + 3,3,3,3,3,0,3,2,2,0,0,0,0,2,0,0,2,2,2,0,2,0,0,0,0,3,2,0,0, + 2,3,2,3,3,2,3,2,3,3,0,2,2,2,3,0,2,2,2,3,2,0,0,0,0,2,0,0,0, + 3,2,3,3,0,2,2,3,0,0,2,3,0,2,2,0,3,2,3,3,0,0,0,0,0,2,2,3,0, + 0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -213,7 +227,7 @@ const SequenceModel Iso_8859_2SloveneModel = Iso_8859_2_CharToOrderMap, SloveneLangModel, 29, - (float)0.9983524317161332, + (float)0.998296272473889, PR_TRUE, "ISO-8859-2", "sl" @@ -224,7 +238,7 @@ const SequenceModel Iso_8859_16SloveneModel = Iso_8859_16_CharToOrderMap, SloveneLangModel, 29, - (float)0.9983524317161332, + (float)0.998296272473889, PR_TRUE, "ISO-8859-16", "sl" @@ -235,30 +249,40 @@ const SequenceModel Windows_1250SloveneModel = Windows_1250_CharToOrderMap, SloveneLangModel, 29, - (float)0.9983524317161332, + (float)0.998296272473889, PR_TRUE, "WINDOWS-1250", "sl" }; +const SequenceModel Ibm852SloveneModel = +{ + Ibm852_CharToOrderMap, + SloveneLangModel, + 29, + (float)0.998296272473889, + PR_TRUE, + "IBM852", + "sl" +}; + const SequenceModel Mac_CentraleuropeSloveneModel = { Mac_Centraleurope_CharToOrderMap, SloveneLangModel, 29, - (float)0.9983524317161332, + (float)0.998296272473889, PR_TRUE, "MAC-CENTRALEUROPE", "sl" }; -const SequenceModel Ibm852SloveneModel = +const LanguageModel SloveneModel = { - Ibm852_CharToOrderMap, + "sl", + Unicode_CharOrder, + 58, SloveneLangModel, 29, - (float)0.9983524317161332, - PR_TRUE, - "IBM852", - "sl" + (float)0.998296272473889, }; diff --git a/src/LangModels/LangSwedishModel.cpp b/src/LangModels/LangSwedishModel.cpp index 3dca8e8..e07efba 100644 --- a/src/LangModels/LangSwedishModel.cpp +++ b/src/LangModels/LangSwedishModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Swedish *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-28 22:29:21.480940 + * On: 2021-03-16 20:24:13.934277 **/ /* Character Mapping Table: @@ -61,163 +62,186 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Windows_1252_CharToOrderMap[] = +static const unsigned char Iso_8859_1_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM, 49,ILL, 50,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 52,ILL, 53, 54, /* 9X */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 55,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 56, 44, 57, 58, 17, 19, 38, 40, 32, 28, 45, 59, 60, 61, 47, 62, /* CX */ - 63, 64, 65, 66, 35, 67, 21,SYM, 37, 68, 69, 70, 31, 71, 72, 73, /* DX */ - 74, 44, 75, 76, 17, 19, 38, 40, 32, 28, 45, 77, 78, 79, 47, 80, /* EX */ - 81, 82, 83, 84, 35, 85, 21,SYM, 37, 86, 87, 88, 31, 89, 90, 91, /* FX */ + SYM,SYM,SYM,SYM,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 49, 33, 50, 51, 16, 19, 37, 40, 32, 28, 42, 52, 53, 38, 43, 54, /* CX */ + 55, 56, 57, 58, 59, 60, 22,SYM, 39, 61, 62, 63, 31, 64, 65, 66, /* DX */ + 67, 33, 68, 69, 16, 19, 37, 40, 32, 28, 42, 70, 71, 38, 43, 72, /* EX */ + 73, 74, 75, 76, 77, 78, 22,SYM, 39, 79, 80, 81, 31, 82, 83, 84, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_9_CharToOrderMap[] = +static const unsigned char Iso_8859_4_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 92,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 93, 44, 94, 95, 17, 19, 38, 40, 32, 28, 45, 96, 97, 98, 47, 99, /* CX */ - 100,101,102,103, 35,104, 21,SYM, 37,105,106,107, 31,108,109,110, /* DX */ - 111, 44,112,113, 17, 19, 38, 40, 32, 28, 45,114,115,116, 47,117, /* EX */ - 118,119,120,121, 35,122, 21,SYM, 37,123,124,125, 31, 42,126,127, /* FX */ + SYM, 85, 86, 87,SYM, 88, 89,SYM,SYM, 90, 91, 92, 93,SYM, 94,SYM, /* AX */ + SYM, 95,SYM, 96,SYM, 97, 98,SYM,SYM, 99,100,101,102, 46,103, 46, /* BX */ + 30, 33,104,105, 16, 19, 37,106,107, 28,108,109, 44, 38, 43, 48, /* CX */ + 110,111, 35,112,113,114, 22,SYM, 39, 41,115,116, 31,117, 47,118, /* DX */ + 30, 33,119,120, 16, 19, 37,121,122, 28,123,124, 44, 38, 43, 48, /* EX */ + 125,126, 35,127,128,129, 22,SYM, 39, 41,130,131, 31,132, 47,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_1_CharToOrderMap[] = +static const unsigned char Iso_8859_9_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,128,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 129, 44,130,131, 17, 19, 38, 40, 32, 28, 45,132,133,134, 47,135, /* CX */ - 136,137,138,139, 35,140, 21,SYM, 37,141,142,143, 31,144,145,146, /* DX */ - 147, 44,148,149, 17, 19, 38, 40, 32, 28, 45,150,151,152, 47,153, /* EX */ - 154,155,156,157, 35,158, 21,SYM, 37,159,160,161, 31,162,163,164, /* FX */ + SYM,SYM,SYM,SYM,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 133, 33,134,135, 16, 19, 37, 40, 32, 28, 42,136,137, 38, 43,138, /* CX */ + 139,140,141,142,143,144, 22,SYM, 39,145,146,147, 31,148,149,150, /* DX */ + 151, 33,152,153, 16, 19, 37, 40, 32, 28, 42,154,155, 38, 43,156, /* EX */ + 157,158,159,160,161,162, 22,SYM, 39,163,164,165, 31, 45,166,167, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_4_CharToOrderMap[] = +static const unsigned char Iso_8859_15_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,165,166,167,SYM,168,169,SYM,SYM,170,171,172,173,SYM,174,SYM, /* AX */ - SYM,175,SYM,176,SYM,177,178,SYM,SYM,179,180,181,182, 43,183, 43, /* BX */ - 29, 44,184,185, 17, 19, 38,186,187, 28,188,189, 39,190, 47, 41, /* CX */ - 191,192, 33,193, 35,194, 21,SYM, 37, 36,195,196, 31,197, 46,198, /* DX */ - 29, 44,199,200, 17, 19, 38,201,202, 28,203,204, 39,205, 47, 41, /* EX */ - 206,207, 33,208, 35,209, 21,SYM, 37, 36,210,211, 31,212, 46,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,168,SYM,169,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,170, 34,SYM,SYM,171,SYM,SYM,SYM,172,173,174,SYM, /* BX */ + 175, 33,176,177, 16, 19, 37, 40, 32, 28, 42,178,179, 38, 43,180, /* CX */ + 181,182,183,184,185,186, 22,SYM, 39,187,188,189, 31,190,191,192, /* DX */ + 193, 33,194,195, 16, 19, 37, 40, 32, 28, 42,196,197, 38, 43,198, /* EX */ + 199,200,201,202,203,204, 22,SYM, 39,205,206,207, 31,208,209,210, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_15_CharToOrderMap[] = +static const unsigned char Windows_1252_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 4X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 22, 20, 9, 1, 14, 12, 18, 6, 23, 10, 7, 11, 3, 8, /* 6X */ - 15, 30, 2, 5, 4, 16, 13, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,213,SYM,214,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,215,216,SYM,SYM,217,SYM,SYM,SYM,218,219,220,SYM, /* BX */ - 221, 44,222,223, 17, 19, 38, 40, 32, 28, 45,224,225,226, 47,227, /* CX */ - 228,229,230,231, 35,232, 21,SYM, 37,233,234,235, 31,236,237,238, /* DX */ - 239, 44,240,241, 17, 19, 38, 40, 32, 28, 45,242,243,244, 47,245, /* EX */ - 246,247,248,249, 35,249, 21,SYM, 37,249,249,249, 31,249,249,249, /* FX */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ + 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 36,SYM,SYM,SYM,SYM,SYM,SYM,211,SYM,212,ILL,213,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,214,SYM,215,ILL,216,217, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 218, 33,219,220, 16, 19, 37, 40, 32, 28, 42,221,222, 38, 43,223, /* CX */ + 224,225,226,227,228,229, 22,SYM, 39,230,231,232, 31,233,234,235, /* DX */ + 236, 33,237,238, 16, 19, 37, 40, 32, 28, 42,239,240, 38, 43,241, /* EX */ + 242,243,244,245,246,247, 22,SYM, 39,248,249,249, 31,249,249,249, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 60; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 21, 67, 20, 68, 9, 69, 1, 70, 13, 71, 12, 72, 17, + 73, 6, 74, 23, 75, 11, 76, 7, 77, 10, 78, 3, 79, 8, 80, 18, + 81, 29, 82, 2, 83, 5, 84, 4, 85, 15, 86, 14, 87, 26, 88, 25, + 89, 24, 90, 27, 97, 0, 98, 21, 99, 20, 100, 9, 101, 1,102, 13, + 103, 12, 104, 17, 105, 6, 106, 23, 107, 11, 108, 7, 109, 10,110, 3, + 111, 8, 112, 18, 113, 29, 114, 2, 115, 5, 116, 4, 117, 15,118, 14, + 119, 26, 120, 25, 121, 24, 122, 27, 196, 16, 197, 19, 201, 28,214, 22, + 228, 16, 229, 19, 233, 28, 246, 22, +}; + /* Model Table: - * Total sequences: 748 - * First 512 sequences: 0.997323508584682 - * Next 512 sequences (512-1024): 0.0026764914153179875 - * Rest: 1.7780915628762273e-17 + * Total sequences: 752 + * First 512 sequences: 0.996987580875875 + * Next 512 sequences (512-1024): 0.00301241912412493 + * Rest: 4.640385298237959e-17 * Negative sequences: TODO */ static const PRUint8 SwedishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,2,3,3,3,3,3,2,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,3,3,3,3,3,3,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,2,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,2,2,3,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,2,3,3,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,0,2,0,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,2,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,0,3,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,3,3,3,3,0,2,3,2,0,0,0,2,0,0,0, - 3,3,3,2,3,2,3,3,3,2,0,2,2,2,3,2,3,3,0,3,2,3,0,3,3,0,0,0,2,0,0, - 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,2,2,2,2,3,2,0,2,3,2,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,0,2,0,3,2,3,2,0,3,0,0,0,2,0, - 2,2,3,3,3,3,0,3,0,3,3,3,3,3,3,3,2,2,0,0,3,0,3,0,0,3,0,0,0,0,0, - 3,3,3,3,3,2,3,2,3,2,2,2,2,0,0,0,3,3,2,3,2,3,2,3,3,0,0,3,0,2,0, - 2,3,3,3,3,3,2,3,0,3,3,3,3,3,2,0,0,0,2,0,0,2,3,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,2,3,2,2,2,2,0,3,0,3,0,3,2,2,0,3,0,0,2,2,0,2, - 3,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,0,2,2,0,3,2,2,3,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,2,3,3,2,3,3,3,3,3,3,0,0,2,2,0,0, - 3,3,0,2,2,3,2,3,3,3,2,0,0,0,2,0,3,3,0,0,0,3,2,0,0,0,0,0,2,0,0, - 3,2,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,0,2,0,3,0,3,2,0,3,0,2,0,0,0, - 3,3,0,3,3,0,3,2,3,0,2,2,0,0,2,3,2,0,2,0,0,0,2,0,2,2,0,0,0,0,0, - 3,3,2,2,2,3,3,2,3,2,2,0,0,0,0,0,2,0,2,0,0,0,0,0,2,0,2,2,0,0,0, - 3,3,0,2,2,0,2,0,3,0,2,0,0,0,0,0,2,0,2,0,0,0,2,0,2,0,0,2,0,0,0, - 0,3,2,2,0,2,0,2,2,2,0,0,0,2,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0, - 0,0,0,2,0,0,2,0,3,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,2,3,3,3,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,3,3,3,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,2,2,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,3,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,2,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,0,2,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,2,0,0, + 3,3,3,2,3,2,3,3,3,2,2,0,2,3,2,3,3,0,2,3,2,0,3,3,3,0,0,0,2,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,0,3,2,2,0,0,0,2,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,2,3,0,3,3,2,2,0,3,2,2,0,0, + 2,2,3,3,3,3,2,3,0,3,3,3,3,3,3,2,2,0,3,0,3,3,0,0,0,3,0,0,0,0, + 3,3,3,3,3,2,3,3,3,2,2,2,2,2,0,3,3,2,0,3,2,2,3,3,3,0,0,3,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,2,2,3,0,0,2,2,0, + 2,3,3,3,3,3,2,3,0,3,2,3,3,2,3,0,0,2,0,0,0,2,2,2,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,2,2,3,2,2,2,3,0,3,0,0,3,2,0,0,3,0,0,2,2,2, + 3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,3,3,2,3,3,3,3,3,3,3,0,0,2,2,0, + 3,3,3,3,3,3,2,3,2,3,3,3,3,2,3,0,2,2,3,0,3,2,2,3,0,0,0,0,0,0, + 3,3,0,2,2,3,2,3,3,3,0,2,0,2,0,3,3,2,0,0,0,2,3,0,0,0,0,0,0,0, + 3,2,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,2,3,0,3,3,0,2,0,3,2,2,0,0, + 3,3,2,2,3,0,3,3,3,0,2,2,0,2,0,2,0,2,3,0,0,2,0,0,2,2,0,0,0,0, + 3,3,2,2,2,2,3,2,3,2,0,2,0,2,0,2,0,3,0,0,0,0,0,0,2,0,2,2,0,0, + 3,3,0,2,2,0,2,2,3,0,0,2,0,2,0,2,0,2,0,0,0,2,0,0,2,0,0,2,0,0, + 0,3,2,2,0,2,0,2,0,2,0,0,0,0,2,2,0,2,2,0,0,0,2,0,0,0,0,0,0,0, + 0,0,0,2,0,0,2,0,0,0,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; -const SequenceModel Windows_1252SwedishModel = +const SequenceModel Iso_8859_1SwedishModel = { - Windows_1252_CharToOrderMap, + Iso_8859_1_CharToOrderMap, SwedishLangModel, - 31, - (float)0.997323508584682, + 30, + (float)0.996987580875875, PR_TRUE, - "WINDOWS-1252", + "ISO-8859-1", + "sv" +}; + +const SequenceModel Iso_8859_4SwedishModel = +{ + Iso_8859_4_CharToOrderMap, + SwedishLangModel, + 30, + (float)0.996987580875875, + PR_TRUE, + "ISO-8859-4", "sv" }; @@ -225,42 +249,41 @@ const SequenceModel Iso_8859_9SwedishModel = { Iso_8859_9_CharToOrderMap, SwedishLangModel, - 31, - (float)0.997323508584682, + 30, + (float)0.996987580875875, PR_TRUE, "ISO-8859-9", "sv" }; -const SequenceModel Iso_8859_1SwedishModel = +const SequenceModel Iso_8859_15SwedishModel = { - Iso_8859_1_CharToOrderMap, + Iso_8859_15_CharToOrderMap, SwedishLangModel, - 31, - (float)0.997323508584682, + 30, + (float)0.996987580875875, PR_TRUE, - "ISO-8859-1", + "ISO-8859-15", "sv" }; -const SequenceModel Iso_8859_4SwedishModel = +const SequenceModel Windows_1252SwedishModel = { - Iso_8859_4_CharToOrderMap, + Windows_1252_CharToOrderMap, SwedishLangModel, - 31, - (float)0.997323508584682, + 30, + (float)0.996987580875875, PR_TRUE, - "ISO-8859-4", + "WINDOWS-1252", "sv" }; -const SequenceModel Iso_8859_15SwedishModel = +const LanguageModel SwedishModel = { - Iso_8859_15_CharToOrderMap, + "sv", + Unicode_CharOrder, + 60, SwedishLangModel, - 31, - (float)0.997323508584682, - PR_TRUE, - "ISO-8859-15", - "sv" + 30, + (float)0.996987580875875, }; diff --git a/src/LangModels/LangThaiModel.cpp b/src/LangModels/LangThaiModel.cpp index 9880e09..847745b 100644 --- a/src/LangModels/LangThaiModel.cpp +++ b/src/LangModels/LangThaiModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Thai *********/ /** * Generated by BuildLangModel.py - * On: 2015-12-04 03:05:06.182099 + * On: 2021-03-16 20:29:56.647545 **/ /* Character Mapping Table: @@ -61,207 +62,230 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const unsigned char Tis_620_CharToOrderMap[] = +static const unsigned char Iso_8859_11_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 66, 70, 67, 80, 78, 87, 85, 73, 79, 93, 88, 84, 68, 77, 81, /* 4X */ - 75,101, 74, 61, 71, 86, 96, 90,103,100, 99,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 35, 64, 48, 52, 32, 60, 65, 54, 36, 97, 76, 46, 56, 41, 40, /* 6X */ - 59,104, 43, 45, 44, 55, 72, 82, 94, 57, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 66, 74, 57, 76, 86, 83, 87, 80, 64, 95, 94, 79, 78, 77, 82, /* 4X */ + 75,104, 72, 56, 71, 93, 81, 89,103, 96, 97,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 37, 73, 51, 54, 38, 68, 67, 49, 39, 99, 84, 48, 58, 42, 44, /* 6X */ + 65,101, 45, 47, 46, 50, 70, 85,100, 61, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - ILL, 3, 23,105, 15,106, 89, 5, 21, 63, 26, 31,102, 42, 69, 58, /* AX */ - 49, 91, 83, 34, 9, 17, 30, 12, 39, 1, 16, 19, 33, 62, 22, 47, /* BX */ - 38, 7, 10, 2, 50, 11,107, 8, 28, 37, 13, 18, 98, 4, 53, 95, /* CX */ - 14,SYM, 0, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ - 6, 20, 27, 24, 25,108, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,109, /* EX */ + SYM, 3, 25,106, 15,107, 88, 6, 22, 55, 23, 29,102, 41, 69, 59, /* AX */ + 40, 90, 63, 35, 11, 14, 32, 13, 33, 1, 17, 18, 31, 62, 21, 43, /* BX */ + 34, 7, 9, 2, 53, 10,108, 8, 26, 36, 12, 20, 91, 4, 52, 98, /* CX */ + 16,SYM, 0, 30,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ + 5, 19, 27, 24, 28,105, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,109, /* EX */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,110,111,ILL,ILL,ILL,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_11_CharToOrderMap[] = +static const unsigned char Tis_620_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 66, 70, 67, 80, 78, 87, 85, 73, 79, 93, 88, 84, 68, 77, 81, /* 4X */ - 75,101, 74, 61, 71, 86, 96, 90,103,100, 99,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 35, 64, 48, 52, 32, 60, 65, 54, 36, 97, 76, 46, 56, 41, 40, /* 6X */ - 59,104, 43, 45, 44, 55, 72, 82, 94, 57, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 66, 74, 57, 76, 86, 83, 87, 80, 64, 95, 94, 79, 78, 77, 82, /* 4X */ + 75,104, 72, 56, 71, 93, 81, 89,103, 96, 97,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 37, 73, 51, 54, 38, 68, 67, 49, 39, 99, 84, 48, 58, 42, 44, /* 6X */ + 65,101, 45, 47, 46, 50, 70, 85,100, 61, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 3, 23,112, 15,113, 89, 5, 21, 63, 26, 31,102, 42, 69, 58, /* AX */ - 49, 91, 83, 34, 9, 17, 30, 12, 39, 1, 16, 19, 33, 62, 22, 47, /* BX */ - 38, 7, 10, 2, 50, 11,114, 8, 28, 37, 13, 18, 98, 4, 53, 95, /* CX */ - 14,SYM, 0, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ - 6, 20, 27, 24, 25,115, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,116, /* EX */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,117,118,ILL,ILL,ILL,ILL, /* FX */ + ILL, 3, 25,112, 15,113, 88, 6, 22, 55, 23, 29,102, 41, 69, 59, /* AX */ + 40, 90, 63, 35, 11, 14, 32, 13, 33, 1, 17, 18, 31, 62, 21, 43, /* BX */ + 34, 7, 9, 2, 53, 10,114, 8, 26, 36, 12, 20, 91, 4, 52, 98, /* CX */ + 16,SYM, 0, 30,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ + 5, 19, 27, 24, 28,105, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,115, /* EX */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,116,117,ILL,ILL,ILL,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 64; +static const unsigned int Unicode_CharOrder[] = +{ + 67, 57, 83, 56, 97, 37, 99, 51, 100, 54, 101, 38, 104, 49, 105, 39, + 108, 48, 109, 58, 110, 42, 111, 44, 114, 45, 115, 47, 116, 46, 117, 50, + 121, 61, 3585, 3, 3586, 25, 3588, 15, 3591, 6, 3592, 22, 3593, 55,3594, 23, + 3595, 29, 3597, 41, 3599, 59, 3600, 40, 3602, 63, 3603, 35, 3604, 11,3605, 14, + 3606, 32, 3607, 13, 3608, 33, 3609, 1, 3610, 17, 3611, 18, 3612, 31,3613, 62, + 3614, 21, 3615, 43, 3616, 34, 3617, 7, 3618, 9, 3619, 2, 3620, 53,3621, 10, + 3623, 8, 3624, 26, 3625, 36, 3626, 12, 3627, 20, 3629, 4, 3630, 52,3632, 16, + 3634, 0, 3635, 30, 3648, 5, 3649, 19, 3650, 27, 3651, 24, 3652, 28,3654, 60, +}; + /* Model Table: - * Total sequences: 2324 - * First 512 sequences: 0.8815720594354438 - * Next 512 sequences (512-1024): 0.0920860122682917 - * Rest: 0.026341928296264486 + * Total sequences: 2704 + * First 512 sequences: 0.8690353564146914 + * Next 512 sequences (512-1024): 0.09940380137019393 + * Rest: 0.03156084221511464 * Negative sequences: TODO */ static const PRUint8 ThaiLangModel[] = { - 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3, - 0,2,3,0,0,3,2,3,0,0,2,0,0,0,0,2,0,1,1,1,0,2,0,0,0,0,1,0,0,0,1,1, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3, - 0,3,0,0,0,1,3,3,0,0,1,0,0,0,0,2,0,2,1,2,0,1,0,0,0,0,0,0,0,0,2,1, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,3,1,3,2, - 0,2,3,0,0,2,2,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,2,1, - 3,3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3, - 0,2,1,0,0,3,2,1,0,0,0,0,0,0,0,1,0,3,3,1,0,1,0,0,0,0,3,0,0,0,1,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,3,3,2,2,1,2,2,2, - 0,2,0,0,0,0,2,2,0,0,1,0,0,0,0,2,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2, - 0,3,0,0,0,1,2,2,0,0,1,0,0,0,0,2,0,1,1,2,0,2,0,0,0,0,0,0,0,0,2,1, - 0,3,3,3,3,2,0,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,0,0,3,0,3,0,1,3, - 0,2,0,0,0,2,2,2,0,0,0,0,0,0,0,3,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,3, - 3,3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,1,0,2,1, - 0,2,2,0,1,2,2,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,2,2,2,3,3,3,2,2,2,2,2,2,0,2,2, - 0,1,2,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,3,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,3,3,3,3,2,3,2,3,3,3,3,0,3,2,3,2,2,3,2,2,3,3,3,2,2,1,3,2,1, - 0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,1,2,2, - 0,2,0,0,0,0,3,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,2,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,2,2,2,2,1,3,2,2,2,2,1,3,1,2, - 0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,1,2,1,2,1,2,3,3,1,1,2,2,3,2,1,2,1,1,1,2,1,1,1,1,1,3,3,0,1, - 0,0,0,0,0,1,1,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,3,3,3,2,3,2,2,2,2,3,3,3,2,2,1,1,1,2,2,1,2,1,3,3,2, - 0,1,0,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, - 0,3,3,3,3,1,3,3,3,3,3,2,3,3,0,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,2,2, - 0,2,1,0,0,0,2,2,0,0,1,0,0,0,0,1,0,1,1,0,0,2,0,0,0,0,1,0,0,0,1,1, - 3,3,3,1,3,2,2,3,3,2,2,3,1,1,2,2,1,2,1,2,1,3,1,1,1,1,1,2,0,3,0,1, - 0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0, - 3,3,3,3,3,1,3,2,3,3,2,3,3,3,1,3,3,3,3,3,3,2,2,2,3,3,2,2,2,2,2,2, - 0,2,0,0,0,0,2,1,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1, - 3,3,3,3,3,1,2,1,2,1,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,1,1,2,1,3,3,1, - 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 3,3,3,1,2,1,0,3,3,1,2,3,1,1,1,0,0,3,1,1,0,0,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,3,1,2,1,2,2,2,3,2,2,2,1,1,2,1,2,2,2,1,1,2,2,1,1,1,0,2,1, - 0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,3,0,0,0,0,0, - 0,3,3,3,3,1,0,3,2,2,2,3,3,3,0,3,3,3,3,3,0,1,2,2,0,0,1,0,0,0,3,3, - 0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0, - 3,3,3,3,3,1,3,2,2,2,1,1,2,2,3,2,1,2,1,1,2,3,3,2,2,2,1,2,0,3,1,2, - 0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, - 3,1,3,2,3,1,2,2,3,2,3,3,3,2,0,1,3,1,1,1,2,2,1,2,1,1,1,1,1,1,1,0, - 0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,1,1,3,0,1,1,2,1,2,1,2,1,0,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,1,1, - 0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,0,3,0,0,0,0,0,2,1,0,0,2,0,1,1,3,3,1,0,3,0,0,0,0,3,0,0,0,0,0, - 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,3,2,2,0,0,3,3,3,0,2,3,1,0,2,2,2,2,3,0,1,1,3,0,0,1,0,0,0,1,2, - 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,1,2,3,1,2,2,2,1,2,2,2,2,1,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1, - 0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,3,3,2,3,0,0,2,1,3,2,3,3,1,0,3,2,3,1,2,0,2,2,1,0,0,1,0,1,0,1,2, - 0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1, - 3,3,2,2,2,0,2,2,2,1,2,1,2,2,0,1,1,2,1,1,2,2,1,2,2,2,1,1,1,0,1,1, - 0,0,0,0,0,2,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0, - 0,3,3,3,2,2,3,2,2,2,1,3,2,2,0,3,2,2,3,1,3,1,2,2,3,2,1,2,1,0,2,1, - 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0, - 3,2,1,1,2,1,2,2,2,1,1,2,2,1,1,1,2,1,1,1,2,1,1,1,2,1,1,1,1,0,1,0, - 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 3,3,1,1,3,2,2,1,1,1,1,2,1,0,1,1,1,2,0,1,1,0,0,0,0,1,1,1,0,0,0,1, - 0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,0,0,0,2,3,0,3,2,3,3,0,2,0,0,0,2,0,1,2,2,1,0,2,2,1,0,0, - 1,2,0,1,0,1,1,1,1,1,2,3,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0, + 1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2, + 2,3,2,3,3,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,1,0,0,1,0, + 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3, + 3,3,3,1,1,0,0,0,2,1,0,2,0,0,0,0,0,0,0,0,1,2,0,1,0,0,0,0,1,0,2,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,2,2,1,2, + 3,1,2,3,3,0,0,0,2,1,0,2,0,0,0,0,0,0,0,0,1,1,0,2,0,0,0,1,0,0,1,1, + 3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,2,3,3,3,2,2,3,2,3,3,2,3,3,3,3,2, + 2,1,2,1,3,0,0,0,2,1,0,3,0,0,0,0,0,0,0,0,1,3,0,1,1,0,0,2,1,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,2,1,2,3,2,3,2, + 2,2,2,0,1,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,1,0, + 0,3,3,3,3,0,2,3,3,3,3,3,3,3,3,3,0,3,3,1,3,3,3,3,0,3,3,0,0,3,0,2, + 1,3,2,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,3,0,0,0,1,0,0,1,0, + 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3, + 3,1,2,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,2,0, + 3,3,3,3,3,3,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,2,3,3,1,0,2, + 2,1,3,2,2,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0, + 3,3,3,3,3,3,3,3,2,3,3,2,2,3,2,2,2,3,2,2,2,2,3,2,2,2,1,3,3,2,0,1, + 1,0,1,2,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, + 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,1,2, + 2,2,2,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,2,0, + 3,3,2,3,3,3,3,2,2,2,3,3,3,3,2,2,3,2,3,2,2,1,2,3,2,2,1,2,2,2,2,1, + 1,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0, + 3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,2,1,2,2,3,2,2,3,2,3,3,1,2,2,2,2,2, + 1,1,2,0,1,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,1,0, + 3,3,3,3,3,3,3,3,3,1,2,3,2,2,3,1,2,2,2,2,3,1,1,1,2,1,1,3,2,2,3,1, + 3,2,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0, + 3,3,3,1,3,2,1,2,3,3,1,3,2,1,1,3,2,2,1,1,3,2,1,1,1,1,3,1,1,1,3,1, + 0,3,1,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,1,2,3,3,3,1,3,2,2,1,3,3,1,2,1,1,2,2,2,2,1,1,2,1,2,2, + 1,1,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0, + 3,3,3,2,3,3,2,3,3,2,3,2,2,2,2,2,2,2,1,2,2,1,1,1,2,1,1,3,1,3,3,1, + 1,1,1,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0, + 0,3,3,3,3,3,1,3,3,3,2,3,3,3,3,3,1,3,3,3,3,3,3,3,3,2,2,3,3,2,0,2, + 2,3,2,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,1,0, + 3,3,3,3,3,3,1,2,3,2,2,3,3,3,3,3,1,3,3,3,2,2,2,2,2,2,1,3,3,2,1,3, + 1,1,2,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,2,1,0,1,0, + 3,3,3,3,3,2,1,1,2,2,3,2,2,2,2,1,2,1,2,2,1,1,2,1,2,1,1,2,2,1,0,1, + 2,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,0,1,0, + 0,3,3,3,3,0,1,3,2,2,3,3,3,3,3,3,0,3,3,0,3,3,2,2,0,3,0,0,0,2,0,3, + 2,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, + 3,3,3,2,1,0,1,3,3,2,3,1,1,1,3,1,1,1,2,0,0,1,0,0,0,0,1,0,0,0,0,0, + 0,0,2,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,1,2,2,1,2,2,3,3,1,2,2,1,1,1,3,1,2,1,1,1,2,2,2,2,1,1,2,1,1, + 1,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,3,0,0,0,0,0,0,0,0,1,0, + 3,3,3,2,3,2,1,3,1,1,2,2,2,2,2,1,3,2,1,2,1,2,3,1,2,2,1,1,2,1,3,1, + 1,1,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, + 3,3,1,2,3,2,1,2,2,2,2,1,2,2,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, + 1,1,1,1,1,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 0,3,0,2,0,0,0,0,0,1,0,2,2,0,3,1,0,2,0,0,3,0,3,3,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 3,3,1,1,3,1,1,1,2,2,1,1,1,1,3,1,1,2,1,1,1,1,1,1,1,1,0,1,1,1,1,0, + 1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,2,2,2,1,2,2,2,2,1,2,2,3,1,1,2,1,2,1,1,3,1,2,2,1,1,2,2,0,1, + 1,0,1,0,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, + 0,3,3,2,3,0,1,3,1,2,3,3,1,2,3,3,0,2,3,0,1,3,2,1,0,1,1,0,0,2,0,1, + 1,1,1,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0, + 0,2,3,2,2,0,1,3,3,0,2,3,1,3,2,2,0,2,3,0,2,2,1,1,0,2,1,0,0,2,0,0, + 1,1,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, + 3,2,1,1,2,2,1,2,1,1,2,1,1,1,2,1,1,2,1,1,1,1,0,1,0,0,0,2,1,1,0,1, + 0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,3,3,3,2,3,2,2,2,1,3,2,2,2,1,3,0,2,2,3,3,1,2,1,3,2,1,1,2,1,0,1, + 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0, + 1,2,0,1,1,0,1,1,1,2,3,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,1,2,2,1,1,1,1,1,1,1,1,2,2,3,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,1, - 0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,1,2,0,0,0,1,3,0,3,3,2,3,0,2,0,0,0,2,0,1,1,2,2,0,2,1,1,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,3,1,0,0,0,3,3,0,2,3,3,2,0,3,0,0,0,2,0,1,1,2,0,0,1,1,0,0,0, - 3,1,1,2,1,0,1,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1, - 0,1,3,0,0,1,2,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,1,0,0,0,1,0, - 3,0,2,1,1,0,0,1,0,0,1,0,2,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,1,3,1,2,1,1,2,1,1,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0, - 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,1,1,0,0,0,1,3,0,3,2,2,2,0,2,0,0,0,2,0,1,2,2,1,0,2,3,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,2,2,0,0,0,2,2,0,1,3,2,1,0,2,0,0,0,3,0,1,1,1,1,0,0,1,0,0,0, - 3,1,1,1,1,0,2,1,1,0,0,1,2,1,0,1,1,1,2,1,1,1,1,1,2,1,2,1,1,0,1,1, - 0,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1, + 3,2,1,1,2,2,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1, + 1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,1,2,1,1,2,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0, + 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0, + 3,1,1,1,1,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0, + 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, + 3,1,2,2,1,1,1,1,1,1,1,1,2,1,1,1,3,1,2,1,2,1,1,1,1,1,2,1,1,1,0,0, + 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, + 3,1,1,1,1,1,0,1,1,2,1,1,1,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1, + 1,0,2,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,1,1,2,0,0,3,0,1,3,3,2,3,2,1,2,0,0,2,0,0,1,2,0,0,2,0,0, + 0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, + 0,0,0,0,0,2,1,2,0,0,3,1,1,3,2,3,2,1,1,2,0,0,2,0,0,0,2,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,3,3,0,0,0,2,2,0,2,2,2,1,0,2,0,0,0,2,0,1,1,1,2,0,1,1,0,0,0, + 0,0,0,0,0,2,2,0,0,0,3,0,3,2,3,3,2,1,1,3,0,0,2,0,0,0,2,0,0,1,0,0, + 3,1,1,2,2,2,0,2,1,1,1,1,2,1,1,1,0,3,2,2,1,1,1,1,1,1,1,1,1,1,0,1, + 1,2,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0, + 3,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,1,1,1,2,2,1,1,2,1,1,0,1,1,1,0,0, + 1,1,1,0,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,2,3,0,0,0,2,1,0,2,2,2,1,0,1,0,0,0,1,0,3,2,1,2,0,1,1,0,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,1,2,0,0,0,2,1,0,1,3,2,1,0,2,0,0,0,1,0,2,1,1,1,0,1,0,0,0,0, + 0,0,0,0,0,3,2,2,0,0,1,0,2,1,3,2,1,1,1,2,0,0,2,0,0,1,1,0,0,1,0,0, + 2,2,2,1,3,3,0,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,2,2,0,0,0,2,2,0,0,1,1,2,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,0, - 1,1,3,2,2,0,2,1,1,1,1,2,1,1,0,1,1,2,1,0,1,1,1,1,1,1,1,1,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,0,0,0,2,0,0,1,2,1,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0, - 3,1,1,1,2,0,1,2,1,0,0,0,1,2,0,1,2,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1, - 0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,3,0,0,0,0,0,2,0,0,1,0,0,1,0,2,2,0,0,1,0,0,0,0,0,0,2,0,1,0, - 0,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,1,1,1,0,0,3,0,2,3,2,1,2,1,2,2,0,0,2,0,0,0,2,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 0,0,0,0,0,3,3,3,0,0,2,0,2,1,2,2,1,1,2,2,0,0,2,0,0,1,1,0,0,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,0,0,0,2,0,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,2,0,0,2,0,0,0, - 2,1,1,0,2,0,2,1,1,1,1,2,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,3,3,0,0,1,0,2,2,1,1,1,2,2,1,0,0,1,0,0,1,1,0,0,2,0,0, + 0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0, + 0,0,0,0,0,1,2,2,0,0,1,1,2,1,3,2,1,2,1,1,0,0,1,0,0,0,1,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,2,2,0,0,0,2,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0, + 0,0,0,0,0,2,2,2,0,0,1,0,2,0,2,1,2,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0, + 0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,3,2,0,0,1,0,2,2,1,1,1,1,2,1,0,0,1,0,0,0,1,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,1,1,0,0,0,0,2,0,2,2,2,2,0,2,0,0,0,2,0,1,0,1,1,0,1,1,1,0,0, + 0,0,0,0,0,2,1,1,0,0,2,0,1,2,2,2,2,1,0,1,0,0,1,0,0,0,2,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,0,0,0,2,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,2,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,1,1,0,0,0,1,1,0,0,1,2,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0, - 1,0,1,2,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1, + 0,0,0,0,0,2,2,2,0,0,0,0,2,1,2,1,1,2,1,1,0,0,0,0,0,1,0,0,0,1,0,0, + 2,1,1,1,2,1,0,1,1,1,1,1,1,0,1,0,1,0,1,1,0,0,1,1,0,0,0,1,1,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,1,0,0,0,1,0,0,0,2,0,2,2,1,0,0,0,0,1,1,0,1,0,0,2,0,0,0,0,0, + 0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,2,2,0,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0, + 2,1,1,1,0,0,1,1,1,1,2,1,0,0,0,0,1,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,1,0,0,0,2,0,0,2,1,1,2,0,0,0,0,0,0,0,2,1,1,2,0,1,0,0,0,0, + 0,0,0,0,0,1,1,1,0,0,1,0,1,1,2,0,1,1,1,1,0,0,0,0,1,1,1,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,1,2,0,0,0,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,2,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,1,1,0,0,0,1,0,0,0,2,0,0,0,2,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0, - 0,1,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,1,1,0,0,0,0,3,1,0,0,1,2,1,0,0,0,0,0,1,1,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,2,2,0,0,1,0,2,1,1,1,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0, + 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,0,0, + 1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 0,0,0,0,1,1,0,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,1,0,1,1,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,1,0,0,0,0,1,1,1,1,2,0,0,1,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, + 0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0, + 1,1,2,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,3,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; +const SequenceModel Iso_8859_11ThaiModel = +{ + Iso_8859_11_CharToOrderMap, + ThaiLangModel, + 64, + (float)0.8690353564146914, + PR_FALSE, + "ISO-8859-11", + "th" +}; + const SequenceModel Tis_620ThaiModel = { Tis_620_CharToOrderMap, ThaiLangModel, 64, - (float)0.8815720594354438, + (float)0.8690353564146914, PR_FALSE, "TIS-620", "th" }; -const SequenceModel Iso_8859_11ThaiModel = +const LanguageModel ThaiModel = { - Iso_8859_11_CharToOrderMap, + "th", + Unicode_CharOrder, + 64, ThaiLangModel, 64, - (float)0.8815720594354438, - PR_FALSE, - "ISO-8859-11", - "th" + (float)0.8690353564146914, }; diff --git a/src/LangModels/LangTurkishModel.cpp b/src/LangModels/LangTurkishModel.cpp index 16c133f..c1b16c1 100644 --- a/src/LangModels/LangTurkishModel.cpp +++ b/src/LangModels/LangTurkishModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Turkish *********/ /** * Generated by BuildLangModel.py - * On: 2015-12-04 02:24:44.730727 + * On: 2021-03-16 20:34:51.083622 **/ /* Character Mapping Table: @@ -67,18 +68,18 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 15, 21, 7, 1, 26, 22, 19, 6, 28, 9, 5, 11, 3, 14, /* 4X */ - 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 15, 21, 7, 1, 26, 22, 19, 2, 28, 9, 5, 11, 3, 14, /* 6X */ - 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 15, 22, 9, 1, 27, 21, 19, 6, 28, 7, 5, 11, 3, 14, /* 4X */ + 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 15, 22, 9, 1, 27, 21, 19, 2, 28, 7, 5, 11, 3, 14, /* 6X */ + 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 48,SYM,SYM,SYM,ILL, 49,SYM,SYM, 2, 17, 25, 50,SYM,ILL, 51, /* AX */ - SYM, 52,SYM,SYM,SYM,SYM, 53,SYM,SYM, 6, 17, 25, 54,SYM,ILL, 55, /* BX */ - 41, 36, 30,ILL, 39, 56, 57, 24, 42, 33, 58, 45, 59, 37, 31, 60, /* CX */ - ILL, 47, 61, 38, 62, 63, 27,SYM, 64, 65, 40, 35, 16, 66, 67, 68, /* DX */ - 41, 36, 30,ILL, 39, 69, 70, 24, 42, 33, 71, 45, 72, 37, 31, 73, /* EX */ - ILL, 47, 74, 38, 75, 76, 27,SYM, 77, 78, 40, 35, 16, 79, 80,SYM, /* FX */ + SYM, 54,SYM,SYM,SYM,ILL, 55,SYM,SYM, 2, 17, 26, 56,SYM,ILL, 50, /* AX */ + SYM, 57,SYM,SYM,SYM,SYM, 58,SYM,SYM, 6, 17, 26, 59,SYM,ILL, 50, /* BX */ + 48, 36, 30,ILL, 39, 60, 61, 24, 40, 34, 62, 44, 63, 37, 31, 64, /* CX */ + ILL, 41, 52, 38, 46, 51, 25,SYM, 65, 66, 45, 32, 16, 67, 68, 69, /* DX */ + 48, 36, 30,ILL, 39, 70, 71, 24, 40, 34, 72, 44, 73, 37, 31, 74, /* EX */ + ILL, 41, 52, 38, 46, 51, 25,SYM, 75, 76, 45, 32, 16, 77, 78,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -88,67 +89,78 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 15, 21, 7, 1, 26, 22, 19, 6, 28, 9, 5, 11, 3, 14, /* 4X */ - 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 15, 21, 7, 1, 26, 22, 19, 2, 28, 9, 5, 11, 3, 14, /* 6X */ - 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 15, 22, 9, 1, 27, 21, 19, 6, 28, 7, 5, 11, 3, 14, /* 4X */ + 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 15, 22, 9, 1, 27, 21, 19, 2, 28, 7, 5, 11, 3, 14, /* 6X */ + 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 41, 36, 30, 44, 39, 82, 46, 24, 42, 33, 83, 45, 84, 37, 31, 85, /* CX */ - 25, 47, 86, 38, 87, 88, 27,SYM, 43, 89, 40, 35, 16, 2, 17, 90, /* DX */ - 41, 36, 30, 44, 39, 91, 46, 24, 42, 33, 92, 45, 93, 37, 31, 94, /* EX */ - 25, 47, 95, 38, 96, 97, 27,SYM, 43, 98, 40, 35, 16, 6, 17, 99, /* FX */ + SYM,SYM,SYM,SYM,SYM, 79,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 48, 36, 30, 47, 39, 42, 49, 24, 40, 34, 80, 44, 81, 37, 31, 82, /* CX */ + 26, 41, 52, 38, 46, 83, 25,SYM, 43, 84, 45, 32, 16, 2, 17, 85, /* DX */ + 48, 36, 30, 47, 39, 42, 49, 24, 40, 34, 86, 44, 87, 37, 31, 88, /* EX */ + 26, 41, 52, 38, 46, 89, 25,SYM, 43, 90, 45, 32, 16, 6, 17, 53, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 66; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 0, 66, 15, 67, 22, 68, 9, 69, 1, 70, 27, 71, 21, 72, 19, + 73, 2, 73, 6, 74, 28, 75, 7, 76, 5, 77, 11, 78, 3, 79, 14, + 80, 23, 82, 4, 83, 10, 84, 8, 85, 12, 86, 18, 87, 29, 89, 13, + 90, 20, 97, 0, 98, 15, 99, 22, 100, 9, 101, 1, 102, 27,103, 21, + 104, 19, 105, 2, 106, 28, 107, 7, 108, 5, 109, 11, 110, 3,111, 14, + 112, 23, 114, 4, 115, 10, 116, 8, 117, 12, 118, 18, 119, 29,121, 13, + 122, 20, 194, 30, 199, 24, 206, 31, 214, 25, 219, 32, 220, 16,226, 30, + 231, 24, 238, 31, 246, 25, 251, 32, 252, 16, 286, 26, 287, 26,305, 6, + 350, 17, 351, 17, +}; + /* Model Table: - * Total sequences: 935 - * First 512 sequences: 0.991865243864388 - * Next 512 sequences (512-1024): 0.008134756135611957 - * Rest: 2.949029909160572e-17 + * Total sequences: 1097 + * First 512 sequences: 0.9923593121944019 + * Next 512 sequences (512-1024): 0.007545326169453709 + * Rest: 9.536163614441446e-05 * Negative sequences: TODO */ static const PRUint8 TurkishLangModel[] = { - 3,2,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,2,0, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,2,0,3,0,2,0, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,2,2,2,0,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,2,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,2,2,2,2, - 3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,2,3,0,3,2,2,2,2,3,0,2,2,2, - 3,2,0,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,3,2,3,3,2,3,2,3,2,0,0,0,0,0,2,0,0,0, - 3,3,3,2,3,3,3,3,2,2,2,2,3,3,3,2,3,0,2,2,2,2,2,2,0,0,0,3,2,3,2,2,0,0,0,0, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,2,2,3,0,2,3,2,2,3,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,0,2,3,2,2,3,0,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,2,3,3,0,2,3,0,2,2,0,0,2,2,2, - 3,3,3,2,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,0,3,2,3,2,0,2,2,0,2,3,2,2,2,2,2, - 3,3,3,3,3,3,0,3,3,3,3,3,2,3,2,3,0,3,3,3,3,3,3,3,3,3,2,0,2,2,0,0,2,2,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,2,2,3,2,2,0,2,3,0,2,2,0,0,2,0,2, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,0,0, - 3,3,3,3,3,3,3,3,0,2,2,3,3,3,3,3,3,0,2,2,2,2,0,2,0,0,0,3,2,2,2,0,0,2,0,0, - 2,2,2,3,3,3,0,3,3,3,3,3,0,3,2,3,0,3,3,3,3,3,2,3,3,3,3,0,2,0,0,0,0,0,0,0, - 3,3,3,0,2,3,3,2,3,3,2,3,3,2,2,3,3,2,0,2,2,2,2,2,3,0,2,2,0,0,2,2,0,0,0,0, - 3,3,3,2,2,3,3,3,2,2,0,3,3,3,3,2,3,0,2,2,0,3,3,0,0,0,0,2,0,0,2,2,0,0,0,0, - 3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,2,2,2,2,0,2,3,0,2,0,0,2,3,2,0,2,0,2, - 3,3,3,2,3,3,2,2,0,2,3,2,3,3,3,2,2,2,2,2,3,2,2,0,0,0,2,0,0,0,2,2,0,0,0,0, - 3,3,3,2,3,3,3,2,3,3,2,2,3,2,3,2,3,0,2,3,0,2,0,0,0,0,0,2,0,0,2,0,0,2,2,2, - 3,3,3,2,3,3,3,2,2,2,2,0,3,2,3,0,3,0,2,3,2,0,2,2,0,0,2,3,2,2,2,0,0,2,0,0, - 3,3,3,0,3,3,3,2,3,2,3,3,3,2,3,2,2,0,2,3,0,2,2,3,2,0,2,0,0,2,2,0,2,2,0,0, - 3,3,3,0,2,3,3,2,3,2,0,3,3,2,3,2,3,2,0,0,0,0,2,2,0,0,0,3,0,0,0,0,0,0,0,0, - 3,3,3,0,3,3,3,3,0,0,0,3,3,0,0,2,3,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,2,3,2,2,0,3,3,3,2,2,0,0,2,0,2,2,0,2,0,2,2,2,0,2,2,0,0,0,0, - 0,0,0,3,3,3,0,3,3,3,3,3,0,3,0,2,0,2,3,2,2,0,0,2,3,3,2,0,2,0,0,0,0,0,0,0, - 3,3,3,0,0,2,2,2,0,2,0,0,3,0,3,0,2,0,0,0,0,2,2,2,0,0,0,2,0,0,2,0,0,0,0,0, - 3,3,3,2,2,2,0,0,0,2,2,2,2,2,3,2,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0, - 0,0,2,3,3,3,0,3,2,2,2,2,0,2,0,2,0,2,2,3,2,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,2,0,2,2,0,0,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, - 3,2,2,0,0,0,2,0,2,0,0,0,0,2,2,0,0,0,0,0,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0, - 2,0,2,2,2,2,0,2,2,0,2,2,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 2,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,0,2,0,0,2,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,2,2,0,0, + 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,3,3,3,0,0,0, + 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,3,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,2,2,3,1, + 3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,2,3,2,2,3,3,2, + 2,2,0,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,2,2,3,2,3,3,2,2,3,3,2,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,2,2,2,3,3,0,2,2,2,3,2,3, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,0,3,3,2,2,2,2,3,3,0,2,2,2,2,2,0, + 3,3,3,2,3,3,3,2,2,3,3,2,3,3,3,2,3,0,2,2,2,2,2,2,0,3,0,2,2,2,2,2,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,0,3,3,2,2,3,3,3,3,0,2,1,2,2,2,2, + 3,3,3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,2,3,0,2,2,2,2,3,2, + 3,3,3,3,3,3,0,3,3,3,3,3,2,3,2,3,0,3,3,3,3,3,3,3,3,0,3,3,2,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,2,2,3,3,2,2,3,0,2,2,2,2,0,2, + 2,2,2,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,0,3,3,3,3,0,1,0, + 3,3,3,2,3,3,3,2,2,3,2,3,3,3,3,3,3,2,2,2,2,2,2,0,0,3,0,2,2,2,2,2,2, + 2,2,2,3,3,3,0,3,3,3,3,3,0,3,1,3,0,3,3,2,3,2,3,3,3,2,3,3,2,0,0,0,0, + 3,3,3,2,2,3,3,3,3,2,2,3,3,2,2,3,3,1,2,2,0,3,2,2,3,2,0,2,0,0,2,2,1, + 3,3,3,2,3,3,3,2,2,3,2,2,3,3,3,2,2,2,3,2,2,2,3,0,2,0,0,2,2,0,2,2,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,2,2,2,2,0,2,2,3,2,0,2,2,2,3,2,0, + 3,3,3,2,2,3,3,2,2,3,2,3,3,3,3,2,3,0,2,2,2,3,3,2,1,2,0,1,0,2,2,2,1, + 3,3,3,2,3,3,3,2,2,2,2,2,3,2,3,2,3,0,2,3,2,2,1,2,0,3,0,0,0,2,2,0,0, + 3,3,3,2,3,3,3,3,2,2,2,2,3,2,3,3,3,0,0,3,2,2,2,1,0,2,0,2,0,0,2,0,0, + 3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,2,2,1,0,3,2,2,1,2,3,0,0,2,0,0,2,0,0, + 3,3,3,0,2,3,3,2,3,0,2,3,3,2,3,2,3,0,0,2,0,2,2,0,2,2,0,0,0,0,0,0,0, + 0,0,0,3,3,3,0,3,3,3,3,3,0,3,0,2,0,2,2,2,3,2,2,2,2,0,3,2,0,2,0,0,0, + 3,3,3,2,3,3,3,0,1,3,2,3,3,0,2,2,3,0,2,0,2,2,2,0,0,0,0,1,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,0,2,2,3,3,3,2,2,1,0,2,2,3,2,0,2,2,0,2,1,0,2,2,0, + 3,3,3,2,2,2,2,2,0,2,2,0,3,2,3,0,2,0,0,0,0,2,2,2,0,2,0,1,0,0,0,0,0, + 3,3,3,2,2,2,0,2,0,2,2,2,2,2,2,0,0,2,0,2,0,0,1,2,0,0,0,2,0,2,0,0,0, + 2,2,2,3,2,3,0,2,2,2,2,2,0,2,0,2,0,2,2,2,2,0,0,0,0,0,2,2,0,0,0,2,0, + 0,0,0,2,2,2,0,2,1,2,2,2,0,2,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,0,2,2,2,0,2,2,2,2,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -156,8 +168,8 @@ const SequenceModel Iso_8859_3TurkishModel = { Iso_8859_3_CharToOrderMap, TurkishLangModel, - 36, - (float)0.991865243864388, + 33, + (float)0.9923593121944019, PR_FALSE, "ISO-8859-3", "tr" @@ -167,9 +179,19 @@ const SequenceModel Iso_8859_9TurkishModel = { Iso_8859_9_CharToOrderMap, TurkishLangModel, - 36, - (float)0.991865243864388, + 33, + (float)0.9923593121944019, PR_FALSE, "ISO-8859-9", "tr" }; + +const LanguageModel TurkishModel = +{ + "tr", + Unicode_CharOrder, + 66, + TurkishLangModel, + 33, + (float)0.9923593121944019, +}; diff --git a/src/LangModels/LangVietnameseModel.cpp b/src/LangModels/LangVietnameseModel.cpp index 0569887..ad9129a 100644 --- a/src/LangModels/LangVietnameseModel.cpp +++ b/src/LangModels/LangVietnameseModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Vietnamese *********/ /** * Generated by BuildLangModel.py - * On: 2016-02-13 03:42:06.561440 + * On: 2021-03-16 20:57:28.726718 **/ /* Character Mapping Table: @@ -67,162 +68,179 @@ static const unsigned char Windows_1258_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */ - 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */ - 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,102, /* 9X */ + SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 4X */ + 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 6X */ + 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,105,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 97,ILL,ILL,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 97,ILL,ILL,104, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,103,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 12, 15, 25, 51, 97,104, 98, 91, 90, 62, 27,105,SYM, 47,106,107, /* CX */ - 10,108,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,109, 96, 18,SYM, 99, /* DX */ - 12, 15, 25, 51, 97,110, 98, 91, 90, 62, 27,111,SYM, 47,112,113, /* EX */ - 10,114,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,115, 96, 18,116,117, /* FX */ + SYM,SYM,SYM,SYM,SYM,107,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 11, 15, 27, 44,101,106, 95, 92, 90, 73, 28,108,SYM, 39,103,102, /* CX */ + 10,100,SYM, 36, 29, 47, 98,SYM, 96, 62, 61,109, 93, 17,SYM, 99, /* DX */ + 11, 15, 27, 44,101,106, 95, 92, 90, 73, 28,110,SYM, 39,103,102, /* EX */ + 10,100,SYM, 36, 29, 47, 98,SYM, 96, 62, 61,111, 93, 17,112,104, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const unsigned char Viscii_CharToOrderMap[] = { - CTR,CTR, 88,CTR,CTR, 95, 77,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR, 80,CTR,CTR,CTR,CTR, 79,CTR,CTR,CTR,CTR, 92,CTR, /* 1X */ + CTR,CTR, 85,CTR,CTR, 91, 77,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR, 82,CTR,CTR,CTR,CTR, 84,CTR,CTR,CTR,CTR, 94,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */ - 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */ - 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 30, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* 8X */ - 53, 60, 84, 31, 37, 40, 38, 59, 42, 81, 44, 73, 35, 72, 48, 76, /* 9X */ - 86, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* AX */ - 53, 60, 84, 87, 46, 31, 38, 59, 42, 56, 52, 55, 70, 46, 40, 18, /* BX */ - 12, 15, 25, 61, 34, 51, 88, 95, 90, 62, 27, 85, 50, 47, 64, 76, /* CX */ - 10, 52, 63, 33, 29, 30, 80, 55, 70, 58, 67, 79, 92, 68, 87, 18, /* DX */ - 12, 15, 25, 61, 34, 51, 26, 77, 90, 62, 27, 85, 50, 47, 64, 73, /* EX */ - 10, 56, 63, 33, 29, 86, 81, 44, 48, 58, 67, 72, 35, 68, 37, 26, /* FX */ + SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 4X */ + 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 6X */ + 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 31, 60, 68, 64, 42, 51, 76, 46, 80, 89, 23, 38, 43, 72, 26, 30, /* 8X */ + 49, 59, 81, 25, 41, 37, 40, 57, 45, 75, 54, 70, 35, 69, 50, 79, /* 9X */ + 86, 60, 68, 64, 42, 51, 76, 46, 80, 89, 23, 38, 43, 72, 26, 30, /* AX */ + 49, 59, 81, 87, 47, 25, 40, 57, 45, 48, 55, 58, 65, 47, 37, 17, /* BX */ + 11, 15, 27, 56, 33, 44, 85, 91, 90, 73, 28, 88, 52, 39, 67, 79, /* CX */ + 10, 55, 63, 36, 29, 31, 82, 58, 65, 62, 61, 84, 94, 66, 87, 17, /* DX */ + 11, 15, 27, 56, 33, 44, 34, 77, 90, 73, 28, 88, 52, 39, 67, 70, /* EX */ + 10, 48, 63, 36, 29, 86, 75, 54, 50, 62, 61, 69, 35, 66, 41, 34, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 108; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 6, 66, 18, 67, 3, 68, 21, 69, 24, 71, 5, 72, 1, 73, 4, + 75, 22, 76, 14, 77, 8, 78, 0, 79, 9, 80, 16, 81, 32, 82, 13, + 83, 19, 84, 2, 85, 7, 86, 12, 88, 53, 89, 20, 97, 6, 98, 18, + 99, 3, 100, 21, 101, 24, 103, 5, 104, 1, 105, 4, 107, 22, 108, 14, + 109, 8, 110, 0, 111, 9, 112, 16, 113, 32, 114, 13, 115, 19, 116, 2, + 117, 7, 118, 12, 120, 53, 121, 20, 192, 11, 193, 15, 194, 27, 202, 28, + 204, 52, 205, 39, 211, 36, 212, 29, 224, 11, 225, 15, 226, 27, 234, 28, + 236, 52, 237, 39, 243, 36, 244, 29, 258, 44, 259, 44, 272, 10, 273, 10, + 416, 47, 417, 47, 431, 17, 432, 17, 7840, 31, 7841, 31, 7842, 33,7843, 33, + 7844, 42, 7845, 42, 7846, 51, 7847, 51, 7852, 46, 7853, 46, 7870, 23,7871, 23, + 7872, 38, 7873, 38, 7874, 43, 7875, 43, 7878, 26, 7879, 26, 7882, 45,7883, 45, + 7888, 30, 7889, 30, 7890, 49, 7891, 49, 7896, 25, 7897, 25, 7898, 37,7899, 37, + 7900, 40, 7901, 40, 7906, 41, 7907, 41, 7908, 50, 7909, 50, 7910, 35,7911, 35, + 7918, 34, 7919, 34, 7920, 48, 7921, 48, +}; + /* Model Table: - * Total sequences: 1494 - * First 512 sequences: 0.9321889118082535 - * Next 512 sequences (512-1024): 0.06092051479986333 - * Rest: 0.0068905733918831966 + * Total sequences: 1890 + * First 512 sequences: 0.9336493792477815 + * Next 512 sequences (512-1024): 0.05889427825209051 + * Rest: 0.007456342500128027 * Negative sequences: TODO */ static const PRUint8 VietnameseLangModel[] = { - 3,3,3,3,3,3,3,2,2,3,0,2,3,1,1,1,1,2,3,3,2,3,3,3,2,1,2, - 3,0,3,2,2,2,3,1,0,1,1,2,0,0,1,0,1,0,2,2,1,0,0,0,3,0,0,2, - 2,1,2,0,3,0,3,3,2,3,0,2,3,0,2,3,0,0,3,1,3,3,1,3,1,3,3, - 3,3,3,3,3,3,3,3,3,0,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,3,2,0, - 2,3,2,2,3,1,3,3,1,3,1,3,3,2,2,3,2,0,3,2,2,3,1,3,0,3,0, - 3,1,3,3,3,3,2,3,2,0,0,2,1,2,2,2,2,0,0,1,3,2,3,2,2,2,2,0, - 2,3,2,2,3,0,3,3,2,3,0,2,2,1,2,3,1,1,2,2,2,3,1,0,2,2,0, - 0,0,3,2,3,2,3,3,3,1,1,2,0,0,2,0,3,0,0,2,0,2,2,0,2,3,1,1, - 3,1,3,3,3,3,3,2,3,3,1,3,2,2,3,3,2,2,0,3,1,3,3,3,2,0,3, - 3,3,1,0,0,3,1,3,0,2,0,2,3,3,2,0,0,2,3,0,0,0,1,0,1,0,0,2, - 2,3,2,2,3,1,3,3,1,3,0,3,3,0,2,2,0,1,3,2,2,3,1,1,1,2,3, - 0,0,3,3,1,2,2,0,1,0,2,2,0,0,1,1,3,3,0,0,0,1,1,2,1,0,3,0, - 3,2,3,3,3,2,2,3,3,3,0,3,0,2,3,0,2,3,0,3,3,2,3,0,2,0,0, - 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2, - 3,1,3,2,3,2,3,1,3,2,0,3,1,2,3,2,2,2,0,3,3,3,2,2,2,3,0, - 2,1,3,1,3,3,0,2,0,0,0,1,0,1,3,0,3,0,0,2,2,0,3,0,2,0,3,1, - 2,1,0,2,3,0,3,3,2,3,0,0,3,0,2,3,2,2,3,2,2,3,2,0,0,1,0, - 0,2,3,3,3,2,2,1,0,0,0,2,0,3,3,0,1,2,2,0,0,3,2,2,1,2,1,1, - 3,2,3,2,3,2,3,3,3,2,0,3,3,2,3,3,2,3,0,3,2,2,3,0,2,0,0, - 0,0,0,3,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2, - 0,0,0,0,3,0,3,2,0,3,0,1,3,0,0,3,0,1,3,0,0,1,0,3,0,3,0, - 2,3,3,3,3,3,3,3,2,0,1,3,3,1,3,3,3,3,3,2,2,0,1,2,2,3,3,0, - 3,2,3,2,3,2,3,3,2,3,0,3,2,2,3,2,1,2,3,3,3,3,3,0,2,1,2, - 3,1,2,2,3,2,0,2,0,0,2,2,1,0,3,3,2,3,0,1,2,2,2,3,3,1,2,0, + 2,3,3,2,3,3,3,2,1,3,1,3,1,2,2,1,1,3,1,3,2,3,2,2,3,3,1, + 2,3,3,2,2,1,1,2,0,3,1,2,1,0,1,1,0,3,1,1,2,0,1,0,1,0,1, + 2,1,2,1,3,0,3,3,2,3,1,3,1,2,1,3,1,3,1,1,2,2,1,3,3,3,3, + 3,3,3,3,3,1,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1, + 2,3,2,2,3,1,3,3,2,3,0,3,1,3,2,3,1,3,1,2,2,1,1,3,3,3,1, + 3,3,3,3,3,0,3,0,1,2,3,1,3,2,0,3,0,2,3,3,1,3,2,2,2,3,1, + 2,3,2,2,3,0,3,3,1,3,2,2,1,2,2,3,1,3,1,2,2,1,2,0,3,3,0, + 2,0,3,2,2,1,3,1,3,3,1,0,1,1,1,3,0,2,0,2,3,2,0,2,3,0,1, + 3,1,3,3,3,2,3,2,3,3,1,2,2,3,3,3,2,1,2,3,1,2,2,3,3,0,3, + 1,3,2,2,1,2,3,3,0,1,3,3,0,2,0,2,3,1,0,1,1,0,0,1,1,0,2, + 2,3,1,1,3,1,3,3,1,3,1,3,1,2,2,2,1,3,1,2,2,1,1,1,3,2,0, + 2,1,3,2,2,0,1,3,1,2,1,0,0,1,2,1,0,2,1,1,1,2,3,1,2,2,1, + 3,2,3,3,3,2,1,3,3,3,1,0,2,3,3,0,2,0,2,3,3,3,2,0,2,0,0, + 0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,1,2,2,2,2,3,1,2,2,0,1,1,3,2,3,2,0,2,3,3,2,2,2,3,3,1, + 3,2,3,3,2,1,3,0,0,0,1,1,1,1,0,3,0,1,0,3,1,0,3,0,3,0,1, + 1,1,1,1,3,1,3,3,2,3,0,3,1,2,2,3,2,3,2,2,1,1,1,1,3,3,2, + 2,1,3,2,3,0,1,0,1,2,3,2,1,2,0,2,0,1,1,2,1,1,1,3,1,3,1, + 3,2,3,2,3,2,3,3,3,2,0,3,2,3,3,3,2,0,2,3,1,3,2,0,2,0,0, + 0,0,0,0,3,1,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2, + 0,2,1,0,3,0,3,2,0,3,0,3,0,1,0,3,0,3,1,0,0,0,1,3,2,3,2, + 3,2,3,3,3,0,3,0,2,3,1,3,2,3,2,3,3,2,3,2,3,1,3,1,3,2,0, 3,0,0,0,3,0,0,2,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,3,0,3,3,0,2,0,1,3,0,1,1,0,0,2,1,1,3,1,1,0,2,1, - 2,1,2,1,0,1,0,0,0,0,2,1,0,3,2,3,3,1,3,0,3,2,3,3,3,0,0,0, - 0,2,2,1,3,2,3,3,2,3,0,0,3,2,3,2,2,2,3,2,2,3,2,1,1,2,1, - 3,2,2,3,3,2,1,0,0,0,3,2,0,3,2,3,2,1,0,1,2,2,3,0,2,0,0,1, - 3,0,3,3,3,1,0,2,3,3,0,1,0,0,1,0,3,0,0,1,3,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,2,0,3,0,3,2,1,3,0,3,0,0,2,0,2,1,0,2,2,3,1,0,0,0,0, - 2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 2,1,0,2,3,1,3,3,0,3,0,3,3,0,3,3,0,3,1,2,2,3,1,1,1,0,0, - 2,1,0,2,3,3,2,3,0,0,0,1,0,2,2,3,2,0,1,0,2,1,2,3,0,2,3,0, - 3,0,1,1,2,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,3,3,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0, - 1,3,3,3,3,1,3,3,2,3,0,1,2,0,2,3,2,2,2,3,2,3,2,0,2,2,0, - 0,0,2,1,0,3,2,2,0,1,1,1,1,1,1,0,0,0,0,2,0,1,0,0,1,2,1,0, - 2,0,1,2,1,0,2,2,1,2,0,2,0,0,1,1,2,1,0,2,0,2,1,3,1,0,0, - 3,2,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0, - 3,2,3,2,2,2,3,2,3,3,0,3,0,2,3,1,2,2,0,3,2,3,3,0,2,0,0, - 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 1,1,1,2,3,1,3,3,0,3,0,3,3,1,2,1,0,0,3,2,2,3,2,0,1,3,1, - 1,0,0,3,1,1,1,0,0,0,0,1,0,0,3,3,2,1,0,1,0,3,2,1,1,2,1,0, + 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,1,1,0,3,1,3,2,0,2,0,3,0,1,1,1,1,3,0,1,1,1,1,1,3,1,2, + 2,1,2,2,2,0,1,1,0,1,3,3,3,1,2,3,0,3,3,3,1,2,0,3,2,3,0, + 3,2,3,2,3,2,3,3,2,3,0,2,2,2,2,3,1,3,2,2,2,2,2,1,3,2,1, + 1,3,2,2,2,1,2,2,1,1,0,0,3,2,2,3,1,2,3,2,1,2,2,2,2,3,1, + 1,2,2,1,3,2,3,3,2,3,0,3,2,1,3,2,2,3,2,2,2,2,1,0,3,2,3, + 2,3,2,2,3,1,0,1,0,1,3,1,2,2,2,2,0,2,3,3,1,3,1,2,3,1,1, + 3,0,3,3,3,1,0,2,3,3,0,0,1,1,1,0,3,0,1,1,2,1,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,3,2,1,2,1,3,2,1,3,0,0,0,2,2,1,2,1,1,2,1,1,0,0,3,0,0, + 1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1, + 3,1,1,1,1,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,3,0,0,3,3,0,0,0,0,0,3,0,0,0,0,0,0, + 2,1,1,3,3,0,3,3,1,3,0,3,1,2,2,3,0,2,3,2,2,1,1,2,3,3,2, + 1,2,1,3,2,0,3,1,0,2,1,2,2,2,0,2,1,1,3,2,1,1,3,1,2,3,1, + 1,2,3,3,3,1,3,3,1,3,0,2,1,1,2,3,2,3,2,2,2,1,2,0,3,1,0, + 2,1,2,3,1,1,3,1,1,2,2,0,1,1,2,1,0,1,0,1,3,3,1,1,1,1,0, + 2,0,1,1,1,1,2,1,2,2,0,0,0,2,2,0,2,0,2,2,1,1,1,3,2,0,3, + 0,3,0,0,0,0,0,0,0,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, + 2,1,1,2,3,2,3,3,1,3,0,3,1,2,1,1,1,3,1,2,1,1,1,0,3,1,1, + 3,1,0,1,3,0,1,1,0,1,0,1,1,1,0,3,1,0,3,2,1,3,1,3,2,1,0, + 1,3,1,0,3,0,2,2,2,2,0,0,2,2,1,1,0,0,1,2,1,1,1,3,2,0,1, + 0,2,2,0,1,0,0,0,1,1,0,0,2,0,0,0,2,1,2,0,1,0,0,1,1,1,0, 3,0,3,2,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,3,1,0,3,1,3,2,0,2,0,2,0,1,2,0,0,1,0,2,2,2,0,3,1,0,0, - 2,0,1,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,3,0,0,2,0,0,0,1, + 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,1,3,2,3,2,3,2,3,3,0,0,2,3,3,0,2,0,2,3,2,2,2,0,2,0,0, + 0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,0,1,1,0,0,0,3,3,0,0,0,0,0,1,0,1,0,0,0,3,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,1,0,0,0,3,3,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,2,3,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0, - 0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,0,2,3,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,2,3,0,3,0,2,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,1,1,1,0,3,3,0,1,0,0,1,0,0,1,0,0,2,0,0,0,0,0,0,0, + 0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, + 3,0,1,1,3,1,0,0,3,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0, + 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,3,0,0,0,3,3,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,1,0,1,3,0,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0, + 0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,0,0,0,3,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,3,3,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,0,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,3,0,0,3,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,3,3,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,1,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,2,3,0,3,1,2,0,0,0,0,1,0,0,2,0,1,1,0,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,3,3,0,0,1,2,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,1,3,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,3,0,0,2,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,3,1,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,1,0,0,0,2,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,2,3,0,1,3,1,2,1,0,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,2,3,3,0,0,1,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,1,1,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,1,0,0,0,0,3,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,3,0,0,3,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,2,0,0,0,2,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,1,3,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,0,3,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,3,1,0,1,0,2,0,0,0,0,0,0,0,1,0,0,0,2,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,1,1,0,0,0,3,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,3,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,2,1,2,0,3,3,0,1,0,0,0,2,0,3,1,2,2,0,1,3,0,2,0,2,0, - 2,0,2,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,1,2,0,0,1,1,2,0,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,2,1,3,0,2,3,1,2,0,1,2,0,1,2,1,2,0,0,1,1,0,2,3,1,0, + 3,1,3,0,1,0,2,0,0,1,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0,1,2, }; @@ -230,8 +248,8 @@ const SequenceModel Windows_1258VietnameseModel = { Windows_1258_CharToOrderMap, VietnameseLangModel, - 55, - (float)0.9321889118082535, + 54, + (float)0.9336493792477815, PR_FALSE, "WINDOWS-1258", "vi" @@ -241,9 +259,19 @@ const SequenceModel VisciiVietnameseModel = { Viscii_CharToOrderMap, VietnameseLangModel, - 55, - (float)0.9321889118082535, + 54, + (float)0.9336493792477815, PR_FALSE, "VISCII", "vi" }; + +const LanguageModel VietnameseModel = +{ + "vi", + Unicode_CharOrder, + 108, + VietnameseLangModel, + 54, + (float)0.9336493792477815, +}; diff --git a/src/nsLanguageDetector.h b/src/nsLanguageDetector.h index 6c22d17..5300a4d 100644 --- a/src/nsLanguageDetector.h +++ b/src/nsLanguageDetector.h @@ -110,10 +110,30 @@ private: }; extern const LanguageModel ArabicModel; +extern const LanguageModel CroatianModel; +extern const LanguageModel CzechModel; extern const LanguageModel DanishModel; +extern const LanguageModel EsperantoModel; +extern const LanguageModel EstonianModel; +extern const LanguageModel FinnishModel; extern const LanguageModel FrenchModel; extern const LanguageModel GermanModel; +extern const LanguageModel GreekModel; +extern const LanguageModel HungarianModel; +extern const LanguageModel IrishModel; extern const LanguageModel ItalianModel; +extern const LanguageModel LatvianModel; +extern const LanguageModel LithuanianModel; +extern const LanguageModel MalteseModel; +extern const LanguageModel PolishModel; +extern const LanguageModel PortugueseModel; +extern const LanguageModel RomanianModel; +extern const LanguageModel SlovakModel; +extern const LanguageModel SloveneModel; extern const LanguageModel SpanishModel; +extern const LanguageModel SwedishModel; +extern const LanguageModel ThaiModel; +extern const LanguageModel TurkishModel; +extern const LanguageModel VietnameseModel; #endif /* nsLanguageDetector_h__ */ diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp index 544a8dd..6144d2d 100644 --- a/src/nsMBCSGroupProber.cpp +++ b/src/nsMBCSGroupProber.cpp @@ -85,12 +85,34 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter) { if (mProbers[i]->DecodeToUnicode()) { - langDetectors[i][0] = new nsLanguageDetector(&FrenchModel); - langDetectors[i][1] = new nsLanguageDetector(&ItalianModel); - langDetectors[i][2] = new nsLanguageDetector(&DanishModel); - langDetectors[i][3] = new nsLanguageDetector(&GermanModel); - langDetectors[i][4] = new nsLanguageDetector(&ArabicModel); - langDetectors[i][5] = new nsLanguageDetector(&SpanishModel); + int j = 0; + + langDetectors[i][j++] = new nsLanguageDetector(&ArabicModel); + langDetectors[i][j++] = new nsLanguageDetector(&CroatianModel); + langDetectors[i][j++] = new nsLanguageDetector(&CzechModel); + langDetectors[i][j++] = new nsLanguageDetector(&DanishModel); + langDetectors[i][j++] = new nsLanguageDetector(&EsperantoModel); + langDetectors[i][j++] = new nsLanguageDetector(&EstonianModel); + langDetectors[i][j++] = new nsLanguageDetector(&FinnishModel); + langDetectors[i][j++] = new nsLanguageDetector(&FrenchModel); + langDetectors[i][j++] = new nsLanguageDetector(&GermanModel); + langDetectors[i][j++] = new nsLanguageDetector(&GreekModel); + langDetectors[i][j++] = new nsLanguageDetector(&HungarianModel); + langDetectors[i][j++] = new nsLanguageDetector(&IrishModel); + langDetectors[i][j++] = new nsLanguageDetector(&ItalianModel); + langDetectors[i][j++] = new nsLanguageDetector(&LatvianModel); + langDetectors[i][j++] = new nsLanguageDetector(&LithuanianModel); + langDetectors[i][j++] = new nsLanguageDetector(&MalteseModel); + langDetectors[i][j++] = new nsLanguageDetector(&PolishModel); + langDetectors[i][j++] = new nsLanguageDetector(&PortugueseModel); + langDetectors[i][j++] = new nsLanguageDetector(&RomanianModel); + langDetectors[i][j++] = new nsLanguageDetector(&SlovakModel); + langDetectors[i][j++] = new nsLanguageDetector(&SloveneModel); + langDetectors[i][j++] = new nsLanguageDetector(&SpanishModel); + langDetectors[i][j++] = new nsLanguageDetector(&SwedishModel); + langDetectors[i][j++] = new nsLanguageDetector(&ThaiModel); + langDetectors[i][j++] = new nsLanguageDetector(&TurkishModel); + langDetectors[i][j++] = new nsLanguageDetector(&VietnameseModel); } else { diff --git a/src/nsMBCSGroupProber.h b/src/nsMBCSGroupProber.h index ee6669e..2ed028e 100644 --- a/src/nsMBCSGroupProber.h +++ b/src/nsMBCSGroupProber.h @@ -48,7 +48,7 @@ #include "nsEUCTWProber.h" #define NUM_OF_PROBERS 7 -#define NUM_OF_LANGUAGES 6 +#define NUM_OF_LANGUAGES 26 class nsMBCSGroupProber: public nsCharSetProber { public: |