diff options
author | Jehan <jehan@girinstud.io> | 2022-12-14 00:32:52 +0100 |
---|---|---|
committer | Jehan <jehan@girinstud.io> | 2022-12-14 00:36:02 +0100 |
commit | 7f386d922e0ac0094a39ef18ffbf4e7b3d6f0c89 (patch) | |
tree | 1f0bc2df5882dde43d4ce7a3841fda57ebd4e018 | |
parent | fb433a57b50ae4f6d3f17919f43282b30e95393f (diff) |
script, src: rebuild the English model.
The previous model was quite obviously wrong: all letters had the same
probability, even non-ASCII ones! Anyway, this new model does make the unit
tests a tiny bit better, though English detection is still weak (I
have more concepts which I want to experiment with to improve this).
-rw-r--r-- | script/BuildLangModelLogs/LangEnglishModel.log | 399 | ||||
-rw-r--r-- | src/LangModels/LangEnglishModel.cpp | 234 |
2 files changed, 302 insertions, 331 deletions
diff --git a/script/BuildLangModelLogs/LangEnglishModel.log b/script/BuildLangModelLogs/LangEnglishModel.log index 22f3ede..4348ae0 100644 --- a/script/BuildLangModelLogs/LangEnglishModel.log +++ b/script/BuildLangModelLogs/LangEnglishModel.log @@ -1,181 +1,252 @@ = Logs of language model for English (en) = - Generated by BuildLangModel.py -- Started: 2021-03-19 23:26:14.143096 -- Maximum depth: 4 -- Max number of pages: 100 +- Started: 2022-12-03 20:28:44.618364 +- Maximum depth: 2 +- Max number of pages: 200 == Parsed pages == -Marmot (revision 1000529225) -Alashan ground squirrel (revision 1010437381) -Alaska (revision 1012870556) -Alaska marmot (revision 1010409368) -Allen's chipmunk (revision 1010890232) -Alpine chipmunk (revision 1010409470) -Alpine marmot (revision 1012720679) -Alps (revision 1007908369) -Altai Mountains (revision 1006577543) -Ancient Greece (revision 1012778875) -Animal (revision 1013060732) -Animal Diversity Web (revision 996899740) -Antelope squirrel (revision 1010441265) -Apennine Mountains (revision 1009656710) -Arctic ground squirrel (revision 1010409925) -Asia Minor ground squirrel (revision 1010437585) -BNF (identifier) (revision 1010501260) -Baja California rock squirrel (revision 1010410301) -Barcode of Life Data System (revision 997241036) -Bat (revision 1012442106) -Bear (revision 1012937821) -Belding's ground squirrel (revision 1010410588) -Bibcode (identifier) (revision 1009103296) -Black-capped marmot (revision 992988317) -Black-tailed prairie dog (revision 1010411000) -Black Hills (revision 1011995885) -Bobak marmot (revision 1010411082) -Brokpa (revision 1001820104) -Brooks Range (revision 1009930357) -Buller's chipmunk (revision 1010411572) -California chipmunk (revision 1010411807) -California ground squirrel (revision 1010411812) -Callospermophilus (revision 1010416079) -Carpathian Mountains (revision 1011395807) -Cascade Range (revision 1011474213) -Cascade golden-mantled ground squirrel (revision 1010416079) -Chordate 
(revision 1008964469) -Cliff chipmunk (revision 1010412814) -Colorado chipmunk (revision 1010412919) -Daurian ground squirrel (revision 1010413422) -Deosai National Park (revision 1006913741) -Doi (identifier) (revision 1010427488) -Durango chipmunk (revision 1010413819) -EPPO Code (revision 998151320) -Eastern chipmunk (revision 999177830) -Encyclopedia of Life (revision 994178741) -Espíritu Santo antelope squirrel (revision 1010414324) -Ethnology (revision 1011057083) -Eulipotyphla (revision 1012652578) -Eurasian Steppe (revision 1013064344) -European ground squirrel (revision 1010414381) +Marmot (revision 1116705550) +Hibernate (revision 1115607389) +JSTOR (identifier) (revision 1122926070) +Thirteen-lined ground squirrel (revision 1124658433) +French Alps (revision 1117472036) +INaturalist (revision 1122751314) +Texas antelope squirrel (revision 1121470154) +Himalayas (revision 1124238550) +Vancouver Island marmot (revision 1121598871) +Mount Rainier National Park (revision 1120235066) +Olympic marmot (revision 1121472039) +Root (revision 1117256593) +Durango chipmunk (revision 1121473683) +France (revision 1125268533) +Sciuromorpha (revision 1107286064) +Alps (revision 1124362400) +Yellow-cheeked chipmunk (revision 1121299976) +Washington ground squirrel (revision 1121468941) +Hopi chipmunk (revision 1121297258) +Mexican prairie dog (revision 1121472442) +Antelope squirrel (revision 1089053714) +Deosai National Park (revision 1125376855) Eutamias (revision 1010406609) -Extinction (revision 1011028396) -Fauna Europaea (revision 963073975) -Flower (revision 1010385350) -Forest-steppe marmot (revision 1010436539) -Forrest's rock squirrel (revision 1010437668) -France (revision 1012524494) -Franklin's ground squirrel (revision 1010415067) -French Alps (revision 1006041101) -GND (identifier) (revision 1010440981) -Gallo-Romance languages (revision 1012668074) -Genus (revision 1007184632) -Global Biodiversity Information Facility (revision 1010489511) -Gold 
(revision 1012856700) -Gold-digging ant (revision 1007959560) -Golden-mantled ground squirrel (revision 1010416079) -Gray-collared chipmunk (revision 1010416642) -Gray-footed chipmunk (revision 1010416658) -Gray marmot (revision 1010416479) -Ground squirrel (revision 1010442953) -Groundhog Day (revision 1012802985) -Gunnison's prairie dog (revision 1010416998) -Harris's antelope squirrel (revision 1010417210) -Herbivore (revision 1006902225) -Herodotus (revision 1012927818) -Hibernate (revision 1009048926) -Hibernation (revision 1009048926) -Himalayan marmot (revision 1010417424) -Hoary marmot (revision 1010417525) -Hopi chipmunk (revision 1010417623) -INaturalist (revision 1009815294) -ISBN (identifier) (revision 1009586768) -Ictidomys (revision 1010406819) -Ictidomys parvidens (revision 1010426310) -Integrated Taxonomic Information System (revision 999235988) -Interim Register of Marine and Nonmarine Genera (revision 995182351) -JSTOR (identifier) (revision 1011078319) -Jacopo Ligozzi (revision 1006687935) -Johann Friedrich Blumenbach (revision 1006564504) -Kazakhstan (revision 1012748504) -LCCN (identifier) (revision 1006934344) -Ladakh (revision 1010799326) -Latin (revision 1012971392) -Least chipmunk (revision 1010419221) +Eastern chipmunk (revision 1120765340) +Golden-mantled ground squirrel (revision 1121777526) +Tuolumne Meadows (revision 1094508214) +Cascade Range (revision 1114533492) +Mammal Species of the World (revision 1093112025) +Franklin's ground squirrel (revision 1121361872) +Ladakh (revision 1124124745) +Groundhog (revision 1117813429) +Natural reservoir (revision 1110806364) +Neotamias (revision 1117512650) +Yosemite National Park (revision 1125019703) +Ontario (revision 1125244433) +Russet ground squirrel (revision 1121469545) +Bat (revision 1125180714) +Wayback Machine (revision 1125067302) +Long-eared chipmunk (revision 1121298477) +Southern Idaho ground squirrel (revision 1121468339) +Moss (revision 1122019251) +Altai Mountains (revision 
1124752508) +Townsend's ground squirrel (revision 1121468829) +Richardson's ground squirrel (revision 1122297225) +Utah prairie dog (revision 1125084849) +Yersinia pestis (revision 1121719480) +European ground squirrel (revision 1121469378) +Spermophilus relictus (revision 1121469745) +Least chipmunk (revision 1120765536) +Panamint chipmunk (revision 1121299808) +Catalogue of Life (revision 1118132647) +Gray marmot (revision 1122462225) +Columbian ground squirrel (revision 1124139650) +Alberni-Clayoquot Regional District (revision 1109499216) +La Tania (revision 1115267378) +Populus tremuloides (revision 1120966005) +Paradise River Waterfalls (revision 1054159583) +Long-tongued nectar bat (revision 1123039710) +Happy Isles (revision 1113517959) +Tourism in France (revision 1120671901) +Otospermophilus (revision 1093268410) +History of Canada (revision 1123782373) +California chipmunk (revision 1121299691) +Mexican ground squirrel (revision 1121470340) +White-tailed antelope squirrel (revision 1121470211) +Sedentism (revision 1110063134) +Terabyte (revision 1123174616) +Tamias (revision 1121473202) +RECAP US Federal Court Documents (collection) (revision 1122929164) +Belding's ground squirrel (revision 1121468288) +Cannibalism (revision 1125092745) +Yellow-pine chipmunk (revision 1121473478) +Monoclonal antibody therapy (revision 1114372687) +Menzbier's marmot (revision 1121471953) +Black-footed ferret (revision 1123500226) +Floods in Bihar (revision 1119748410) +Mammal (revision 1124779293) +Alaska marmot (revision 1124026979) +Sierra Madre ground squirrel (revision 1121471267) +Computer security (revision 1125370428) +Kedarnath Temple (revision 1122647471) +Frog Creek Cabin (revision 1048164755) +Outline of botany (revision 1100540741) +Agriculture in Nepal (revision 1088978356) +Plant evolution (revision 1116709561) +Little ground squirrel (revision 1121469707) +Dicranales (revision 1110407415) +Ultrasound (revision 1117397225) +White-tailed prairie dog (revision 
1121472368) +Espíritu Santo antelope squirrel (revision 1121470113) +Brown County, Wisconsin (revision 1122831345) +Timeline of audio formats (revision 1120236679) +List of mountain peaks of Uttarakhand (revision 1121014571) +Antiviral drug (revision 1118217791) +California ground squirrel (revision 1121359049) +Red-tailed chipmunk (revision 1121297616) +Bobak marmot (revision 1121471769) +National Register of Historic Places listings in the Northern Mariana Islands (revision 1115478435) +Spermophilus pallidicauda (revision 1121469669) +Yellow-bellied marmot (revision 1121472145) +Sexually transmitted infection (revision 1122774900) +List of Yosemite destinations (revision 1119350249) +Baitarani River (revision 1118320499) +Baja California rock squirrel (revision 1121471079) +Years of Lead (Italy) (revision 1123769084) +Snow leopard (revision 1122462489) +Coyote (revision 1125069820) +Villard-Reculas (revision 1077275360) +Vancouver Island (revision 1121908258) +Sciurotamias (revision 1120570732) +Canada 2021 Census (revision 1114664828) +Time in Canada (revision 1120998431) +Forrest's rock squirrel (revision 1121471379) +Via Lattea (revision 1110201667) +Phylogenetic tree (revision 1117394267) +Hibernation (revision 1115607389) +Altai wapiti (revision 1111750851) +Alpine chipmunk (revision 1121473423) +Schist (revision 1116202480) +Rodent (revision 1123634696) +Nepalese literature (revision 1117603265) +Unification of Nepal (revision 1125350055) +CBC News (revision 1124984918) +Harris's antelope squirrel (revision 1121470079) +Alpine meadow (revision 1114658726) +Himalayan marmot (revision 1113552191) +Merriam's ground squirrel (revision 1121468396) +Heliscomyidae (revision 1010405407) +Siberian chipmunk (revision 1121472776) +1980 eruption of Mount St. 
Helens (revision 1123425632) +Tarbagan marmot (revision 1121488248) +Uinta chipmunk (revision 1121367930) +Asia Minor ground squirrel (revision 1121357197) +San Bernardino National Forest (revision 1113614977) +British Columbia (revision 1124903693) +List of Web archiving initiatives (revision 1120507741) +2011 Kashgar attacks (revision 1124413350) +Genus (revision 1125331312) +IUCN Red List (revision 1123293379) +Attack rate (revision 1118026995) +Atlas of Living Australia (revision 1069034125) +Riparian zone (revision 1100819694) +Natural History Museum of Los Angeles County (revision 1118638991) +Flying squirrel typhus (revision 1108887986) +New Scientist (revision 1121186695) +Sonoma chipmunk (revision 1121298317) +Basic reproduction number (revision 1122698892) +Homeothermic (revision 1082125124) +Library Genesis (revision 1123879366) +Ecological succession (revision 1116584234) +Taurus ground squirrel (revision 1121469893) +Edmund Jaeger (revision 1042985886) +Wolverine (revision 1123904337) +Puget Sound (revision 1124438931) +List of highest points of European countries (revision 1125124917) +Amburiq Mosque (revision 1101963105) +Mohave ground squirrel (revision 1121470764) +Kali Gandaki Gorge (revision 1091465924) +Palmer's chipmunk (revision 1121473732) +Citizen Science Association (revision 1076637865) +Alpha male (revision 1123599649) +Thermotogota (revision 1108216914) +Gray-footed chipmunk (revision 1121473564) +ISSN (identifier) (revision 1117323780) +The Daily Excelsior (revision 1073376573) +National Center for Biotechnology Information (revision 1117911694) +Haridwar (revision 1124587996) +Ground squirrel (revision 1106618817) +ISBN (identifier) (revision 1124259962) +Breton language (revision 1123193740) +Notocitellus (revision 1092528025) +Wayback Machine (Peabody's Improbable History) (revision 1125111405) +Social animal (revision 1118899517) +Conservation status (revision 1124721586) +Doi (identifier) (revision 1121872952) +Drop (liquid) 
(revision 1115117361) +Monogamy in animals (revision 1115061008) +Grand Slam (tennis) (revision 1125138113) +Synonym (taxonomy) (revision 1115465643) +Encyclopedia of Life (revision 1123215390) +Algonquian languages (revision 1118973728) +Circulatory system (revision 1123361226) +Kenneth Oppel (revision 1115838353) +Red-cheeked ground squirrel (revision 1121469468) +Prairie dog (revision 1125350300) +Zygomasseteric system (revision 1093682242) +Black-tailed prairie dog (revision 1120101763) +Scenic Beach State Park (revision 1085870429) +Fashion capital (revision 1122240170) +Herbivory (revision 1124405692) +Artemisia tridentata (revision 1097902309) +ARKive (revision 1028182358) +Emblem of Uttarakhand (revision 1085229611) +Northern Italy (revision 1122409316) +Bibcode (identifier) (revision 1119780351) +Squirrel (revision 1121741651) +Birch Bay State Park (revision 1068937174) +Whistling (revision 1124843854) +Gobiomyidae (revision 1090208761) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-19 23:29:33.380471 +- Wikipedia parsing ended at: 2022-12-03 20:32:27.933336 -59 characters appeared 59 times. +58 characters appeared 2027474 times. 
Most Frequent characters: -[ 0] Char m: 1.694915254237288 % -[ 1] Char a: 1.694915254237288 % -[ 2] Char r: 1.694915254237288 % -[ 3] Char o: 1.694915254237288 % -[ 4] Char t: 1.694915254237288 % -[ 5] Char s: 1.694915254237288 % -[ 6] Char e: 1.694915254237288 % -[ 7] Char l: 1.694915254237288 % -[ 8] Char i: 1.694915254237288 % -[ 9] Char v: 1.694915254237288 % -[10] Char y: 1.694915254237288 % -[11] Char g: 1.694915254237288 % -[12] Char u: 1.694915254237288 % -[13] Char n: 1.694915254237288 % -[14] Char d: 1.694915254237288 % -[15] Char q: 1.694915254237288 % -[16] Char h: 1.694915254237288 % -[17] Char w: 1.694915254237288 % -[18] Char p: 1.694915254237288 % -[19] Char c: 1.694915254237288 % -[20] Char b: 1.694915254237288 % -[21] Char f: 1.694915254237288 % -[22] Char k: 1.694915254237288 % -[23] Char x: 1.694915254237288 % -[24] Char z: 1.694915254237288 % -[25] Char j: 1.694915254237288 % -[26] Char á: 1.694915254237288 % -[27] Char ö: 1.694915254237288 % -[28] Char ä: 1.694915254237288 % -[29] Char í: 1.694915254237288 % -[30] Char ç: 1.694915254237288 % -[31] Char ô: 1.694915254237288 % -[32] Char à: 1.694915254237288 % -[33] Char ü: 1.694915254237288 % -[34] Char æ: 1.694915254237288 % -[35] Char é: 1.694915254237288 % -[36] Char ï: 1.694915254237288 % -[37] Char û: 1.694915254237288 % -[38] Char ó: 1.694915254237288 % -[39] Char µ: 1.694915254237288 % -[40] Char è: 1.694915254237288 % -[41] Char ì: 1.694915254237288 % -[42] Char î: 1.694915254237288 % -[43] Char ë: 1.694915254237288 % -[44] Char ð: 1.694915254237288 % -[45] Char ý: 1.694915254237288 % -[46] Char š: 1.694915254237288 % -[47] Char ñ: 1.694915254237288 % -[48] Char œ: 1.694915254237288 % -[49] Char ê: 1.694915254237288 % -[50] Char â: 1.694915254237288 % -[51] Char ø: 1.694915254237288 % -[52] Char þ: 1.694915254237288 % -[53] Char å: 1.694915254237288 % -[54] Char ß: 1.694915254237288 % -[55] Char ã: 1.694915254237288 % -[56] Char ž: 1.694915254237288 % -[57] Char õ: 1.694915254237288 % 
-[58] Char ú: 1.694915254237288 % +[ 0] Char e: 11.847648847778073 % +[ 1] Char a: 8.861519309248848 % +[ 2] Char t: 8.523956410785045 % +[ 3] Char i: 7.880199696765532 % +[ 4] Char n: 7.477629799445023 % +[ 5] Char o: 7.206405606187798 % +[ 6] Char s: 6.8668698094278895 % +[ 7] Char r: 6.763489938711914 % +[ 8] Char l: 4.301066252884131 % +[ 9] Char h: 4.232754649381447 % +[10] Char d: 3.7247333381340524 % +[11] Char c: 3.556839693135399 % +[12] Char u: 2.763981190387645 % +[13] Char m: 2.7244739020081146 % +[14] Char p: 2.17398595493703 % +[15] Char f: 2.1424195821993277 % +[16] Char g: 2.0356364619225698 % +[17] Char b: 1.575457934355755 % +[18] Char y: 1.572005362337569 % +[19] Char w: 1.3260835897279077 % +[20] Char v: 1.1594230061643207 % +[21] Char k: 0.6102667654431081 % +[22] Char x: 0.2356133790125052 % +[23] Char z: 0.13746168878121248 % +[24] Char j: 0.1346503087092609 % +[25] Char q: 0.1320855409243226 % -The first 59 characters have an accumulated ratio of 0.9999999999999989. +The first 26 characters have an accumulated ratio of 0.9996665801879581. -920 sequences found. +863 sequences found. 
-First 378 (typical positive ratio): 0.9950109024233114 -Next 182 (560-378): 0.003993012537786833 -Rest: 0.000996085038901806 +First 369 (typical positive ratio): 0.9950424985513596 +Next 125 (494-369): 0.003963798368833871 +Rest: 0.0009937030798065072 -- Processing end: 2021-03-19 23:29:33.474226 +- Processing end: 2022-12-03 20:32:28.010953 diff --git a/src/LangModels/LangEnglishModel.cpp b/src/LangModels/LangEnglishModel.cpp index dfe86f3..e06d15c 100644 --- a/src/LangModels/LangEnglishModel.cpp +++ b/src/LangModels/LangEnglishModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-19 23:29:33.380823 + * On: 2022-12-03 20:32:27.947524 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 19, 14, 6, 21, 11, 16, 8, 25, 22, 7, 0, 13, 3, /* 4X */ - 18, 15, 2, 5, 4, 12, 9, 17, 23, 10, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 19, 14, 6, 21, 11, 16, 8, 25, 22, 7, 0, 13, 3, /* 6X */ - 18, 15, 2, 5, 4, 12, 9, 17, 23, 10, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 4X */ + 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 6X */ + 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 32, 26, 50, 55, 28, 53, 34, 30, 40, 35, 49, 43, 41, 29, 42, 36, /* CX */ - 44, 47, 59, 38, 31, 57, 27,SYM, 51, 60, 58, 37, 33, 45, 
52, 54, /* DX */ - 32, 26, 50, 55, 28, 53, 34, 30, 40, 35, 49, 43, 41, 29, 42, 36, /* EX */ - 44, 47, 61, 38, 31, 57, 27,SYM, 51, 62, 58, 37, 33, 45, 52, 63, /* FX */ + SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 39, 36, 30, 47, 46, 58, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* CX */ + 48, 29, 53, 37, 32, 59, 28,SYM, 49, 34, 55, 45, 33, 51, 60, 57, /* DX */ + 39, 36, 30, 47, 46, 61, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* EX */ + 48, 29, 53, 37, 32, 62, 28,SYM, 49, 34, 55, 45, 33, 51, 63, 64, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,170 +89,70 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 19, 14, 6, 21, 11, 16, 8, 25, 22, 7, 0, 13, 3, /* 4X */ - 18, 15, 2, 5, 4, 12, 9, 17, 23, 10, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 19, 14, 6, 21, 11, 16, 8, 25, 22, 7, 0, 13, 3, /* 6X */ - 18, 15, 2, 5, 4, 12, 9, 17, 23, 10, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM, 46,SYM, 48,ILL, 56,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 46,SYM, 48,ILL, 56, 65, /* 9X */ + SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 4X */ + 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 6X */ + 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM, 54,SYM, 41,ILL, 56,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 54,SYM, 41,ILL, 56, 66, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 32, 26, 50, 55, 28, 53, 34, 30, 40, 35, 49, 43, 41, 29, 42, 36, /* CX */ - 44, 
47, 66, 38, 31, 57, 27,SYM, 51, 67, 58, 37, 33, 45, 52, 54, /* DX */ - 32, 26, 50, 55, 28, 53, 34, 30, 40, 35, 49, 43, 41, 29, 42, 36, /* EX */ - 44, 47, 68, 38, 31, 57, 27,SYM, 51, 69, 58, 37, 33, 45, 52, 70, /* FX */ + SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 39, 36, 30, 47, 46, 67, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* CX */ + 48, 29, 53, 37, 32, 68, 28,SYM, 49, 34, 55, 45, 33, 51, 69, 57, /* DX */ + 39, 36, 30, 47, 46, 70, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* EX */ + 48, 29, 53, 37, 32, 71, 28,SYM, 49, 34, 55, 45, 33, 51, 72, 73, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 117; +static const int Unicode_Char_size = 52; static const unsigned int Unicode_CharOrder[] = { - 65, 1, 66, 20, 67, 19, 68, 14, 69, 6, 70, 21, 71, 11, 72, 16, - 73, 8, 74, 25, 75, 22, 76, 7, 77, 0, 78, 13, 79, 3, 80, 18, - 81, 15, 82, 2, 83, 5, 84, 4, 85, 12, 86, 9, 87, 17, 88, 23, - 89, 10, 90, 24, 97, 1, 98, 20, 99, 19, 100, 14, 101, 6,102, 21, - 103, 11, 104, 16, 105, 8, 106, 25, 107, 22, 108, 7, 109, 0,110, 13, - 111, 3, 112, 18, 113, 15, 114, 2, 115, 5, 116, 4, 117, 12,118, 9, - 119, 17, 120, 23, 121, 10, 122, 24, 181, 39, 192, 32, 193, 26,194, 50, - 195, 55, 196, 28, 197, 53, 198, 34, 199, 30, 200, 40, 201, 35,202, 49, - 203, 43, 204, 41, 205, 29, 206, 42, 207, 36, 208, 44, 209, 47,211, 38, - 212, 31, 213, 57, 214, 27, 216, 51, 218, 58, 219, 37, 220, 33,221, 45, - 222, 52, 223, 54, 224, 32, 225, 26, 226, 50, 227, 55, 228, 28,229, 53, - 230, 34, 231, 30, 232, 40, 233, 35, 234, 49, 235, 43, 236, 41,237, 29, - 238, 42, 239, 36, 240, 44, 241, 47, 243, 38, 244, 31, 245, 57,246, 27, - 248, 51, 250, 58, 251, 37, 252, 33, 253, 45, 254, 52, 338, 48,339, 48, - 352, 46, 353, 46, 381, 56, 382, 56, 924, 39, + 65, 1, 66, 17, 67, 11, 68, 10, 69, 0, 70, 15, 71, 16, 72, 9, + 73, 3, 74, 24, 75, 21, 76, 8, 77, 13, 78, 4, 79, 5, 80, 14, + 81, 25, 82, 7, 83, 6, 84, 2, 85, 12, 86, 20, 87, 19, 88, 22, + 
89, 18, 90, 23, 97, 1, 98, 17, 99, 11, 100, 10, 101, 0,102, 15, + 103, 16, 104, 9, 105, 3, 106, 24, 107, 21, 108, 8, 109, 13,110, 4, + 111, 5, 112, 14, 113, 25, 114, 7, 115, 6, 116, 2, 117, 12,118, 20, + 119, 19, 120, 22, 121, 18, 122, 23, }; /* Model Table: - * Total considered sequences: 920 / 3481 - * - Positive sequences: first 378 (0.9950109024233114) - * - Probable sequences: next 182 (560-378) (0.003993012537786833) - * - Neutral sequences: last 2921 (0.000996085038901806) - * - Negative sequences: 2561 (off-ratio) + * Total considered sequences: 863 / 676 + * - Positive sequences: first 369 (0.9950424985513596) + * - Probable sequences: next 125 (494-369) (0.003963798368833871) + * - Neutral sequences: last 182 (0.0009937030798065072) + * - Negative sequences: -187 (off-ratio) * Negative sequences: TODO */ static const PRUint8 EnglishLangModel[] = { - 3,3,1,3,2,3,3,2,3,1,3,2,3,3,2,1,2,1,3,2,3,2,1,1,1,1,2,1,1, - 1,0,0,1,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0, - 3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,0, - 0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,1,2,1,1,1,1, - 1,0,1,0,0,0,2,0,1,1,0,1,0,1,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,0, - 0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,3,2,3,3,1,2,3,3,2,3,2,2,2,0,3,0,1,0,0, - 1,0,0,1,1,0,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1, - 3,3,3,3,3,3,3,3,3,1,3,2,3,3,2,3,3,3,3,3,3,3,3,1,1,1,1,1,0, - 1,0,0,0,0,0,2,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,1,0,0, - 1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,3,3,3,3,3,0,2,1,1,0,1, - 1,0,1,0,0,0,2,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,1,3,3,3,3,3,3,3,2,1,0,0, - 1,0,0,0,0,0,1,0,0,1,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 
1,3,2,3,1,2,3,2,3,2,2,1,2,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0, - 1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,0,1,2,3,3,2,1,2,2,2,1,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,3,2,2,1,2,2,1,0,2,1,1,2,0, - 1,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,1,2,1,3,3,3,3,3,3,2,2,1,0,0, - 1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,2,3,1,1,0, - 2,2,1,0,0,1,2,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,2,3,1,1,2,2,0,0,0, - 1,0,1,0,1,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, - 0,2,0,0,1,1,0,0,2,1,1,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,1,3,2,3,3,2,2,1,3,1,2,2,1,1,0,1,0,1,1,1, - 1,0,1,0,1,1,2,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1, - 1,3,3,3,3,3,3,3,3,0,2,1,1,3,2,0,3,2,1,2,2,2,2,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,0,3,2,3,1,2,0,3,2,3,2,2,0,2,1,1,0,1,0,0, - 2,0,1,0,0,2,2,0,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,3,0,0,3,1,2,3,0,2,0,1,0,0, - 1,0,1,1,0,0,2,0,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 2,3,3,3,3,3,3,3,3,1,3,2,3,3,2,0,2,1,2,3,3,2,1,0,1,3,0,0,0, - 0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, - 2,3,3,3,3,2,3,3,3,0,3,1,3,1,2,0,0,1,1,2,1,3,1,0,0,1,0,1,0, - 1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,3,0,3,2,3,3,1,1,3,2,2,2,2,1,1,0,1,1,1,2,0, - 0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0, - 1,3,0,3,3,2,3,0,3,2,2,0,3,0,1,1,2,1,3,3,0,2,0,2,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,3,1,3,2,2,3,2,3,1,2,0,2,2,1,0,2,1,0,0,2,0,1,0,2,1,0,0,0, - 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,3,2,3,0,3,3,1,3,1,1,0,3,1,0,0,1,1,1,0,0,0,1,0,0,2,1,1,0, - 0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,0,1,1,0,2,0,1,0,1,1,2,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,1,0,2,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,1,0,2,2,0,1,0,1,0,0,0,2,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, - 1,0,0,0,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,1,1,0,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,2,1,2,2,2,2,1,2,0,2,1,2,2,1,0,0,2,2,1,0,0,0,1,1,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,1,0,0,2,0,1,0,1,0,0,0,2,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,2,0,0,1,0,0,0,1,0,1,0,1,1,2,0,0,0,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,2,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3, + 3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,3,2,2,1,2,1,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3, + 3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,3,2,3,3,2,2,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,3,2,3,1,1,1,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,1,1, + 3,3,3,3,2,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1, + 3,3,3,3,3,3,3,3,3,1,2,2,3,3,2,1,2,2,3,3,2,1,0,2,1,2, + 3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,1,1,1,2,2, + 3,3,3,3,2,3,3,3,3,3,2,3,3,2,1,2,1,3,3,1,1,3,1,1,0,2, + 3,3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,3,3,2,2,3,3,2,2,2,1, + 3,3,2,3,3,3,3,2,2,1,1,2,3,3,3,2,2,3,3,1,1,1,0,1,1,1, + 3,3,3,3,2,3,3,3,3,3,2,2,3,3,3,1,2,2,3,1,1,2,1,0,1,1, + 3,3,3,3,1,3,3,3,3,0,1,1,3,1,1,3,1,1,2,1,1,1,0,0,1,1, + 3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,3,2,3,2,1,1,1,1,1,1, + 3,3,2,3,3,3,3,3,3,3,2,3,3,2,1,2,2,3,3,1,1,1,0,1,3,0, + 3,3,3,3,3,3,3,2,3,1,2,3,2,3,3,1,3,3,1,2,1,1,1,2,1,0, + 3,3,3,3,3,3,3,3,3,3,2,1,1,1,1,2,1,2,2,1,0,2,0,0,0,0, + 3,3,1,3,1,3,1,2,1,1,1,1,2,1,1,1,1,0,2,0,1,1,0,0,0,1, + 3,3,2,3,3,3,3,2,3,3,2,1,3,3,2,1,2,2,3,2,1,2,0,0,1,1, + 3,3,3,3,1,3,1,0,0,2,0,3,3,1,3,2,0,0,2,1,1,0,1,0,0,1, + 3,3,1,3,1,3,1,1,2,2,1,1,2,1,0,1,0,2,2,1,1,1,0,2,1,0, + 3,3,0,2,1,3,2,2,0,1,0,0,3,1,1,0,0,1,1,1,1,1,1,0,1,0, + 1,1,1,2,0,1,0,1,1,0,0,1,3,0,0,1,0,0,0,0,0,0,0,0,0,0, }; @@ -260,8 +160,8 @@ const SequenceModel Iso_8859_1EnglishModel = { Iso_8859_1_CharToOrderMap, EnglishLangModel, - 59, - (float)0.9990039149610982, + 26, + (float)0.9990062969201935, PR_TRUE, "ISO-8859-1", "en" @@ -271,8 +171,8 @@ const SequenceModel Windows_1252EnglishModel = { Windows_1252_CharToOrderMap, 
EnglishLangModel, - 59, - (float)0.9990039149610982, + 26, + (float)0.9990062969201935, PR_TRUE, "WINDOWS-1252", "en" @@ -282,8 +182,8 @@ const LanguageModel EnglishModel = { "en", Unicode_CharOrder, - 117, + 52, EnglishLangModel, - 59, - (float)0.9999999999999989, + 26, + (float)0.9996665801879581, }; |