summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jehan <jehan@girinstud.io> 2022-12-14 00:32:52 +0100
committer: Jehan <jehan@girinstud.io> 2022-12-14 00:36:02 +0100
commit: 7f386d922e0ac0094a39ef18ffbf4e7b3d6f0c89 (patch)
tree: 1f0bc2df5882dde43d4ce7a3841fda57ebd4e018
parent: fb433a57b50ae4f6d3f17919f43282b30e95393f (diff)
script, src: rebuild the English model.
The previous model was quite obviously wrong: all letters had the same probability, even non-ASCII ones! Anyway, this new model does make the unit tests a tiny bit better, though English detection is still weak (I have more concepts that I want to experiment with to improve it).
-rw-r--r-- script/BuildLangModelLogs/LangEnglishModel.log 399
-rw-r--r-- src/LangModels/LangEnglishModel.cpp 234
2 files changed, 302 insertions, 331 deletions
diff --git a/script/BuildLangModelLogs/LangEnglishModel.log b/script/BuildLangModelLogs/LangEnglishModel.log
index 22f3ede..4348ae0 100644
--- a/script/BuildLangModelLogs/LangEnglishModel.log
+++ b/script/BuildLangModelLogs/LangEnglishModel.log
@@ -1,181 +1,252 @@
= Logs of language model for English (en) =
- Generated by BuildLangModel.py
-- Started: 2021-03-19 23:26:14.143096
-- Maximum depth: 4
-- Max number of pages: 100
+- Started: 2022-12-03 20:28:44.618364
+- Maximum depth: 2
+- Max number of pages: 200
== Parsed pages ==
-Marmot (revision 1000529225)
-Alashan ground squirrel (revision 1010437381)
-Alaska (revision 1012870556)
-Alaska marmot (revision 1010409368)
-Allen's chipmunk (revision 1010890232)
-Alpine chipmunk (revision 1010409470)
-Alpine marmot (revision 1012720679)
-Alps (revision 1007908369)
-Altai Mountains (revision 1006577543)
-Ancient Greece (revision 1012778875)
-Animal (revision 1013060732)
-Animal Diversity Web (revision 996899740)
-Antelope squirrel (revision 1010441265)
-Apennine Mountains (revision 1009656710)
-Arctic ground squirrel (revision 1010409925)
-Asia Minor ground squirrel (revision 1010437585)
-BNF (identifier) (revision 1010501260)
-Baja California rock squirrel (revision 1010410301)
-Barcode of Life Data System (revision 997241036)
-Bat (revision 1012442106)
-Bear (revision 1012937821)
-Belding's ground squirrel (revision 1010410588)
-Bibcode (identifier) (revision 1009103296)
-Black-capped marmot (revision 992988317)
-Black-tailed prairie dog (revision 1010411000)
-Black Hills (revision 1011995885)
-Bobak marmot (revision 1010411082)
-Brokpa (revision 1001820104)
-Brooks Range (revision 1009930357)
-Buller's chipmunk (revision 1010411572)
-California chipmunk (revision 1010411807)
-California ground squirrel (revision 1010411812)
-Callospermophilus (revision 1010416079)
-Carpathian Mountains (revision 1011395807)
-Cascade Range (revision 1011474213)
-Cascade golden-mantled ground squirrel (revision 1010416079)
-Chordate (revision 1008964469)
-Cliff chipmunk (revision 1010412814)
-Colorado chipmunk (revision 1010412919)
-Daurian ground squirrel (revision 1010413422)
-Deosai National Park (revision 1006913741)
-Doi (identifier) (revision 1010427488)
-Durango chipmunk (revision 1010413819)
-EPPO Code (revision 998151320)
-Eastern chipmunk (revision 999177830)
-Encyclopedia of Life (revision 994178741)
-Espíritu Santo antelope squirrel (revision 1010414324)
-Ethnology (revision 1011057083)
-Eulipotyphla (revision 1012652578)
-Eurasian Steppe (revision 1013064344)
-European ground squirrel (revision 1010414381)
+Marmot (revision 1116705550)
+Hibernate (revision 1115607389)
+JSTOR (identifier) (revision 1122926070)
+Thirteen-lined ground squirrel (revision 1124658433)
+French Alps (revision 1117472036)
+INaturalist (revision 1122751314)
+Texas antelope squirrel (revision 1121470154)
+Himalayas (revision 1124238550)
+Vancouver Island marmot (revision 1121598871)
+Mount Rainier National Park (revision 1120235066)
+Olympic marmot (revision 1121472039)
+Root (revision 1117256593)
+Durango chipmunk (revision 1121473683)
+France (revision 1125268533)
+Sciuromorpha (revision 1107286064)
+Alps (revision 1124362400)
+Yellow-cheeked chipmunk (revision 1121299976)
+Washington ground squirrel (revision 1121468941)
+Hopi chipmunk (revision 1121297258)
+Mexican prairie dog (revision 1121472442)
+Antelope squirrel (revision 1089053714)
+Deosai National Park (revision 1125376855)
Eutamias (revision 1010406609)
-Extinction (revision 1011028396)
-Fauna Europaea (revision 963073975)
-Flower (revision 1010385350)
-Forest-steppe marmot (revision 1010436539)
-Forrest's rock squirrel (revision 1010437668)
-France (revision 1012524494)
-Franklin's ground squirrel (revision 1010415067)
-French Alps (revision 1006041101)
-GND (identifier) (revision 1010440981)
-Gallo-Romance languages (revision 1012668074)
-Genus (revision 1007184632)
-Global Biodiversity Information Facility (revision 1010489511)
-Gold (revision 1012856700)
-Gold-digging ant (revision 1007959560)
-Golden-mantled ground squirrel (revision 1010416079)
-Gray-collared chipmunk (revision 1010416642)
-Gray-footed chipmunk (revision 1010416658)
-Gray marmot (revision 1010416479)
-Ground squirrel (revision 1010442953)
-Groundhog Day (revision 1012802985)
-Gunnison's prairie dog (revision 1010416998)
-Harris's antelope squirrel (revision 1010417210)
-Herbivore (revision 1006902225)
-Herodotus (revision 1012927818)
-Hibernate (revision 1009048926)
-Hibernation (revision 1009048926)
-Himalayan marmot (revision 1010417424)
-Hoary marmot (revision 1010417525)
-Hopi chipmunk (revision 1010417623)
-INaturalist (revision 1009815294)
-ISBN (identifier) (revision 1009586768)
-Ictidomys (revision 1010406819)
-Ictidomys parvidens (revision 1010426310)
-Integrated Taxonomic Information System (revision 999235988)
-Interim Register of Marine and Nonmarine Genera (revision 995182351)
-JSTOR (identifier) (revision 1011078319)
-Jacopo Ligozzi (revision 1006687935)
-Johann Friedrich Blumenbach (revision 1006564504)
-Kazakhstan (revision 1012748504)
-LCCN (identifier) (revision 1006934344)
-Ladakh (revision 1010799326)
-Latin (revision 1012971392)
-Least chipmunk (revision 1010419221)
+Eastern chipmunk (revision 1120765340)
+Golden-mantled ground squirrel (revision 1121777526)
+Tuolumne Meadows (revision 1094508214)
+Cascade Range (revision 1114533492)
+Mammal Species of the World (revision 1093112025)
+Franklin's ground squirrel (revision 1121361872)
+Ladakh (revision 1124124745)
+Groundhog (revision 1117813429)
+Natural reservoir (revision 1110806364)
+Neotamias (revision 1117512650)
+Yosemite National Park (revision 1125019703)
+Ontario (revision 1125244433)
+Russet ground squirrel (revision 1121469545)
+Bat (revision 1125180714)
+Wayback Machine (revision 1125067302)
+Long-eared chipmunk (revision 1121298477)
+Southern Idaho ground squirrel (revision 1121468339)
+Moss (revision 1122019251)
+Altai Mountains (revision 1124752508)
+Townsend's ground squirrel (revision 1121468829)
+Richardson's ground squirrel (revision 1122297225)
+Utah prairie dog (revision 1125084849)
+Yersinia pestis (revision 1121719480)
+European ground squirrel (revision 1121469378)
+Spermophilus relictus (revision 1121469745)
+Least chipmunk (revision 1120765536)
+Panamint chipmunk (revision 1121299808)
+Catalogue of Life (revision 1118132647)
+Gray marmot (revision 1122462225)
+Columbian ground squirrel (revision 1124139650)
+Alberni-Clayoquot Regional District (revision 1109499216)
+La Tania (revision 1115267378)
+Populus tremuloides (revision 1120966005)
+Paradise River Waterfalls (revision 1054159583)
+Long-tongued nectar bat (revision 1123039710)
+Happy Isles (revision 1113517959)
+Tourism in France (revision 1120671901)
+Otospermophilus (revision 1093268410)
+History of Canada (revision 1123782373)
+California chipmunk (revision 1121299691)
+Mexican ground squirrel (revision 1121470340)
+White-tailed antelope squirrel (revision 1121470211)
+Sedentism (revision 1110063134)
+Terabyte (revision 1123174616)
+Tamias (revision 1121473202)
+RECAP US Federal Court Documents (collection) (revision 1122929164)
+Belding's ground squirrel (revision 1121468288)
+Cannibalism (revision 1125092745)
+Yellow-pine chipmunk (revision 1121473478)
+Monoclonal antibody therapy (revision 1114372687)
+Menzbier's marmot (revision 1121471953)
+Black-footed ferret (revision 1123500226)
+Floods in Bihar (revision 1119748410)
+Mammal (revision 1124779293)
+Alaska marmot (revision 1124026979)
+Sierra Madre ground squirrel (revision 1121471267)
+Computer security (revision 1125370428)
+Kedarnath Temple (revision 1122647471)
+Frog Creek Cabin (revision 1048164755)
+Outline of botany (revision 1100540741)
+Agriculture in Nepal (revision 1088978356)
+Plant evolution (revision 1116709561)
+Little ground squirrel (revision 1121469707)
+Dicranales (revision 1110407415)
+Ultrasound (revision 1117397225)
+White-tailed prairie dog (revision 1121472368)
+Espíritu Santo antelope squirrel (revision 1121470113)
+Brown County, Wisconsin (revision 1122831345)
+Timeline of audio formats (revision 1120236679)
+List of mountain peaks of Uttarakhand (revision 1121014571)
+Antiviral drug (revision 1118217791)
+California ground squirrel (revision 1121359049)
+Red-tailed chipmunk (revision 1121297616)
+Bobak marmot (revision 1121471769)
+National Register of Historic Places listings in the Northern Mariana Islands (revision 1115478435)
+Spermophilus pallidicauda (revision 1121469669)
+Yellow-bellied marmot (revision 1121472145)
+Sexually transmitted infection (revision 1122774900)
+List of Yosemite destinations (revision 1119350249)
+Baitarani River (revision 1118320499)
+Baja California rock squirrel (revision 1121471079)
+Years of Lead (Italy) (revision 1123769084)
+Snow leopard (revision 1122462489)
+Coyote (revision 1125069820)
+Villard-Reculas (revision 1077275360)
+Vancouver Island (revision 1121908258)
+Sciurotamias (revision 1120570732)
+Canada 2021 Census (revision 1114664828)
+Time in Canada (revision 1120998431)
+Forrest's rock squirrel (revision 1121471379)
+Via Lattea (revision 1110201667)
+Phylogenetic tree (revision 1117394267)
+Hibernation (revision 1115607389)
+Altai wapiti (revision 1111750851)
+Alpine chipmunk (revision 1121473423)
+Schist (revision 1116202480)
+Rodent (revision 1123634696)
+Nepalese literature (revision 1117603265)
+Unification of Nepal (revision 1125350055)
+CBC News (revision 1124984918)
+Harris's antelope squirrel (revision 1121470079)
+Alpine meadow (revision 1114658726)
+Himalayan marmot (revision 1113552191)
+Merriam's ground squirrel (revision 1121468396)
+Heliscomyidae (revision 1010405407)
+Siberian chipmunk (revision 1121472776)
+1980 eruption of Mount St. Helens (revision 1123425632)
+Tarbagan marmot (revision 1121488248)
+Uinta chipmunk (revision 1121367930)
+Asia Minor ground squirrel (revision 1121357197)
+San Bernardino National Forest (revision 1113614977)
+British Columbia (revision 1124903693)
+List of Web archiving initiatives (revision 1120507741)
+2011 Kashgar attacks (revision 1124413350)
+Genus (revision 1125331312)
+IUCN Red List (revision 1123293379)
+Attack rate (revision 1118026995)
+Atlas of Living Australia (revision 1069034125)
+Riparian zone (revision 1100819694)
+Natural History Museum of Los Angeles County (revision 1118638991)
+Flying squirrel typhus (revision 1108887986)
+New Scientist (revision 1121186695)
+Sonoma chipmunk (revision 1121298317)
+Basic reproduction number (revision 1122698892)
+Homeothermic (revision 1082125124)
+Library Genesis (revision 1123879366)
+Ecological succession (revision 1116584234)
+Taurus ground squirrel (revision 1121469893)
+Edmund Jaeger (revision 1042985886)
+Wolverine (revision 1123904337)
+Puget Sound (revision 1124438931)
+List of highest points of European countries (revision 1125124917)
+Amburiq Mosque (revision 1101963105)
+Mohave ground squirrel (revision 1121470764)
+Kali Gandaki Gorge (revision 1091465924)
+Palmer's chipmunk (revision 1121473732)
+Citizen Science Association (revision 1076637865)
+Alpha male (revision 1123599649)
+Thermotogota (revision 1108216914)
+Gray-footed chipmunk (revision 1121473564)
+ISSN (identifier) (revision 1117323780)
+The Daily Excelsior (revision 1073376573)
+National Center for Biotechnology Information (revision 1117911694)
+Haridwar (revision 1124587996)
+Ground squirrel (revision 1106618817)
+ISBN (identifier) (revision 1124259962)
+Breton language (revision 1123193740)
+Notocitellus (revision 1092528025)
+Wayback Machine (Peabody's Improbable History) (revision 1125111405)
+Social animal (revision 1118899517)
+Conservation status (revision 1124721586)
+Doi (identifier) (revision 1121872952)
+Drop (liquid) (revision 1115117361)
+Monogamy in animals (revision 1115061008)
+Grand Slam (tennis) (revision 1125138113)
+Synonym (taxonomy) (revision 1115465643)
+Encyclopedia of Life (revision 1123215390)
+Algonquian languages (revision 1118973728)
+Circulatory system (revision 1123361226)
+Kenneth Oppel (revision 1115838353)
+Red-cheeked ground squirrel (revision 1121469468)
+Prairie dog (revision 1125350300)
+Zygomasseteric system (revision 1093682242)
+Black-tailed prairie dog (revision 1120101763)
+Scenic Beach State Park (revision 1085870429)
+Fashion capital (revision 1122240170)
+Herbivory (revision 1124405692)
+Artemisia tridentata (revision 1097902309)
+ARKive (revision 1028182358)
+Emblem of Uttarakhand (revision 1085229611)
+Northern Italy (revision 1122409316)
+Bibcode (identifier) (revision 1119780351)
+Squirrel (revision 1121741651)
+Birch Bay State Park (revision 1068937174)
+Whistling (revision 1124843854)
+Gobiomyidae (revision 1090208761)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2021-03-19 23:29:33.380471
+- Wikipedia parsing ended at: 2022-12-03 20:32:27.933336
-59 characters appeared 59 times.
+58 characters appeared 2027474 times.
Most Frequent characters:
-[ 0] Char m: 1.694915254237288 %
-[ 1] Char a: 1.694915254237288 %
-[ 2] Char r: 1.694915254237288 %
-[ 3] Char o: 1.694915254237288 %
-[ 4] Char t: 1.694915254237288 %
-[ 5] Char s: 1.694915254237288 %
-[ 6] Char e: 1.694915254237288 %
-[ 7] Char l: 1.694915254237288 %
-[ 8] Char i: 1.694915254237288 %
-[ 9] Char v: 1.694915254237288 %
-[10] Char y: 1.694915254237288 %
-[11] Char g: 1.694915254237288 %
-[12] Char u: 1.694915254237288 %
-[13] Char n: 1.694915254237288 %
-[14] Char d: 1.694915254237288 %
-[15] Char q: 1.694915254237288 %
-[16] Char h: 1.694915254237288 %
-[17] Char w: 1.694915254237288 %
-[18] Char p: 1.694915254237288 %
-[19] Char c: 1.694915254237288 %
-[20] Char b: 1.694915254237288 %
-[21] Char f: 1.694915254237288 %
-[22] Char k: 1.694915254237288 %
-[23] Char x: 1.694915254237288 %
-[24] Char z: 1.694915254237288 %
-[25] Char j: 1.694915254237288 %
-[26] Char á: 1.694915254237288 %
-[27] Char ö: 1.694915254237288 %
-[28] Char ä: 1.694915254237288 %
-[29] Char í: 1.694915254237288 %
-[30] Char ç: 1.694915254237288 %
-[31] Char ô: 1.694915254237288 %
-[32] Char à: 1.694915254237288 %
-[33] Char ü: 1.694915254237288 %
-[34] Char æ: 1.694915254237288 %
-[35] Char é: 1.694915254237288 %
-[36] Char ï: 1.694915254237288 %
-[37] Char û: 1.694915254237288 %
-[38] Char ó: 1.694915254237288 %
-[39] Char µ: 1.694915254237288 %
-[40] Char è: 1.694915254237288 %
-[41] Char ì: 1.694915254237288 %
-[42] Char î: 1.694915254237288 %
-[43] Char ë: 1.694915254237288 %
-[44] Char ð: 1.694915254237288 %
-[45] Char ý: 1.694915254237288 %
-[46] Char š: 1.694915254237288 %
-[47] Char ñ: 1.694915254237288 %
-[48] Char œ: 1.694915254237288 %
-[49] Char ê: 1.694915254237288 %
-[50] Char â: 1.694915254237288 %
-[51] Char ø: 1.694915254237288 %
-[52] Char þ: 1.694915254237288 %
-[53] Char å: 1.694915254237288 %
-[54] Char ß: 1.694915254237288 %
-[55] Char ã: 1.694915254237288 %
-[56] Char ž: 1.694915254237288 %
-[57] Char õ: 1.694915254237288 %
-[58] Char ú: 1.694915254237288 %
+[ 0] Char e: 11.847648847778073 %
+[ 1] Char a: 8.861519309248848 %
+[ 2] Char t: 8.523956410785045 %
+[ 3] Char i: 7.880199696765532 %
+[ 4] Char n: 7.477629799445023 %
+[ 5] Char o: 7.206405606187798 %
+[ 6] Char s: 6.8668698094278895 %
+[ 7] Char r: 6.763489938711914 %
+[ 8] Char l: 4.301066252884131 %
+[ 9] Char h: 4.232754649381447 %
+[10] Char d: 3.7247333381340524 %
+[11] Char c: 3.556839693135399 %
+[12] Char u: 2.763981190387645 %
+[13] Char m: 2.7244739020081146 %
+[14] Char p: 2.17398595493703 %
+[15] Char f: 2.1424195821993277 %
+[16] Char g: 2.0356364619225698 %
+[17] Char b: 1.575457934355755 %
+[18] Char y: 1.572005362337569 %
+[19] Char w: 1.3260835897279077 %
+[20] Char v: 1.1594230061643207 %
+[21] Char k: 0.6102667654431081 %
+[22] Char x: 0.2356133790125052 %
+[23] Char z: 0.13746168878121248 %
+[24] Char j: 0.1346503087092609 %
+[25] Char q: 0.1320855409243226 %
-The first 59 characters have an accumulated ratio of 0.9999999999999989.
+The first 26 characters have an accumulated ratio of 0.9996665801879581.
-920 sequences found.
+863 sequences found.
-First 378 (typical positive ratio): 0.9950109024233114
-Next 182 (560-378): 0.003993012537786833
-Rest: 0.000996085038901806
+First 369 (typical positive ratio): 0.9950424985513596
+Next 125 (494-369): 0.003963798368833871
+Rest: 0.0009937030798065072
-- Processing end: 2021-03-19 23:29:33.474226
+- Processing end: 2022-12-03 20:32:28.010953
diff --git a/src/LangModels/LangEnglishModel.cpp b/src/LangModels/LangEnglishModel.cpp
index dfe86f3..e06d15c 100644
--- a/src/LangModels/LangEnglishModel.cpp
+++ b/src/LangModels/LangEnglishModel.cpp
@@ -42,7 +42,7 @@
/**
* Generated by BuildLangModel.py
- * On: 2021-03-19 23:29:33.380823
+ * On: 2022-12-03 20:32:27.947524
**/
/* Character Mapping Table:
@@ -68,18 +68,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 20, 19, 14, 6, 21, 11, 16, 8, 25, 22, 7, 0, 13, 3, /* 4X */
- 18, 15, 2, 5, 4, 12, 9, 17, 23, 10, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 20, 19, 14, 6, 21, 11, 16, 8, 25, 22, 7, 0, 13, 3, /* 6X */
- 18, 15, 2, 5, 4, 12, 9, 17, 23, 10, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 4X */
+ 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 6X */
+ 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 32, 26, 50, 55, 28, 53, 34, 30, 40, 35, 49, 43, 41, 29, 42, 36, /* CX */
- 44, 47, 59, 38, 31, 57, 27,SYM, 51, 60, 58, 37, 33, 45, 52, 54, /* DX */
- 32, 26, 50, 55, 28, 53, 34, 30, 40, 35, 49, 43, 41, 29, 42, 36, /* EX */
- 44, 47, 61, 38, 31, 57, 27,SYM, 51, 62, 58, 37, 33, 45, 52, 63, /* FX */
+ SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 39, 36, 30, 47, 46, 58, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* CX */
+ 48, 29, 53, 37, 32, 59, 28,SYM, 49, 34, 55, 45, 33, 51, 60, 57, /* DX */
+ 39, 36, 30, 47, 46, 61, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* EX */
+ 48, 29, 53, 37, 32, 62, 28,SYM, 49, 34, 55, 45, 33, 51, 63, 64, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -89,170 +89,70 @@ static const unsigned char Windows_1252_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 1, 20, 19, 14, 6, 21, 11, 16, 8, 25, 22, 7, 0, 13, 3, /* 4X */
- 18, 15, 2, 5, 4, 12, 9, 17, 23, 10, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 1, 20, 19, 14, 6, 21, 11, 16, 8, 25, 22, 7, 0, 13, 3, /* 6X */
- 18, 15, 2, 5, 4, 12, 9, 17, 23, 10, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM, 46,SYM, 48,ILL, 56,ILL, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 46,SYM, 48,ILL, 56, 65, /* 9X */
+ SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 4X */
+ 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 6X */
+ 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM, 54,SYM, 41,ILL, 56,ILL, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 54,SYM, 41,ILL, 56, 66, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 32, 26, 50, 55, 28, 53, 34, 30, 40, 35, 49, 43, 41, 29, 42, 36, /* CX */
- 44, 47, 66, 38, 31, 57, 27,SYM, 51, 67, 58, 37, 33, 45, 52, 54, /* DX */
- 32, 26, 50, 55, 28, 53, 34, 30, 40, 35, 49, 43, 41, 29, 42, 36, /* EX */
- 44, 47, 68, 38, 31, 57, 27,SYM, 51, 69, 58, 37, 33, 45, 52, 70, /* FX */
+ SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 39, 36, 30, 47, 46, 67, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* CX */
+ 48, 29, 53, 37, 32, 68, 28,SYM, 49, 34, 55, 45, 33, 51, 69, 57, /* DX */
+ 39, 36, 30, 47, 46, 70, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* EX */
+ 48, 29, 53, 37, 32, 71, 28,SYM, 49, 34, 55, 45, 33, 51, 72, 73, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
-static const int Unicode_Char_size = 117;
+static const int Unicode_Char_size = 52;
static const unsigned int Unicode_CharOrder[] =
{
- 65, 1, 66, 20, 67, 19, 68, 14, 69, 6, 70, 21, 71, 11, 72, 16,
- 73, 8, 74, 25, 75, 22, 76, 7, 77, 0, 78, 13, 79, 3, 80, 18,
- 81, 15, 82, 2, 83, 5, 84, 4, 85, 12, 86, 9, 87, 17, 88, 23,
- 89, 10, 90, 24, 97, 1, 98, 20, 99, 19, 100, 14, 101, 6,102, 21,
- 103, 11, 104, 16, 105, 8, 106, 25, 107, 22, 108, 7, 109, 0,110, 13,
- 111, 3, 112, 18, 113, 15, 114, 2, 115, 5, 116, 4, 117, 12,118, 9,
- 119, 17, 120, 23, 121, 10, 122, 24, 181, 39, 192, 32, 193, 26,194, 50,
- 195, 55, 196, 28, 197, 53, 198, 34, 199, 30, 200, 40, 201, 35,202, 49,
- 203, 43, 204, 41, 205, 29, 206, 42, 207, 36, 208, 44, 209, 47,211, 38,
- 212, 31, 213, 57, 214, 27, 216, 51, 218, 58, 219, 37, 220, 33,221, 45,
- 222, 52, 223, 54, 224, 32, 225, 26, 226, 50, 227, 55, 228, 28,229, 53,
- 230, 34, 231, 30, 232, 40, 233, 35, 234, 49, 235, 43, 236, 41,237, 29,
- 238, 42, 239, 36, 240, 44, 241, 47, 243, 38, 244, 31, 245, 57,246, 27,
- 248, 51, 250, 58, 251, 37, 252, 33, 253, 45, 254, 52, 338, 48,339, 48,
- 352, 46, 353, 46, 381, 56, 382, 56, 924, 39,
+ 65, 1, 66, 17, 67, 11, 68, 10, 69, 0, 70, 15, 71, 16, 72, 9,
+ 73, 3, 74, 24, 75, 21, 76, 8, 77, 13, 78, 4, 79, 5, 80, 14,
+ 81, 25, 82, 7, 83, 6, 84, 2, 85, 12, 86, 20, 87, 19, 88, 22,
+ 89, 18, 90, 23, 97, 1, 98, 17, 99, 11, 100, 10, 101, 0,102, 15,
+ 103, 16, 104, 9, 105, 3, 106, 24, 107, 21, 108, 8, 109, 13,110, 4,
+ 111, 5, 112, 14, 113, 25, 114, 7, 115, 6, 116, 2, 117, 12,118, 20,
+ 119, 19, 120, 22, 121, 18, 122, 23,
};
/* Model Table:
- * Total considered sequences: 920 / 3481
- * - Positive sequences: first 378 (0.9950109024233114)
- * - Probable sequences: next 182 (560-378) (0.003993012537786833)
- * - Neutral sequences: last 2921 (0.000996085038901806)
- * - Negative sequences: 2561 (off-ratio)
+ * Total considered sequences: 863 / 676
+ * - Positive sequences: first 369 (0.9950424985513596)
+ * - Probable sequences: next 125 (494-369) (0.003963798368833871)
+ * - Neutral sequences: last 182 (0.0009937030798065072)
+ * - Negative sequences: -187 (off-ratio)
* Negative sequences: TODO
*/
static const PRUint8 EnglishLangModel[] =
{
- 3,3,1,3,2,3,3,2,3,1,3,2,3,3,2,1,2,1,3,2,3,2,1,1,1,1,2,1,1,
- 1,0,0,1,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,
- 3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,0,
- 0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,1,2,1,1,1,1,
- 1,0,1,0,0,0,2,0,1,1,0,1,0,1,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,1,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,0,
- 0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,2,3,2,3,3,1,2,3,3,2,3,2,2,2,0,3,0,1,0,0,
- 1,0,0,1,1,0,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,
- 3,3,3,3,3,3,3,3,3,1,3,2,3,3,2,3,3,3,3,3,3,3,3,1,1,1,1,1,0,
- 1,0,0,0,0,0,2,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,1,0,0,
- 1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,3,3,3,3,3,0,2,1,1,0,1,
- 1,0,1,0,0,0,2,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,2,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,1,3,3,3,3,3,3,3,2,1,0,0,
- 1,0,0,0,0,0,1,0,0,1,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
- 1,3,2,3,1,2,3,2,3,2,2,1,2,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,0,
- 1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,0,1,2,3,3,2,1,2,2,2,1,1,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,3,2,2,1,2,2,1,0,2,1,1,2,0,
- 1,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,1,2,1,3,3,3,3,3,3,2,2,1,0,0,
- 1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,2,3,1,1,0,
- 2,2,1,0,0,1,2,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
- 3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,2,3,1,1,2,2,0,0,0,
- 1,0,1,0,1,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
- 0,2,0,0,1,1,0,0,2,1,1,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,1,3,2,3,3,2,2,1,3,1,2,2,1,1,0,1,0,1,1,1,
- 1,0,1,0,1,1,2,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,
- 1,3,3,3,3,3,3,3,3,0,2,1,1,3,2,0,3,2,1,2,2,2,2,0,0,0,0,0,0,
- 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,0,3,2,3,1,2,0,3,2,3,2,2,0,2,1,1,0,1,0,0,
- 2,0,1,0,0,2,2,0,0,1,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,3,0,0,3,1,2,3,0,2,0,1,0,0,
- 1,0,1,1,0,0,2,0,1,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
- 2,3,3,3,3,3,3,3,3,1,3,2,3,3,2,0,2,1,2,3,3,2,1,0,1,3,0,0,0,
- 0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
- 2,3,3,3,3,2,3,3,3,0,3,1,3,1,2,0,0,1,1,2,1,3,1,0,0,1,0,1,0,
- 1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,2,3,3,3,3,0,3,2,3,3,1,1,3,2,2,2,2,1,1,0,1,1,1,2,0,
- 0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,
- 1,3,0,3,3,2,3,0,3,2,2,0,3,0,1,1,2,1,3,3,0,2,0,2,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,3,1,3,2,2,3,2,3,1,2,0,2,2,1,0,2,1,0,0,2,0,1,0,2,1,0,0,0,
- 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,3,2,3,0,3,3,1,3,1,1,0,3,1,0,0,1,1,1,0,0,0,1,0,0,2,1,1,0,
- 0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,1,1,0,1,1,0,2,0,1,0,1,1,2,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,1,0,2,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,1,0,2,2,0,1,0,1,0,0,0,2,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
- 1,0,0,0,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,1,1,0,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,1,2,1,2,2,2,2,1,2,0,2,1,2,2,1,0,0,2,2,1,0,0,0,1,1,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,1,0,0,2,0,1,0,1,0,0,0,2,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,2,0,0,1,0,0,0,1,0,1,0,1,1,2,0,0,0,1,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,2,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,
+ 3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,3,2,2,1,2,1,0,
+ 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3,
+ 3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,3,2,3,3,2,2,3,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
+ 3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,3,2,3,1,1,1,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,1,1,
+ 3,3,3,3,2,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,
+ 3,3,3,3,3,3,3,3,3,1,2,2,3,3,2,1,2,2,3,3,2,1,0,2,1,2,
+ 3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,1,1,1,2,2,
+ 3,3,3,3,2,3,3,3,3,3,2,3,3,2,1,2,1,3,3,1,1,3,1,1,0,2,
+ 3,3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,3,3,2,2,3,3,2,2,2,1,
+ 3,3,2,3,3,3,3,2,2,1,1,2,3,3,3,2,2,3,3,1,1,1,0,1,1,1,
+ 3,3,3,3,2,3,3,3,3,3,2,2,3,3,3,1,2,2,3,1,1,2,1,0,1,1,
+ 3,3,3,3,1,3,3,3,3,0,1,1,3,1,1,3,1,1,2,1,1,1,0,0,1,1,
+ 3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,3,2,3,2,1,1,1,1,1,1,
+ 3,3,2,3,3,3,3,3,3,3,2,3,3,2,1,2,2,3,3,1,1,1,0,1,3,0,
+ 3,3,3,3,3,3,3,2,3,1,2,3,2,3,3,1,3,3,1,2,1,1,1,2,1,0,
+ 3,3,3,3,3,3,3,3,3,3,2,1,1,1,1,2,1,2,2,1,0,2,0,0,0,0,
+ 3,3,1,3,1,3,1,2,1,1,1,1,2,1,1,1,1,0,2,0,1,1,0,0,0,1,
+ 3,3,2,3,3,3,3,2,3,3,2,1,3,3,2,1,2,2,3,2,1,2,0,0,1,1,
+ 3,3,3,3,1,3,1,0,0,2,0,3,3,1,3,2,0,0,2,1,1,0,1,0,0,1,
+ 3,3,1,3,1,3,1,1,2,2,1,1,2,1,0,1,0,2,2,1,1,1,0,2,1,0,
+ 3,3,0,2,1,3,2,2,0,1,0,0,3,1,1,0,0,1,1,1,1,1,1,0,1,0,
+ 1,1,1,2,0,1,0,1,1,0,0,1,3,0,0,1,0,0,0,0,0,0,0,0,0,0,
};
@@ -260,8 +160,8 @@ const SequenceModel Iso_8859_1EnglishModel =
{
Iso_8859_1_CharToOrderMap,
EnglishLangModel,
- 59,
- (float)0.9990039149610982,
+ 26,
+ (float)0.9990062969201935,
PR_TRUE,
"ISO-8859-1",
"en"
@@ -271,8 +171,8 @@ const SequenceModel Windows_1252EnglishModel =
{
Windows_1252_CharToOrderMap,
EnglishLangModel,
- 59,
- (float)0.9990039149610982,
+ 26,
+ (float)0.9990062969201935,
PR_TRUE,
"WINDOWS-1252",
"en"
@@ -282,8 +182,8 @@ const LanguageModel EnglishModel =
{
"en",
Unicode_CharOrder,
- 117,
+ 52,
EnglishLangModel,
- 59,
- (float)0.9999999999999989,
+ 26,
+ (float)0.9996665801879581,
};