diff options
author | Jehan <jehan@girinstud.io> | 2022-11-30 20:33:11 +0100 |
---|---|---|
committer | Jehan <jehan@girinstud.io> | 2022-12-14 00:24:53 +0100 |
commit | 0be80a21db41321da0a33ffc6b5d272a712cbf6c (patch) | |
tree | b758cf3ca3856f8b064d8d11a478a14113683106 | |
parent | 784f614c849b3482d7fbbac9d6ddb3f8e1fffd82 (diff) |
script, src: update Norwegian model with the new language features.
As I just rebased my branch for the new language detection API, I needed
to re-generate the Norwegian language models. Unfortunately, the result
still doesn't detect UTF-8 Norwegian text, though it is not far off (it
reports Norwegian as the second candidate with a high 91% confidence,
beaten by Danish UTF-8 with 94% confidence).
Note that I also updated the alphabet list for Norwegian, as there were
too many letters in it (according to Wikipedia at least), so even when
training a model, some of the listed characters were missing from the
training set.
-rw-r--r-- | script/BuildLangModelLogs/LangNorwegianModel.log | 234 | ||||
-rw-r--r-- | script/langs/no.py | 2 | ||||
-rw-r--r-- | src/LangModels/LangNorwegianModel.cpp | 293 | ||||
-rw-r--r-- | src/nsLanguageDetector.h | 1 | ||||
-rw-r--r-- | src/nsMBCSGroupProber.cpp | 1 | ||||
-rw-r--r-- | src/nsMBCSGroupProber.h | 2 |
6 files changed, 352 insertions, 181 deletions
diff --git a/script/BuildLangModelLogs/LangNorwegianModel.log b/script/BuildLangModelLogs/LangNorwegianModel.log new file mode 100644 index 0000000..9c066bf --- /dev/null +++ b/script/BuildLangModelLogs/LangNorwegianModel.log @@ -0,0 +1,234 @@ += Logs of language model for Norwegian (no) = + +- Generated by BuildLangModel.py +- Started: 2022-11-30 20:26:27.916571 +- Maximum depth: 2 +- Max number of pages: 200 + +== Parsed pages == + +Norsk (revision 22974717) +Saft (revision 22967608) +Hund (revision 23005187) +Valg i Norge (revision 22782362) +Asia (revision 23117912) +Saarloos wolfhond (revision 22789727) +Østfold (revision 23055508) +Fårehunder (revision 22264555) +Stripesjakal (revision 18745363) +12. mai (revision 23118103) +Gullsjakal (revision 23104601) +Urhund (revision 23050226) +E (revision 22904440) +Luxembourgsk (revision 22813155) +Obstruent (revision 15267134) +Gudbrandsdalen (revision 23014277) +Norges berggrunn (revision 21768509) +Riksforsamlingen (revision 22999081) +Sosiolekt (revision 21458982) +Habitat (revision 23123646) +Norsk språkhistorie (20. 
århundre) (revision 22891154) +Søsterart (revision 20748512) +Halvdan Koht (revision 22303367) +Plosiver (revision 21816753) +Svorsk (revision 20789512) +Skandinavia (revision 22814296) +Partisipp (revision 22785842) +H (revision 23086416) +Kreft (revision 23050449) +Kreft hos hunder (revision 21811805) +Q (revision 23024714) +Fédération Cynologique Internationale (revision 22172054) +Rosin (revision 22818749) +Tribus (biologi) (revision 21339936) +Siste istids maksimum (revision 23141296) +Laurents Hallager (revision 22655416) +Canider (revision 22229857) +Individ (revision 20992252) +Stortingsvalg 1945– (revision 22861299) +Svalbards geologi (revision 22935346) +Riksmålsvernet (revision 22966421) +Magedreining (hund) (revision 21661370) +Stortinget (revision 23071662) +Bokmål (revision 22928969) +Recessiv (revision 21780786) +Synkopetida (revision 22906353) +Artskompleks (revision 20848344) +Homogenitet (revision 22857280) +Pyometra (hund) (revision 22374115) +Den norske språkstriden (revision 22428585) +Gruppe (biologi) (revision 21969525) +Stående fuglehunder (revision 22264516) +Samnorsk (revision 22785915) +Fastlands-Norge (revision 23141642) +Drivende hunder (revision 22264618) +Sibir (revision 22369404) +Norges demografi (revision 23034159) +FCI (revision 22172054) +Vannhunder (revision 22264145) +Prednisolon (revision 21804718) +Midtvesten (revision 22423559) +Buskerud (revision 22915767) +Sogn og Fjordane (revision 22811825) +Transport i Norge (revision 23131810) +Ustemt palatal frikativ (revision 19011330) +Anatolsk gjeterhund (revision 22303224) +Norges fylker (revision 23129287) +Tonelag (revision 22751959) +Statsforvalter (revision 23133685) +Sjokolade (revision 22988920) +Nasaler (revision 16002502) +Hundens pels (revision 22900550) +Approksimanter (revision 16000119) +Tapper (revision 18322970) +Vakt- og vokterhunder (revision 23091054) +Saluki (revision 22267261) +Canis (revision 23079627) +Island (revision 23097723) +Flyball (revision 20457011) 
+Staffordshire bull terrier (revision 23135078) +Stockholm (revision 22770528) +Sahel (revision 19821400) +ISO 639-3 (revision 18859824) +Ny-guinea villhund (revision 22567866) +Rabies (revision 19440055) +Ordbog over det norske Folkesprog (revision 23096800) +Norge (revision 23141642) +Flåttbårne sykdommer (hund) (revision 21355504) +Bombehund (revision 22942055) +Læreboknormalen av 1959 (revision 18841941) +Tromøy (revision 22053767) +Vorstehhund korthåret (revision 22264532) +Tåkeskog (revision 20461967) +Vest-Telemark (revision 22923647) +Oslo (revision 23118371) +Tyrkia (revision 23034073) +Liste over Norges største tettsteder (revision 23138252) +Energi (revision 22979461) +Jakt med hund (revision 22890790) +Sogn fogderi (revision 22425444) +Integrated Taxonomic Information System (revision 20457376) +Tadsjikistan (revision 22864814) +Befolkningstetthet (revision 22253839) +Tøddel (revision 21641445) +Den lille istid (revision 22782643) +Norsk språkhistorie (1400–1800) (revision 21342667) +Unionen mellom Sverige og Norge (revision 22922743) +Fylkeskommune (revision 22011606) +ĸ (revision 17096887) +Degas (revision 22751270) +Gløgg (revision 22902469) +Antistoff (revision 20746889) +Norges statsminister (revision 22948566) +Lørdag (revision 23031303) +Ş (revision 12094187) +Hallingdal (revision 22811584) +1969 (revision 22958238) +Juli (revision 22359558) +Shar pei (revision 22891357) +Dyr (revision 23101991) +Ƙ (revision 15223100) +PhyloCode (revision 22857413) +Y-kromosom (revision 22783781) +Høst (revision 23087627) +Geit (revision 21989005) +Guatemala (revision 22780680) +USA (revision 22781448) +Tamhund (revision 23005187) +Populasjonsdynamikk (revision 20640003) +Christoffer Oftedahl (revision 19783269) +Mellomnorsk (revision 22546096) +1000 (revision 20456192) +Servicehund (revision 22337757) +Himalayaulv (revision 21791662) +Ø (bokstav) (revision 22617366) +Ǩ (revision 15223173) +Bordeaux dogge (revision 22266230) +Frøplanter (revision 21763501) 
+Ustemt bilabial plosiv (revision 22354758) +Digraf (revision 19954081) +12. århundre (revision 23123540) +Sametingsvalget 1993 (revision 21890290) +Førerhund (revision 20465384) +Grenada (revision 22948831) +Aserbajdsjans administrative inndeling (revision 22782483) +Verneområder i Norge (revision 22076171) +Pelsdyroppdrett (revision 22827568) +Kretahund (revision 22201230) +Etne (revision 22659600) +Koreansk chejudo (revision 22199018) +Riesenschnauzer (revision 23103775) +Italias regioner (revision 22182270) +Dingo (revision 23050226) +Firfisle (revision 21650282) +Dominans (revision 21160764) +CITES (revision 22637082) +Helligdager i Norge (revision 22095322) +Bunad (revision 23086915) +Barnekreftforeningen (revision 19888945) +Guttorm Hansen (revision 22098933) +Albania (revision 22939774) +Medier i Norge (revision 21776331) +Finsk (revision 22908244) +Anders Lysgaard (revision 22858529) +Bakverk (revision 15226081) +Ć (revision 15785421) +Vatikanstaten (revision 22782366) +Steinalderen i Norge (revision 23106147) +Johnny Depp (revision 22764203) +Sverre Steen (revision 22112509) +Fjellrev (revision 22812483) +Bayersk viltsporhund (revision 22805751) +Ń (revision 15222385) +Utdannelse i Norge (revision 22814897) +Espen Berntsen (revision 21025561) +Nederland (revision 23024484) +Liste over hundegrupper (revision 18570830) + +== End of Parsed pages == + +- Wikipedia parsing ended at: 2022-11-30 20:29:27.551046 + +62 characters appeared 1228749 times. 
+ +Most Frequent characters: +[ 0] Char e: 15.049208585317261 % +[ 1] Char r: 8.84924423132796 % +[ 2] Char n: 8.422550089562636 % +[ 3] Char t: 7.726394894319344 % +[ 4] Char s: 6.64798099530498 % +[ 5] Char a: 6.28020856985438 % +[ 6] Char i: 5.99455218274847 % +[ 7] Char l: 5.422262805503809 % +[ 8] Char o: 5.386942329149403 % +[ 9] Char d: 4.534774799409806 % +[10] Char g: 3.86091870674971 % +[11] Char k: 3.6487516978650643 % +[12] Char m: 3.216197937902696 % +[13] Char v: 2.4669806445417253 % +[14] Char f: 2.0122091655822305 % +[15] Char u: 1.8136332155712844 % +[16] Char p: 1.6869189720602011 % +[17] Char b: 1.4243755233981878 % +[18] Char h: 1.3665117937023752 % +[19] Char å: 1.1134902246105591 % +[20] Char y: 0.8473658981614633 % +[21] Char ø: 0.792431977564173 % +[22] Char j: 0.7630525029928814 % +[23] Char c: 0.2926553755079353 % +[24] Char æ: 0.20012223814627725 % +[25] Char w: 0.05932863424507365 % +[26] Char z: 0.028565638710591017 % +[27] Char x: 0.023194322029967063 % +[28] Char é: 0.017171936660782636 % +[29] Char q: 0.009521879570197005 % + +The first 30 characters have an accumulated ratio of 0.9995751776807141. + +967 sequences found. + +First 442 (typical positive ratio): 0.9950425176429516 +Next 157 (599-442): 0.0039580060347621515 +Rest: 0.0009994763222862524 + +- Processing end: 2022-11-30 20:29:27.623923 diff --git a/script/langs/no.py b/script/langs/no.py index 93cf23f..b8d777f 100644 --- a/script/langs/no.py +++ b/script/langs/no.py @@ -48,7 +48,7 @@ charsets = ['IBM865', 'ISO-8859-15', 'ISO-8859-1', 'WINDOWS-1252'] ## Optional Properties ## # Alphabet characters. 
-alphabet = 'æøåéìîàêÆØÅ' +alphabet = 'æøå' # Some pages that should contain most norwegian-norwegian norwegian start_pages = ['Norsk', 'Saft', 'Hund'] wikipedia_code = code diff --git a/src/LangModels/LangNorwegianModel.cpp b/src/LangModels/LangNorwegianModel.cpp index f3a876d..2bc2281 100644 --- a/src/LangModels/LangNorwegianModel.cpp +++ b/src/LangModels/LangNorwegianModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Norwegian *********/ /** * Generated by BuildLangModel.py - * On: 2022-01-28 21:58:11.143599 + * On: 2022-11-30 20:29:27.551827 **/ /* Character Mapping Table: @@ -67,17 +68,17 @@ static const unsigned char Ibm865_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */ - 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */ - 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 43, 32, 28, 50, 31, 45, 19, 43, 53, 42, 41, 57, 61, 58, 31, 19, /* 8X */ - 28, 24, 24, 37, 30, 54, 63, 59, 64, 30, 32, 21,SYM, 21,SYM,SYM, /* 9X */ - 36, 33, 35, 40, 44, 44,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */ + 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */ + 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 37, 36, 28, 45, 31, 43, 19, 37, 53, 39, 44, 59, 56, 54, 31, 19, /* 8X */ + 28, 24, 24, 41, 30, 48, 62, 55, 63, 30, 36, 21,SYM, 21,SYM,SYM, /* 9X */ + 33, 34, 35, 40, 49, 49,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 
/* AX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* DX */ - 48, 46, 65, 66, 60, 60, 67, 62, 68, 69, 70, 71, 72, 73, 52,SYM, /* EX */ + 52, 60, 64, 65, 61, 61, 66, 47, 67, 68, 69, 70, 71, 72, 42,SYM, /* EX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -88,18 +89,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */ - 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */ - 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */ + 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */ + 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 47,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 49, 74,SYM,SYM, 49,SYM,SYM,SYM, 51, 51, 75,SYM, /* BX */ - 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* CX */ - 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 76, 32, 39, 38, 46, /* DX */ - 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* EX */ - 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 77, 32, 39, 38, 78, /* FX */ + 
SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 58,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 73, 74,SYM,SYM, 75,SYM,SYM,SYM, 50, 50, 76,SYM, /* BX */ + 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */ + 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 77, 36, 51, 38, 60, /* DX */ + 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */ + 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 78, 36, 51, 38, 79, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,18 +110,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */ - 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */ - 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */ + 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */ + 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 79,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* CX */ - 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 80, 32, 39, 38, 46, /* DX */ - 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* EX */ - 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 81, 32, 39, 38, 82, /* FX */ + SYM,SYM,SYM,SYM,SYM, 
80,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */ + 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 81, 36, 51, 38, 60, /* DX */ + 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */ + 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 82, 36, 51, 38, 83, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -130,155 +131,75 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */ - 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */ - 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 83,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 51,ILL, 49,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 51,ILL, 49, 84, /* 9X */ + SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */ + 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */ + 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 50,ILL, 85,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 50,ILL, 86, 87, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 85,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* CX */ - 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 86, 32, 39, 38, 46, /* DX */ - 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* EX */ - 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 
40, 87, 32, 39, 38, 88, /* FX */ + SYM,SYM,SYM,SYM,SYM, 88,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */ + 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 89, 36, 51, 38, 60, /* DX */ + 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */ + 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 90, 36, 51, 38, 91, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 60; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 5, 66, 17, 67, 23, 68, 9, 69, 0, 70, 14, 71, 10, 72, 18, + 73, 6, 74, 22, 75, 11, 76, 7, 77, 12, 78, 2, 79, 8, 80, 16, + 81, 29, 82, 1, 83, 4, 84, 3, 85, 15, 86, 13, 87, 25, 88, 27, + 89, 20, 90, 26, 97, 5, 98, 17, 99, 23, 100, 9, 101, 0,102, 14, + 103, 10, 104, 18, 105, 6, 106, 22, 107, 11, 108, 7, 109, 12,110, 2, + 111, 8, 112, 16, 113, 29, 114, 1, 115, 4, 116, 3, 117, 15,118, 13, + 119, 25, 120, 27, 121, 20, 122, 26, 197, 19, 198, 24, 201, 28,216, 21, + 229, 19, 230, 24, 233, 28, 248, 21, +}; + /* Model Table: - * Total sequences: 991 - * First 512 sequences: 0.9975864274305254 - * Next 512 sequences (512-1024): 0.002413572569474574 - * Rest: 3.5128150388530344e-17 + * Total considered sequences: 967 / 900 + * - Positive sequences: first 442 (0.9950425176429516) + * - Probable sequences: next 157 (599-442) (0.0039580060347621515) + * - Neutral sequences: last 301 (0.0009994763222862524) + * - Negative sequences: -67 (off-ratio) * Negative sequences: TODO */ static const PRUint8 NorwegianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,2,0, - 0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2, - 2,2,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2, - 2,2,2,0,0,2,0,0,2,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,0, - 2,2,2,0,2,0,0,0,2,0,0,2,0,0,2,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,2, - 2,2,0,0,0,2,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,0,3,3,3,0,2,0, - 0,0,2,2,0,0,0,0,0,2,0,2,0,2,0,2,2,0,2,0,0,0,0,0,0,0,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,2,2,3,2,2,2,0, - 0,0,0,2,2,0,0,0,0,0,2,2,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,2,2, - 2,2,2,0,2,2,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,2,3,0,3,2,3,0,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,0,2, - 0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0,0,0,0,2, - 0,0,0,0,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,2,2,0,0,2,2,2, - 2,2,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,2,0,2,2,2, - 2,2,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,2,0,0,2,0,0, - 2,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,0,3,2,2,3,3,3,3,3,3,0,0,0,2,0,2, - 0,2,0,0,2,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,0,2,0,0, - 2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,0,2,2,2,3,2,2,3,2,2,2,0, - 0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,3,3,2,2,2,2,0,2,2,3,3,2,3,3,3,3,2,3,2,2,0,2,0,2, - 2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,2,3,2,2,3,3,3,3,2,3,2,2,0,2,0,2, - 2,2,2,0,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,2,3,2,3,3,3,2,3,3,3,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,0,2,3,2,2,2,2,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,0,2,3,2,3,3,3,3,3,3,3,0,3,2,0,3,2,2,2,0,2,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,2,2,3,3,2,3,2,2,2,2,0,2,0,3,0,0,2,2,3,2,0,3,0,0,0,0,0,2, - 2,2,0,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,3,3,3,3,3,2,3,0,3,2,0,2,3,2,3,0,3,0,0,3,2,0,2,0,2,2,0, - 0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 0,3,3,3,3,0,2,2,0,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,2,3,3,3,2,3,2,2,0,2,0,2,2,2,2,3,0,2,2,2,2,0,2,0,0,0,0,0, - 2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,2,2,3,3,2,3,2,2,2,2,0,2,0,2,2,2,0,3,0,0,2,0,2,2,0,0,0,0, - 0,2,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,2,2,2,3,3,2,2,0,0,0,0,2,2,2,2,2,2,0,2,0,0,0,0,0,0,2,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,3,2,2,2,0,2,2,2,0,2,2,2,2,0,0,2,2,0,0,0,0,2,0,0,2,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,0,2,2,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,2,2,2,0,0,2,0,2,2,2,2,2,2,2,0,0,2,0,2,0,0,2,0,0,0,0,0,0,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,2,2,2,0,0,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,0,2,0,0,2,0,2,0,2,0,0,2,0,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,2,2,0,2,2,0,2,0,0,2,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,0,0,2,2,2,0,0,0,0,2,0,0,0,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0, - 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,2,2,2,0,0,2,0,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,2,2,2,0,0,2,0,0,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,2,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,2,0,2,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,1,2,1, + 
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,2,0,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,0,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,1,2,3,0,2,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,1,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,0,1,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,2,3,0,2,2,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,2,1,1,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,0,1,1,0,1,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,1,1,1,0,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,0,0,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,3,1,2,2,1,3,0,0,0,0,0, + 3,3,2,3,2,3,3,3,3,1,2,1,1,0,3,3,0,1,2,3,3,3,3,2,2,0,1,1,2,1, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,1,1,2,1,2,0,1,2,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,2,3,3,2,3,1,1,1,0,1,0,1,0, + 3,3,3,1,3,3,3,3,3,2,0,1,1,0,2,3,2,3,1,3,3,3,3,1,3,1,0,0,1,0, + 3,3,3,3,2,3,3,2,3,1,1,2,2,3,1,3,1,2,2,3,3,3,3,1,2,2,0,1,0,1, + 3,3,3,3,3,1,2,3,1,3,3,3,2,3,2,0,3,2,2,0,0,1,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,3,3,2,0,1,1,1,2,0,1,1,1,0,0, + 3,3,3,3,3,1,2,3,1,3,3,3,3,3,2,0,3,2,1,0,3,1,1,0,0,0,0,0,0,0, + 3,1,1,1,1,3,3,1,3,1,0,1,2,0,2,3,1,0,1,2,1,3,1,0,3,0,0,0,0,0, + 3,2,2,3,2,3,3,3,3,2,1,3,2,0,0,3,0,2,3,0,3,0,0,2,1,1,1,0,0,1, + 0,3,2,2,2,0,1,2,0,1,1,1,1,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0, + 3,1,2,2,2,3,3,1,3,1,0,1,1,0,1,1,0,1,2,0,1,0,0,0,0,2,1,0,0,0, + 3,0,2,1,1,2,2,1,3,0,1,0,1,0,1,2,0,1,1,0,1,0,1,0,0,1,2,0,0,0, + 2,0,1,1,1,2,2,1,2,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0, + 2,2,3,2,2,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,0,2,1,0,1,0,0,0,0,1,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0, }; @@ -286,38 +207,52 @@ const SequenceModel Ibm865NorwegianModel = { Ibm865_CharToOrderMap, NorwegianLangModel, - 62, - (float)0.9975864274305254, + 30, + (float)0.9990005236777137, PR_TRUE, - "IBM865" + "IBM865", + "no" }; const 
SequenceModel Iso_8859_15NorwegianModel = { Iso_8859_15_CharToOrderMap, NorwegianLangModel, - 62, - (float)0.9975864274305254, + 30, + (float)0.9990005236777137, PR_TRUE, - "ISO-8859-15" + "ISO-8859-15", + "no" }; const SequenceModel Iso_8859_1NorwegianModel = { Iso_8859_1_CharToOrderMap, NorwegianLangModel, - 62, - (float)0.9975864274305254, + 30, + (float)0.9990005236777137, PR_TRUE, - "ISO-8859-1" + "ISO-8859-1", + "no" }; const SequenceModel Windows_1252NorwegianModel = { Windows_1252_CharToOrderMap, NorwegianLangModel, - 62, - (float)0.9975864274305254, + 30, + (float)0.9990005236777137, PR_TRUE, - "WINDOWS-1252" + "WINDOWS-1252", + "no" +}; + +const LanguageModel NorwegianModel = +{ + "no", + Unicode_CharOrder, + 60, + NorwegianLangModel, + 30, + (float)0.9995751776807141, }; diff --git a/src/nsLanguageDetector.h b/src/nsLanguageDetector.h index 30b935a..339d4e2 100644 --- a/src/nsLanguageDetector.h +++ b/src/nsLanguageDetector.h @@ -131,6 +131,7 @@ extern const LanguageModel ItalianModel; extern const LanguageModel LatvianModel; extern const LanguageModel LithuanianModel; extern const LanguageModel MalteseModel; +extern const LanguageModel NorwegianModel; extern const LanguageModel PolishModel; extern const LanguageModel PortugueseModel; extern const LanguageModel RomanianModel; diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp index 51c268f..8388832 100644 --- a/src/nsMBCSGroupProber.cpp +++ b/src/nsMBCSGroupProber.cpp @@ -111,6 +111,7 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter) langDetectors[i][j++] = new nsLanguageDetector(&LatvianModel); langDetectors[i][j++] = new nsLanguageDetector(&LithuanianModel); langDetectors[i][j++] = new nsLanguageDetector(&MalteseModel); + langDetectors[i][j++] = new nsLanguageDetector(&NorwegianModel); langDetectors[i][j++] = new nsLanguageDetector(&PolishModel); langDetectors[i][j++] = new nsLanguageDetector(&PortugueseModel); langDetectors[i][j++] = new 
nsLanguageDetector(&RomanianModel); diff --git a/src/nsMBCSGroupProber.h b/src/nsMBCSGroupProber.h index 9596ac0..1dea490 100644 --- a/src/nsMBCSGroupProber.h +++ b/src/nsMBCSGroupProber.h @@ -49,7 +49,7 @@ #include "nsEUCTWProber.h" #define NUM_OF_PROBERS 8 -#define NUM_OF_LANGUAGES 30 +#define NUM_OF_LANGUAGES 31 class nsMBCSGroupProber: public nsCharSetProber { public: |