summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jehan <jehan@girinstud.io> 2022-11-30 20:33:11 +0100
committer Jehan <jehan@girinstud.io> 2022-12-14 00:24:53 +0100
commit 0be80a21db41321da0a33ffc6b5d272a712cbf6c (patch)
tree b758cf3ca3856f8b064d8d11a478a14113683106
parent 784f614c849b3482d7fbbac9d6ddb3f8e1fffd82 (diff)
script, src: update Norwegian model with the new language features.
As I just rebased my branch for the new language detection API, I needed to regenerate the Norwegian language models. Unfortunately, the new model does not detect UTF-8 Norwegian text, though it is not far off: Norwegian comes out as the second candidate with a high 91% confidence, beaten by Danish UTF-8 with 94% confidence. Note that I also updated the alphabet list for Norwegian, as there were too many letters in it (according to Wikipedia at least), so even when training a model, we had some missing characters in the training set.
-rw-r--r-- script/BuildLangModelLogs/LangNorwegianModel.log 234
-rw-r--r-- script/langs/no.py 2
-rw-r--r-- src/LangModels/LangNorwegianModel.cpp 293
-rw-r--r-- src/nsLanguageDetector.h 1
-rw-r--r-- src/nsMBCSGroupProber.cpp 1
-rw-r--r-- src/nsMBCSGroupProber.h 2
6 files changed, 352 insertions, 181 deletions
diff --git a/script/BuildLangModelLogs/LangNorwegianModel.log b/script/BuildLangModelLogs/LangNorwegianModel.log
new file mode 100644
index 0000000..9c066bf
--- /dev/null
+++ b/script/BuildLangModelLogs/LangNorwegianModel.log
@@ -0,0 +1,234 @@
+= Logs of language model for Norwegian (no) =
+
+- Generated by BuildLangModel.py
+- Started: 2022-11-30 20:26:27.916571
+- Maximum depth: 2
+- Max number of pages: 200
+
+== Parsed pages ==
+
+Norsk (revision 22974717)
+Saft (revision 22967608)
+Hund (revision 23005187)
+Valg i Norge (revision 22782362)
+Asia (revision 23117912)
+Saarloos wolfhond (revision 22789727)
+Østfold (revision 23055508)
+Fårehunder (revision 22264555)
+Stripesjakal (revision 18745363)
+12. mai (revision 23118103)
+Gullsjakal (revision 23104601)
+Urhund (revision 23050226)
+E (revision 22904440)
+Luxembourgsk (revision 22813155)
+Obstruent (revision 15267134)
+Gudbrandsdalen (revision 23014277)
+Norges berggrunn (revision 21768509)
+Riksforsamlingen (revision 22999081)
+Sosiolekt (revision 21458982)
+Habitat (revision 23123646)
+Norsk språkhistorie (20. århundre) (revision 22891154)
+Søsterart (revision 20748512)
+Halvdan Koht (revision 22303367)
+Plosiver (revision 21816753)
+Svorsk (revision 20789512)
+Skandinavia (revision 22814296)
+Partisipp (revision 22785842)
+H (revision 23086416)
+Kreft (revision 23050449)
+Kreft hos hunder (revision 21811805)
+Q (revision 23024714)
+Fédération Cynologique Internationale (revision 22172054)
+Rosin (revision 22818749)
+Tribus (biologi) (revision 21339936)
+Siste istids maksimum (revision 23141296)
+Laurents Hallager (revision 22655416)
+Canider (revision 22229857)
+Individ (revision 20992252)
+Stortingsvalg 1945– (revision 22861299)
+Svalbards geologi (revision 22935346)
+Riksmålsvernet (revision 22966421)
+Magedreining (hund) (revision 21661370)
+Stortinget (revision 23071662)
+Bokmål (revision 22928969)
+Recessiv (revision 21780786)
+Synkopetida (revision 22906353)
+Artskompleks (revision 20848344)
+Homogenitet (revision 22857280)
+Pyometra (hund) (revision 22374115)
+Den norske språkstriden (revision 22428585)
+Gruppe (biologi) (revision 21969525)
+Stående fuglehunder (revision 22264516)
+Samnorsk (revision 22785915)
+Fastlands-Norge (revision 23141642)
+Drivende hunder (revision 22264618)
+Sibir (revision 22369404)
+Norges demografi (revision 23034159)
+FCI (revision 22172054)
+Vannhunder (revision 22264145)
+Prednisolon (revision 21804718)
+Midtvesten (revision 22423559)
+Buskerud (revision 22915767)
+Sogn og Fjordane (revision 22811825)
+Transport i Norge (revision 23131810)
+Ustemt palatal frikativ (revision 19011330)
+Anatolsk gjeterhund (revision 22303224)
+Norges fylker (revision 23129287)
+Tonelag (revision 22751959)
+Statsforvalter (revision 23133685)
+Sjokolade (revision 22988920)
+Nasaler (revision 16002502)
+Hundens pels (revision 22900550)
+Approksimanter (revision 16000119)
+Tapper (revision 18322970)
+Vakt- og vokterhunder (revision 23091054)
+Saluki (revision 22267261)
+Canis (revision 23079627)
+Island (revision 23097723)
+Flyball (revision 20457011)
+Staffordshire bull terrier (revision 23135078)
+Stockholm (revision 22770528)
+Sahel (revision 19821400)
+ISO 639-3 (revision 18859824)
+Ny-guinea villhund (revision 22567866)
+Rabies (revision 19440055)
+Ordbog over det norske Folkesprog (revision 23096800)
+Norge (revision 23141642)
+Flåttbårne sykdommer (hund) (revision 21355504)
+Bombehund (revision 22942055)
+Læreboknormalen av 1959 (revision 18841941)
+Tromøy (revision 22053767)
+Vorstehhund korthåret (revision 22264532)
+Tåkeskog (revision 20461967)
+Vest-Telemark (revision 22923647)
+Oslo (revision 23118371)
+Tyrkia (revision 23034073)
+Liste over Norges største tettsteder (revision 23138252)
+Energi (revision 22979461)
+Jakt med hund (revision 22890790)
+Sogn fogderi (revision 22425444)
+Integrated Taxonomic Information System (revision 20457376)
+Tadsjikistan (revision 22864814)
+Befolkningstetthet (revision 22253839)
+Tøddel (revision 21641445)
+Den lille istid (revision 22782643)
+Norsk språkhistorie (1400–1800) (revision 21342667)
+Unionen mellom Sverige og Norge (revision 22922743)
+Fylkeskommune (revision 22011606)
+ĸ (revision 17096887)
+Degas (revision 22751270)
+Gløgg (revision 22902469)
+Antistoff (revision 20746889)
+Norges statsminister (revision 22948566)
+Lørdag (revision 23031303)
+Ş (revision 12094187)
+Hallingdal (revision 22811584)
+1969 (revision 22958238)
+Juli (revision 22359558)
+Shar pei (revision 22891357)
+Dyr (revision 23101991)
+Ƙ (revision 15223100)
+PhyloCode (revision 22857413)
+Y-kromosom (revision 22783781)
+Høst (revision 23087627)
+Geit (revision 21989005)
+Guatemala (revision 22780680)
+USA (revision 22781448)
+Tamhund (revision 23005187)
+Populasjonsdynamikk (revision 20640003)
+Christoffer Oftedahl (revision 19783269)
+Mellomnorsk (revision 22546096)
+1000 (revision 20456192)
+Servicehund (revision 22337757)
+Himalayaulv (revision 21791662)
+Ø (bokstav) (revision 22617366)
+Ǩ (revision 15223173)
+Bordeaux dogge (revision 22266230)
+Frøplanter (revision 21763501)
+Ustemt bilabial plosiv (revision 22354758)
+Digraf (revision 19954081)
+12. århundre (revision 23123540)
+Sametingsvalget 1993 (revision 21890290)
+Førerhund (revision 20465384)
+Grenada (revision 22948831)
+Aserbajdsjans administrative inndeling (revision 22782483)
+Verneområder i Norge (revision 22076171)
+Pelsdyroppdrett (revision 22827568)
+Kretahund (revision 22201230)
+Etne (revision 22659600)
+Koreansk chejudo (revision 22199018)
+Riesenschnauzer (revision 23103775)
+Italias regioner (revision 22182270)
+Dingo (revision 23050226)
+Firfisle (revision 21650282)
+Dominans (revision 21160764)
+CITES (revision 22637082)
+Helligdager i Norge (revision 22095322)
+Bunad (revision 23086915)
+Barnekreftforeningen (revision 19888945)
+Guttorm Hansen (revision 22098933)
+Albania (revision 22939774)
+Medier i Norge (revision 21776331)
+Finsk (revision 22908244)
+Anders Lysgaard (revision 22858529)
+Bakverk (revision 15226081)
+Ć (revision 15785421)
+Vatikanstaten (revision 22782366)
+Steinalderen i Norge (revision 23106147)
+Johnny Depp (revision 22764203)
+Sverre Steen (revision 22112509)
+Fjellrev (revision 22812483)
+Bayersk viltsporhund (revision 22805751)
+Ń (revision 15222385)
+Utdannelse i Norge (revision 22814897)
+Espen Berntsen (revision 21025561)
+Nederland (revision 23024484)
+Liste over hundegrupper (revision 18570830)
+
+== End of Parsed pages ==
+
+- Wikipedia parsing ended at: 2022-11-30 20:29:27.551046
+
+62 characters appeared 1228749 times.
+
+Most Frequent characters:
+[ 0] Char e: 15.049208585317261 %
+[ 1] Char r: 8.84924423132796 %
+[ 2] Char n: 8.422550089562636 %
+[ 3] Char t: 7.726394894319344 %
+[ 4] Char s: 6.64798099530498 %
+[ 5] Char a: 6.28020856985438 %
+[ 6] Char i: 5.99455218274847 %
+[ 7] Char l: 5.422262805503809 %
+[ 8] Char o: 5.386942329149403 %
+[ 9] Char d: 4.534774799409806 %
+[10] Char g: 3.86091870674971 %
+[11] Char k: 3.6487516978650643 %
+[12] Char m: 3.216197937902696 %
+[13] Char v: 2.4669806445417253 %
+[14] Char f: 2.0122091655822305 %
+[15] Char u: 1.8136332155712844 %
+[16] Char p: 1.6869189720602011 %
+[17] Char b: 1.4243755233981878 %
+[18] Char h: 1.3665117937023752 %
+[19] Char å: 1.1134902246105591 %
+[20] Char y: 0.8473658981614633 %
+[21] Char ø: 0.792431977564173 %
+[22] Char j: 0.7630525029928814 %
+[23] Char c: 0.2926553755079353 %
+[24] Char æ: 0.20012223814627725 %
+[25] Char w: 0.05932863424507365 %
+[26] Char z: 0.028565638710591017 %
+[27] Char x: 0.023194322029967063 %
+[28] Char é: 0.017171936660782636 %
+[29] Char q: 0.009521879570197005 %
+
+The first 30 characters have an accumulated ratio of 0.9995751776807141.
+
+967 sequences found.
+
+First 442 (typical positive ratio): 0.9950425176429516
+Next 157 (599-442): 0.0039580060347621515
+Rest: 0.0009994763222862524
+
+- Processing end: 2022-11-30 20:29:27.623923
diff --git a/script/langs/no.py b/script/langs/no.py
index 93cf23f..b8d777f 100644
--- a/script/langs/no.py
+++ b/script/langs/no.py
@@ -48,7 +48,7 @@ charsets = ['IBM865', 'ISO-8859-15', 'ISO-8859-1', 'WINDOWS-1252']
## Optional Properties ##
# Alphabet characters.
-alphabet = 'æøåéìîàêÆØÅ'
+alphabet = 'æøå'
# Some pages that should contain mostly Norwegian text.
start_pages = ['Norsk', 'Saft', 'Hund']
wikipedia_code = code
diff --git a/src/LangModels/LangNorwegianModel.cpp b/src/LangModels/LangNorwegianModel.cpp
index f3a876d..2bc2281 100644
--- a/src/LangModels/LangNorwegianModel.cpp
+++ b/src/LangModels/LangNorwegianModel.cpp
@@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
/********* Language model for: Norwegian *********/
/**
* Generated by BuildLangModel.py
- * On: 2022-01-28 21:58:11.143599
+ * On: 2022-11-30 20:29:27.551827
**/
/* Character Mapping Table:
@@ -67,17 +68,17 @@ static const unsigned char Ibm865_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */
- 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */
- 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
- 43, 32, 28, 50, 31, 45, 19, 43, 53, 42, 41, 57, 61, 58, 31, 19, /* 8X */
- 28, 24, 24, 37, 30, 54, 63, 59, 64, 30, 32, 21,SYM, 21,SYM,SYM, /* 9X */
- 36, 33, 35, 40, 44, 44,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */
+ 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */
+ 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 37, 36, 28, 45, 31, 43, 19, 37, 53, 39, 44, 59, 56, 54, 31, 19, /* 8X */
+ 28, 24, 24, 41, 30, 48, 62, 55, 63, 30, 36, 21,SYM, 21,SYM,SYM, /* 9X */
+ 33, 34, 35, 40, 49, 49,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* DX */
- 48, 46, 65, 66, 60, 60, 67, 62, 68, 69, 70, 71, 72, 73, 52,SYM, /* EX */
+ 52, 60, 64, 65, 61, 61, 66, 47, 67, 68, 69, 70, 71, 72, 42,SYM, /* EX */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -88,18 +89,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */
- 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */
- 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */
+ 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */
+ 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 47,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM, 49, 74,SYM,SYM, 49,SYM,SYM,SYM, 51, 51, 75,SYM, /* BX */
- 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* CX */
- 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 76, 32, 39, 38, 46, /* DX */
- 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* EX */
- 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 77, 32, 39, 38, 78, /* FX */
+ SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 58,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM, 73, 74,SYM,SYM, 75,SYM,SYM,SYM, 50, 50, 76,SYM, /* BX */
+ 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */
+ 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 77, 36, 51, 38, 60, /* DX */
+ 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */
+ 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 78, 36, 51, 38, 79, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -109,18 +110,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */
- 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */
- 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */
+ 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */
+ 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 79,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* CX */
- 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 80, 32, 39, 38, 46, /* DX */
- 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* EX */
- 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 81, 32, 39, 38, 82, /* FX */
+ SYM,SYM,SYM,SYM,SYM, 80,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */
+ 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 81, 36, 51, 38, 60, /* DX */
+ 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */
+ 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 82, 36, 51, 38, 83, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -130,155 +131,75 @@ static const unsigned char Windows_1252_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */
- 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */
- 15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,ILL,SYM, 83,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 51,ILL, 49,ILL, /* 8X */
- ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 51,ILL, 49, 84, /* 9X */
+ SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */
+ 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */
+ 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,ILL,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 50,ILL, 85,ILL, /* 8X */
+ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 50,ILL, 86, 87, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 85,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
- 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* CX */
- 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 86, 32, 39, 38, 46, /* DX */
- 45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* EX */
- 34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 87, 32, 39, 38, 88, /* FX */
+ SYM,SYM,SYM,SYM,SYM, 88,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */
+ 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 89, 36, 51, 38, 60, /* DX */
+ 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */
+ 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 90, 36, 51, 38, 91, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+static const int Unicode_Char_size = 60;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 5, 66, 17, 67, 23, 68, 9, 69, 0, 70, 14, 71, 10, 72, 18,
+ 73, 6, 74, 22, 75, 11, 76, 7, 77, 12, 78, 2, 79, 8, 80, 16,
+ 81, 29, 82, 1, 83, 4, 84, 3, 85, 15, 86, 13, 87, 25, 88, 27,
+ 89, 20, 90, 26, 97, 5, 98, 17, 99, 23, 100, 9, 101, 0,102, 14,
+ 103, 10, 104, 18, 105, 6, 106, 22, 107, 11, 108, 7, 109, 12,110, 2,
+ 111, 8, 112, 16, 113, 29, 114, 1, 115, 4, 116, 3, 117, 15,118, 13,
+ 119, 25, 120, 27, 121, 20, 122, 26, 197, 19, 198, 24, 201, 28,216, 21,
+ 229, 19, 230, 24, 233, 28, 248, 21,
+};
+
/* Model Table:
- * Total sequences: 991
- * First 512 sequences: 0.9975864274305254
- * Next 512 sequences (512-1024): 0.002413572569474574
- * Rest: 3.5128150388530344e-17
+ * Total considered sequences: 967 / 900
+ * - Positive sequences: first 442 (0.9950425176429516)
+ * - Probable sequences: next 157 (599-442) (0.0039580060347621515)
+ * - Neutral sequences: last 301 (0.0009994763222862524)
+ * - Negative sequences: -67 (off-ratio)
* Negative sequences: TODO
*/
static const PRUint8 NorwegianLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,2,0,
- 0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,
- 2,2,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,
- 2,2,2,0,0,2,0,0,2,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,0,
- 2,2,2,0,2,0,0,0,2,0,0,2,0,0,2,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,2,
- 2,2,0,0,0,2,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,0,3,3,3,0,2,0,
- 0,0,2,2,0,0,0,0,0,2,0,2,0,2,0,2,2,0,2,0,0,0,0,0,0,0,2,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,2,2,3,2,2,2,0,
- 0,0,0,2,2,0,0,0,0,0,2,2,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,2,2,
- 2,2,2,0,2,2,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,2,3,0,3,2,3,0,2,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,0,2,
- 0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0,0,0,0,2,
- 0,0,0,0,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,2,2,0,0,2,2,2,
- 2,2,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,2,0,2,2,2,
- 2,2,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,2,0,0,2,0,0,
- 2,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,0,3,2,2,3,3,3,3,3,3,0,0,0,2,0,2,
- 0,2,0,0,2,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,0,2,0,0,
- 2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,0,2,2,2,3,2,2,3,2,2,2,0,
- 0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,2,3,3,3,3,3,2,2,2,2,0,2,2,3,3,2,3,3,3,3,2,3,2,2,0,2,0,2,
- 2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,2,3,2,2,3,3,3,3,2,3,2,2,0,2,0,2,
- 2,2,2,0,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,2,2,3,2,3,3,3,2,3,3,3,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,0,2,3,2,2,2,2,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,0,2,3,2,3,3,3,3,3,3,3,0,3,2,0,3,2,2,2,0,2,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,2,2,2,2,3,3,2,3,2,2,2,2,0,2,0,3,0,0,2,2,3,2,0,3,0,0,0,0,0,2,
- 2,2,0,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,2,3,3,3,3,3,3,2,3,0,3,2,0,2,3,2,3,0,3,0,0,3,2,0,2,0,2,2,0,
- 0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
- 0,3,3,3,3,0,2,2,0,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,2,2,2,3,3,3,2,3,2,2,0,2,0,2,2,2,2,3,0,2,2,2,2,0,2,0,0,0,0,0,
- 2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,2,2,2,2,3,3,2,3,2,2,2,2,0,2,0,2,2,2,0,3,0,0,2,0,2,2,0,0,0,0,
- 0,2,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,2,2,2,3,3,2,2,0,0,0,0,2,2,2,2,2,2,0,2,0,0,0,0,0,0,2,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,2,3,2,2,2,0,2,2,2,0,2,2,2,2,0,0,2,2,0,0,0,0,2,0,0,2,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,2,0,0,2,2,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,2,2,2,0,0,2,0,2,2,2,2,2,2,2,0,0,2,0,2,0,0,2,0,0,0,0,0,0,2,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,2,2,2,0,0,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,2,2,0,2,0,0,2,0,2,0,2,0,0,2,0,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,2,0,2,2,0,2,0,0,2,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,2,2,0,0,2,2,2,0,0,0,0,2,0,0,0,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,
- 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,2,2,2,0,0,2,0,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,2,2,2,0,0,2,0,0,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
- 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,2,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,2,0,2,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,0,1,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,1,2,1,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,2,0,2,1,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,0,1,1,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,1,2,3,0,2,2,2,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,1,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,0,1,1,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,2,3,0,2,2,2,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,2,1,1,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,0,1,1,0,1,0,1,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,1,1,1,0,0,1,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,0,0,0,0,1,
+ 3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,3,1,2,2,1,3,0,0,0,0,0,
+ 3,3,2,3,2,3,3,3,3,1,2,1,1,0,3,3,0,1,2,3,3,3,3,2,2,0,1,1,2,1,
+ 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,1,1,2,1,2,0,1,2,2,1,1,
+ 3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,2,3,3,2,3,1,1,1,0,1,0,1,0,
+ 3,3,3,1,3,3,3,3,3,2,0,1,1,0,2,3,2,3,1,3,3,3,3,1,3,1,0,0,1,0,
+ 3,3,3,3,2,3,3,2,3,1,1,2,2,3,1,3,1,2,2,3,3,3,3,1,2,2,0,1,0,1,
+ 3,3,3,3,3,1,2,3,1,3,3,3,2,3,2,0,3,2,2,0,0,1,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,3,3,2,0,1,1,1,2,0,1,1,1,0,0,
+ 3,3,3,3,3,1,2,3,1,3,3,3,3,3,2,0,3,2,1,0,3,1,1,0,0,0,0,0,0,0,
+ 3,1,1,1,1,3,3,1,3,1,0,1,2,0,2,3,1,0,1,2,1,3,1,0,3,0,0,0,0,0,
+ 3,2,2,3,2,3,3,3,3,2,1,3,2,0,0,3,0,2,3,0,3,0,0,2,1,1,1,0,0,1,
+ 0,3,2,2,2,0,1,2,0,1,1,1,1,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
+ 3,1,2,2,2,3,3,1,3,1,0,1,1,0,1,1,0,1,2,0,1,0,0,0,0,2,1,0,0,0,
+ 3,0,2,1,1,2,2,1,3,0,1,0,1,0,1,2,0,1,1,0,1,0,1,0,0,1,2,0,0,0,
+ 2,0,1,1,1,2,2,1,2,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,
+ 2,2,3,2,2,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,0,2,1,0,1,0,0,0,0,1,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
};
@@ -286,38 +207,52 @@ const SequenceModel Ibm865NorwegianModel =
{
Ibm865_CharToOrderMap,
NorwegianLangModel,
- 62,
- (float)0.9975864274305254,
+ 30,
+ (float)0.9990005236777137,
PR_TRUE,
- "IBM865"
+ "IBM865",
+ "no"
};
const SequenceModel Iso_8859_15NorwegianModel =
{
Iso_8859_15_CharToOrderMap,
NorwegianLangModel,
- 62,
- (float)0.9975864274305254,
+ 30,
+ (float)0.9990005236777137,
PR_TRUE,
- "ISO-8859-15"
+ "ISO-8859-15",
+ "no"
};
const SequenceModel Iso_8859_1NorwegianModel =
{
Iso_8859_1_CharToOrderMap,
NorwegianLangModel,
- 62,
- (float)0.9975864274305254,
+ 30,
+ (float)0.9990005236777137,
PR_TRUE,
- "ISO-8859-1"
+ "ISO-8859-1",
+ "no"
};
const SequenceModel Windows_1252NorwegianModel =
{
Windows_1252_CharToOrderMap,
NorwegianLangModel,
- 62,
- (float)0.9975864274305254,
+ 30,
+ (float)0.9990005236777137,
PR_TRUE,
- "WINDOWS-1252"
+ "WINDOWS-1252",
+ "no"
+};
+
+const LanguageModel NorwegianModel =
+{
+ "no",
+ Unicode_CharOrder,
+ 60,
+ NorwegianLangModel,
+ 30,
+ (float)0.9995751776807141,
};
diff --git a/src/nsLanguageDetector.h b/src/nsLanguageDetector.h
index 30b935a..339d4e2 100644
--- a/src/nsLanguageDetector.h
+++ b/src/nsLanguageDetector.h
@@ -131,6 +131,7 @@ extern const LanguageModel ItalianModel;
extern const LanguageModel LatvianModel;
extern const LanguageModel LithuanianModel;
extern const LanguageModel MalteseModel;
+extern const LanguageModel NorwegianModel;
extern const LanguageModel PolishModel;
extern const LanguageModel PortugueseModel;
extern const LanguageModel RomanianModel;
diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp
index 51c268f..8388832 100644
--- a/src/nsMBCSGroupProber.cpp
+++ b/src/nsMBCSGroupProber.cpp
@@ -111,6 +111,7 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
langDetectors[i][j++] = new nsLanguageDetector(&LatvianModel);
langDetectors[i][j++] = new nsLanguageDetector(&LithuanianModel);
langDetectors[i][j++] = new nsLanguageDetector(&MalteseModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&NorwegianModel);
langDetectors[i][j++] = new nsLanguageDetector(&PolishModel);
langDetectors[i][j++] = new nsLanguageDetector(&PortugueseModel);
langDetectors[i][j++] = new nsLanguageDetector(&RomanianModel);
diff --git a/src/nsMBCSGroupProber.h b/src/nsMBCSGroupProber.h
index 9596ac0..1dea490 100644
--- a/src/nsMBCSGroupProber.h
+++ b/src/nsMBCSGroupProber.h
@@ -49,7 +49,7 @@
#include "nsEUCTWProber.h"
#define NUM_OF_PROBERS 8
-#define NUM_OF_LANGUAGES 30
+#define NUM_OF_LANGUAGES 31
class nsMBCSGroupProber: public nsCharSetProber {
public: