summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jehan <jehan@girinstud.io> 2022-12-16 23:17:47 +0100
committer: Jehan <jehan@girinstud.io> 2022-12-16 23:27:52 +0100
commit: 0974920bddfbb1eb13a8d84aa1acd96822d9bf33 (patch)
tree: e2e5d70aaa092af015f94ee601d4d2ea777e664f
parent: 127d7faf478d62533f2ead1e8df5f2d7a6276da1 (diff)
Issue #22: Hebrew CP862 support.
Added in both visual and logical order, since Wikipedia says: "Hebrew text encoded using code page 862 was usually stored in visual order; nevertheless, a few DOS applications, notably a word processor named EinsteinWriter, stored Hebrew in logical order." I am not using the nsHebrewProber wrapper (nameProber) for this new support, because I am really unsure it is of any use. Our statistical code, based on letter and sequence usage, should already be more than enough to detect both variants of Hebrew encoding, and my testing shows that this holds so far (with pretty outstanding scores on actual Hebrew tests, while all the other probers return bad scores). This will have to be studied a bit more later, and maybe the whole nsHebrewProber could be deleted, even for the Windows-1255 charset. I'm also cleaning up the nsSBCSGroupProber::nsSBCSGroupProber() code a bit by incrementing a single index instead of maintaining the indexes by hand (otherwise, each time we add probers in the middle to keep them logically grouped by language, we have to manually increment the indexes of dozens of following probers).
-rw-r--r--README.md1
-rw-r--r--script/BuildLangModelLogs/LangHebrewModel.log526
-rw-r--r--script/charsets/ibm862.py71
-rw-r--r--script/langs/he.py2
-rw-r--r--src/LangModels/LangHebrewModel.cpp304
-rw-r--r--src/nsSBCSGroupProber.cpp298
-rw-r--r--src/nsSBCSGroupProber.h2
-rw-r--r--src/nsSBCharSetProber.h1
8 files changed, 661 insertions, 544 deletions
diff --git a/README.md b/README.md
index b01494b..288e0b3 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,7 @@ uchardet started as a C language binding of the original C++ implementation of t
* UTF-8
* ISO-8859-8
* WINDOWS-1255
+ * IBM862
* Hindi
* UTF-8
* Hungarian:
diff --git a/script/BuildLangModelLogs/LangHebrewModel.log b/script/BuildLangModelLogs/LangHebrewModel.log
index 7d77ed5..fac1fe9 100644
--- a/script/BuildLangModelLogs/LangHebrewModel.log
+++ b/script/BuildLangModelLogs/LangHebrewModel.log
@@ -1,287 +1,285 @@
= Logs of language model for Hebrew (he) =
- Generated by BuildLangModel.py
-- Started: 2022-12-14 23:51:45.820761
+- Started: 2022-12-15 15:23:40.722736
- Maximum depth: 4
- Max number of pages: 200
== Parsed pages ==
יהדות_בוקרשט (revision 35182799)
-22 במרץ (revision 34452506)
-אלכסנדר איפסילנטי (הנכד) (revision 34666729)
-1812 (revision 25165786)
-ולאכים (revision 34292795)
-לאזאר שינאנו (revision 28852393)
-1857 (revision 26643435)
-פסח (revision 35070750)
-אירופה (revision 35154728)
-אוסטרו-הונגריה (revision 35232497)
-1918 (revision 34819769)
-טרנסילבניה (revision 34937962)
-גאלאץ (revision 35110599)
-5 במרץ (revision 34881550)
-4 בנובמבר (revision 34653109)
-י' באדר (revision 34929200)
-צ'כוסלובקיה (revision 35018067)
-1813 (revision 35224967)
-מונטניה (revision 32867963)
-איחוד הנסיכויות הרומניות (revision 34573782)
-ורסאי (revision 33228948)
-התעמלות (revision 33465424)
-גסטה הונגרורום (revision 34050008)
-ערב פסח (revision 34567652)
-נס (revision 35036327)
+בית כלא (revision 35227881)
+יעקב אלמולי (revision 35001208)
+טודור ולדימירסקו (revision 29886791)
+בלקן (revision 33993972)
+גזירות ת"ח ת"ט (revision 34763839)
+סילביו ברוקאן (revision 29510407)
+1912 (revision 33159662)
+צבא (revision 34619941)
+שיטת ספריית הקונגרס (revision 30163525)
+מרסל ינקו (revision 34359400)
+פנקס הקהילות (revision 34615712)
+יעקב פסנתיר (revision 33120540)
+בוקרשט (revision 35173617)
+יהדות לוב (revision 34775645)
+מדרג (revision 34420008)
קובץ בקרה משולב (revision 34980719)
-דימיטריוס איפסילנטי (revision 30504939)
-אפריקה (revision 35211326)
-דנובה (revision 35092661)
-אוקראינה (revision 35229535)
-גיבור לאומי (revision 34626677)
-2022 (revision 35212298)
-בית הבסבורג (revision 35149190)
-הספרייה הלאומית של יוון (revision 34171107)
-חנינה (revision 34701982)
-קיבוץ (revision 35167035)
-ט"ו באייר (revision 33760929)
-פלנדריה (revision 33811227)
-978 (revision 34219188)
-קרואטיה (revision 35208639)
-28 בדצמבר (revision 35110007)
-ספטאר (revision 19132086)
-מצבות (revision 35144605)
-אתונה (revision 35225868)
-המפלגה הרפובליקנית (revision 35075441)
-ויקישיתוף (revision 34805938)
-23 בדצמבר (revision 35039131)
-האימפריה הביזנטית (revision 35036469)
-אוסטריה (revision 35099816)
-כ"ו בסיוון (revision 34929226)
-בית עלמין (revision 34251157)
-24 במרץ (revision 34871871)
-הספרייה הלאומית של שוודיה (revision 33961967)
-פיימונטה (revision 32640611)
-אנציקלופדיה בריטניקה (revision 35145787)
-קרן ויקימדיה (revision 35175443)
-קונרד פון הצנדורף (revision 34979858)
-STS-3 (revision 35050894)
-אלברט בגר (revision 34720226)
-ראש השנה (revision 35059458)
-ספרות (revision 35203120)
-קרואטית (revision 32668378)
-WorldCat (revision 34980710)
-המאה ה-11 (revision 34809813)
-1910-1919 (revision 35049268)
-אנדריי הקדוש (revision 34598498)
-מיכאיל השני (revision 34445805)
-קולונל (revision 34566784)
-ז' באייר (revision 33760918)
-כ"ג בתשרי (revision 34235305)
-אזור זמן (revision 35002518)
-13 ביוני (revision 35157854)
+צבא קבע (revision 34044374)
+עברית (revision 35171043)
+2003 (revision 34884225)
+תומאס ג'פרסון (revision 35144907)
+אפגניסטן (revision 35015482)
+טנק (revision 34805353)
+21 באפריל (revision 34869840)
רומנית (revision 35154129)
-סורבון (revision 34389722)
-סגן-אלוף (revision 35196592)
-מורש (revision 32633346)
-אוניברסיטת לייפציג (revision 34568149)
-5 בדצמבר (revision 35190389)
-ולשים (revision 34848696)
-הספרייה הלאומית של לטביה (revision 34633898)
-תנור מיקרוגל (revision 34792045)
-יאשי (revision 34273547)
-אלבה יוליה (revision 33226243)
-יוון העתיקה (revision 35186156)
-ימי הביניים (revision 35195331)
-2016 (revision 33649777)
-סרבית (revision 32666563)
-א' בתשרי (revision 35012340)
-גרמניה (revision 35193278)
-1946 (revision 34549625)
-צי הים השחור (revision 34799823)
-13 באוגוסט (revision 35146746)
-קולוניאליזם (revision 35161793)
-רקדן (revision 34022227)
-פינלנד (revision 35217495)
-יין (revision 35158494)
-הלוח העברי (revision 35191477)
-כ"ט באייר (revision 34566795)
-איכר (revision 35209785)
-יואניס קפודיסטריאס (revision 32537246)
-28 בספטמבר (revision 34764952)
-22 ביוני (revision 35068229)
-נצרות אורתודוקסית (revision 35181856)
-חבל ארץ (revision 34910269)
-שובבי"ם (revision 33243828)
-צפצפה (revision 34734441)
-אינגמר ברגמן (revision 34902789)
-הספרייה הלאומית של צרפת (revision 34954915)
-1877 (revision 27881506)
-ו' בטבת (revision 34968733)
-ממלכת הונגריה (revision 34874665)
-מדינת זלצבורג (revision 33184168)
-13 באפריל (revision 35224094)
-85 (revision 30174062)
-רומניה (revision 35063882)
-רש"י (revision 35059927)
-טימישוארה (revision 35029927)
-15 בפברואר (revision 35158681)
+מערב אירופה (revision 35029137)
+פינסק (revision 34882043)
+בית סוהר גבעון (revision 34787725)
+הקהילה היהודית הספרדית בבוקרשט (revision 32942838)
+קומוניזם (revision 34968105)
+אנרגיה (revision 35140939)
+ספריית הוותיקן (revision 32639141)
+לאו רומני (revision 34364476)
+תותח (revision 35035899)
+כ' בסיוון (revision 34741740)
+Union List of Artist Names (revision 34992334)
+בנימין גלאי (revision 33202928)
+חיל הנדסה (revision 33949573)
+25 באוגוסט (revision 34821302)
+אנה טיכו (revision 34831809)
+חייל (revision 35206828)
+הלאמה (revision 34453859)
+קלוויניסט (revision 34763753)
+רפואה (revision 35157098)
+תלמוד תורה (revision 35216490)
+23 בינואר (revision 35038971)
+מלחמת העולם הראשונה (revision 35191080)
+כוחות מילואים (revision 32714547)
+גרמנית (revision 35085309)
+אוטודידקט (revision 34614272)
+דיוויזיית מתנדבים 1, טודור ולדימירסקו (revision 28599203)
+זית (revision 35159584)
+יהדות רומניה (revision 34919407)
+צבי לוקר (revision 34639828)
+WorldCat (revision 34980710)
+ספרי יזכור (revision 34570622)
+דת (revision 35160267)
+גזירות תתנ"ו (revision 34939752)
+פרו (revision 35228614)
+הארץ (revision 35234211)
הספרייה הלאומית (revision 35173909)
-פולנית (revision 35067795)
-אנשלוס (revision 34458944)
-כ"ד באב (revision 34889013)
-דומיטיאנוס (revision 35185961)
-קונסטנטין איפסילנטי (revision 31063233)
-הלוח הקראי (revision 33599480)
-פניני הלכה (revision 34937953)
-כ"ז באדר (revision 33868660)
-23 באפריל (revision 35127136)
-ניו יורק (revision 35216514)
-הארכיון הלאומי של ארצות הברית (revision 33086492)
-הספרייה הלאומית של הולנד (revision 34603407)
-ארומנים (revision 35094875)
-1935 (revision 34549609)
-1988 (revision 32747522)
-ויליאם מרסי (revision 32555101)
-26 ביולי (revision 34638841)
-דלמטיה (revision 32779937)
-מצרים (revision 35232714)
-19 במרץ (revision 34288440)
-10 באוגוסט (revision 35202828)
-קרחון (revision 33190499)
-לגיון הכבוד (revision 32631175)
-מועצת האיחוד האירופי (revision 34940586)
-ים מרמרה (revision 34966060)
-ארמית (revision 35199208)
-פלוישט (revision 33480182)
+בויאר (revision 34292683)
+נפוליאון בונפרטה (revision 35212132)
+העולם השלישי (revision 34866022)
+פנדורים (revision 22519224)
+קושטא (revision 34914083)
+תענית ציבור (revision 35122402)
+י"ט באייר (revision 33760934)
+דוד רובינגר (revision 34618241)
+1886 (revision 30398678)
+9 במרץ (revision 35039056)
+1855 (revision 34224046)
+מרד הלגיונרים ופרעות בוקרשט (revision 35067329)
+1966 (revision 34533574)
+יווני (revision 34012584)
+אוניברסיטת בוקרשט (revision 35188136)
+בוסניה והרצגובינה (revision 35162864)
+נצרות (revision 35210877)
+כלא שש (revision 35057829)
+אלפרד מנספלד (revision 35050837)
+אות (revision 34005221)
+י"א באייר (revision 34914962)
+5 באפריל (revision 35157784)
+ישראל (revision 35213935)
+קיילצה (revision 33935006)
+לותרני (revision 35064164)
+יום ראשון (revision 34281448)
+יהדות איטליה (revision 35198843)
+פרס דיזנגוף (revision 34534024)
+ה' בסיוון (revision 34566809)
+ח' בטבת (revision 35079706)
+האימפריה הרומית (revision 35119178)
+שגריר (revision 34965857)
+דן מכמן (revision 34522541)
+הספרייה הלאומית של צרפת (revision 34954915)
+דן ריזינגר (revision 34757254)
+אסטרטגיה צבאית (revision 35069854)
+אביבה ברושי (revision 35050673)
+טורקית (revision 34730801)
+11 במאי (revision 34445764)
+רב (revision 35062888)
+וולוז'ין (revision 35024306)
+ולאכיה (revision 33077945)
+יהדות הולנד (revision 33771623)
+אנגלית (revision 35222539)
+אוסטרליה (revision 35084368)
+חוק (revision 35117792)
+נצרות אורתודוקסית (revision 35181856)
+שבתאות (revision 35118251)
+הספרייה הלאומית של צ'כיה (revision 34679038)
+שימוש הוגן (revision 34698539)
+המאה ה-19 (revision 35228599)
+אולטניה (revision 35181527)
+תולדות עם ישראל (revision 35227911)
+1999 (revision 34550725)
+טוגאי ביי (revision 29009639)
+בית הדין העממי (רומניה) (revision 29292417)
+יהדות (revision 35238551)
+מוסלמים (revision 35186931)
+סלובניה (revision 34076843)
+1944 (revision 33848050)
+VIAF (revision 34992335)
+יחיאל שמי (revision 35169033)
+משפחת אוסטרוגסקי (revision 27522789)
+בוהמיה (revision 34774081)
+גולאג (revision 33926313)
+משה מוקדי (revision 33579655)
+קרן ויקימדיה (revision 35175443)
+ב' באלול (revision 33761030)
+רגולציה (revision 35168860)
+הקהילה היהודית הספרדית ברומניה (revision 32942827)
+הרתעה (אסטרטגיה) (revision 34184585)
+נובוגרודק (revision 34333750)
+מודל צבא העם (revision 34762715)
+מלחמת העולם השנייה (revision 35218209)
+חשוון (revision 35214064)
+1875 (revision 25165857)
+ליידי בירד ג'ונסון (revision 35156176)
+הספרייה הלאומית של ספרד (revision 34172052)
+רבנים (revision 16968274)
+בית סוהר מגידו (revision 33202574)
+גליציה (revision 34740074)
+יהדות בלארוס (revision 34770618)
+יהודים (revision 35220685)
+עמירם תמרי (revision 33235872)
+יהדות ליטא (revision 35062246)
+עלייה לרגל (revision 34764674)
+המועצה לישראל יפה (revision 34627430)
+יום שישי (revision 34737763)
+ג'מייקה (revision 35022818)
+למ"ד (revision 34438979)
+שיעה (revision 35141725)
+1987 (revision 32747521)
+שיטפון (revision 34831666)
+פרסית (revision 35135705)
+קניין רוחני (revision 34598306)
+תסריטאי (revision 34389192)
+גשם (revision 35214991)
+קצין (revision 35189304)
+שמואל וודניצקי (revision 33250304)
+בית חיים (revision 35213536)
+אליעזר פאפו (revision 34907056)
+יצחק דנציגר (revision 35163501)
+ירמיהו (revision 35170413)
+אלכסנדר סוורוס (revision 34549496)
+יוליסס סימפסון גרנט (revision 35099753)
+אלפבית עברי (revision 35167195)
+יום השבת (revision 32714481)
+ספרד (revision 35240234)
+קרואטיה (revision 35208639)
+יום כיפור קטן (revision 34566029)
+דתיים לאומיים (revision 35191810)
+לוניניץ (revision 34618951)
+מנצ'וריה (revision 35213350)
+ולנטיניאנוס הראשון (revision 35183518)
מערכת התיעוד האוניברסיטאית (צרפת) (revision 34033122)
-ט"ז בתשרי (revision 34975870)
-תשרי (revision 35214048)
-2015 (revision 33078518)
-1 בספטמבר (revision 34604999)
-רועה צאן (revision 35190455)
-מסחר (revision 35199982)
-לילה טרטיקוב (revision 34247187)
-נסיכות מולדובה (revision 34307667)
-שפות קלטיות (revision 32669163)
-2 במרץ (revision 34881461)
-אביר (revision 35210360)
-יוחנן בן זבדי (revision 34992438)
-טומס לייטרסדורף (revision 34885446)
-לשון סגי נהור (revision 33284044)
-רפורמציה (revision 34057367)
-רומאני (שפה) (revision 32668582)
-גרוסגלוקנר (revision 33563588)
-בירגיט אנטונסן (revision 30296687)
-ועידת ניקיאה (revision 32627552)
-סרט קולנוע (revision 35150502)
-הונגרית (revision 35067792)
-המנון אוסטריה (revision 35025960)
-אדוארד טיצ'נר (revision 30414251)
-823 (revision 34214879)
-אנו (פרובינציה) (revision 34152356)
-שוקולד (revision 35227942)
-24 בפברואר (revision 35197344)
-אוצר דינים ומנהגים (revision 29520131)
-מלחמת העצמאות (revision 35192826)
-כ"ח בתשרי (revision 33760633)
-1910 (revision 25165898)
-טורפים (revision 35208149)
-ו' בתשרי (revision 33760609)
-ישיבת פוניבז' לצעירים (revision 35205466)
-12 במאי (revision 35141724)
-בולגריה (revision 35214066)
-דאקיה (revision 35210657)
-איטליה (revision 35204156)
-אריה ניר הוצאה לאור (revision 35013765)
-אוניברסיטת שטרסבורג (revision 34977931)
-משרד המסחר והתעשייה (revision 35216585)
-1836 (revision 25165811)
-חסידות זידיטשוב (revision 35178483)
-בקר הבית (revision 35234239)
-פסיכיאטר (revision 34428172)
-י"ב באייר (revision 34444494)
-יהדות גרמניה (revision 34989399)
+מוזיאון תל אביב (revision 34779076)
+חוזה פריז (1783) (revision 34280442)
+דיקטטורה (revision 34987941)
++ (revision 34951817)
+יוני (revision 33963139)
+כ"ג בסיוון (revision 34929216)
+דרג דיפלומטי (revision 33574252)
+אנציקלופדיה בריטניקה (revision 35145787)
+וגטיוס (revision 33391266)
+מהרי"ל (revision 34613180)
+מוזיאון סטדלייק (revision 33770681)
+ספרייה דיגיטלית (revision 34044215)
+עיצור שפתי-שיני, אפי (revision 34158419)
+פסנתרן (revision 34558921)
+צבא אוסטרליה (revision 34306538)
+בוואריה (revision 35069866)
== End of Parsed pages ==
-- Wikipedia parsing ended at: 2022-12-14 23:55:24.013499
+- Wikipedia parsing ended at: 2022-12-15 15:27:25.018656
-80 characters appeared 1484111 times.
+94 characters appeared 1622917 times.
Most Frequent characters:
-[ 0] Char י: 12.060486041812236 %
-[ 1] Char ו: 11.073026208956069 %
-[ 2] Char ה: 8.602254144063348 %
-[ 3] Char ר: 6.503556674669213 %
-[ 4] Char ל: 6.033106688111603 %
-[ 5] Char ב: 5.481800215752056 %
-[ 6] Char ת: 5.432208237793534 %
-[ 7] Char מ: 5.147728168580382 %
-[ 8] Char א: 4.6662951760346765 %
-[ 9] Char ש: 4.158988108032351 %
-[10] Char נ: 3.8866365116894896 %
-[11] Char ם: 2.7109832081293113 %
-[12] Char ד: 2.617054923789393 %
-[13] Char ע: 2.4286593118708772 %
-[14] Char ק: 2.396788380383947 %
-[15] Char פ: 2.263846841644594 %
-[16] Char ח: 2.159339833745589 %
-[17] Char ס: 2.0885230282640586 %
-[18] Char כ: 1.8159019103018574 %
-[19] Char ט: 1.6087745458392262 %
-[20] Char ג: 1.4954407049068432 %
-[21] Char צ: 1.2486262819964276 %
-[22] Char ן: 1.2011230965877888 %
-[23] Char ז: 0.8258142416571267 %
-[24] Char ך: 0.3579920908880805 %
-[25] Char ף: 0.19762672738090342 %
-[26] Char ץ: 0.1861046781541273 %
-[27] Char e: 0.1302463225459551 %
-[28] Char a: 0.12243019558510111 %
-[29] Char i: 0.11131242878733463 %
-[30] Char r: 0.08961593843048128 %
-[31] Char n: 0.08665120061774355 %
-[32] Char o: 0.07634199867799646 %
-[33] Char t: 0.07108632710087048 %
-[34] Char l: 0.06502209066572515 %
-[35] Char s: 0.06151830961430783 %
-[36] Char u: 0.05019840160203651 %
-[37] Char c: 0.039350156423609825 %
-[38] Char d: 0.035576853752852716 %
-[39] Char m: 0.03463352808516344 %
-[40] Char h: 0.023583141692231916 %
-[41] Char g: 0.02250505521487274 %
-[42] Char C: 0.01866437213928069 %
-[43] Char v: 0.017653666066756463 %
-[44] Char p: 0.017653666066756463 %
-[45] Char A: 0.01610391675555265 %
-[46] Char S: 0.016036536350717702 %
-[47] Char b: 0.01576701473137791 %
-[48] Char I: 0.01563225392170801 %
-[49] Char y: 0.013880363395999356 %
-[50] Char T: 0.013476080966989665 %
-[51] Char B: 0.013273939752484821 %
-[52] Char D: 0.013273939752484821 %
-[53] Char M: 0.013004418133145026 %
-[54] Char k: 0.012734896513805235 %
-[55] Char f: 0.012397994489630491 %
-[56] Char P: 0.010039680320407302 %
-[57] Char E: 0.009770158701067507 %
-[58] Char G: 0.009500637081727714 %
-[59] Char L: 0.009365876272057818 %
-[60] Char N: 0.00929849586722287 %
-[61] Char R: 0.008422550604368542 %
-[62] Char F: 0.0070075621028346255 %
-[63] Char z: 0.006940181697999678 %
+[ 0] Char י: 11.931047613648756 %
+[ 1] Char ו: 11.230395639456608 %
+[ 2] Char ה: 8.702909637399818 %
+[ 3] Char ר: 6.166673958064399 %
+[ 4] Char ל: 6.0917471441854385 %
+[ 5] Char ת: 5.755007803849488 %
+[ 6] Char ב: 5.319434080732409 %
+[ 7] Char מ: 5.124476482777616 %
+[ 8] Char א: 4.594258363181851 %
+[ 9] Char ש: 4.086160906565154 %
+[10] Char נ: 3.7032700994567187 %
+[11] Char ם: 2.782397374603877 %
+[12] Char ד: 2.615537331853693 %
+[13] Char ע: 2.5096785602714125 %
+[14] Char ק: 2.2712806631515967 %
+[15] Char פ: 2.233139464310251 %
+[16] Char ח: 2.124939229794253 %
+[17] Char ס: 2.0344232021723845 %
+[18] Char כ: 1.8796401787645332 %
+[19] Char ט: 1.596261546339092 %
+[20] Char ג: 1.5693347226013405 %
+[21] Char צ: 1.2939663581070382 %
+[22] Char ן: 1.2081948737982287 %
+[23] Char ז: 0.8376275558146227 %
+[24] Char ך: 0.3550397216863216 %
+[25] Char ף: 0.2127034222945474 %
+[26] Char e: 0.16427211003396971 %
+[27] Char ץ: 0.15817198291717938 %
+[28] Char a: 0.14005645390368085 %
+[29] Char i: 0.12958148814757625 %
+[30] Char n: 0.10296275163794574 %
+[31] Char r: 0.10246981207295258 %
+[32] Char t: 0.08983823572000293 %
+[33] Char o: 0.08287546436447459 %
+[34] Char s: 0.08238252479948142 %
+[35] Char l: 0.06894992165341789 %
+[36] Char u: 0.052744533454267835 %
+[37] Char c: 0.04947880883618817 %
+[38] Char d: 0.0451039701968739 %
+[39] Char h: 0.04196148047004252 %
+[40] Char m: 0.03327342063703812 %
+[41] Char g: 0.023414629337174975 %
+[42] Char p: 0.023291394445926684 %
+[43] Char y: 0.0219358106421955 %
+[44] Char b: 0.020025669827847016 %
+[45] Char C: 0.01990243493659873 %
+[46] Char A: 0.017930676676626102 %
+[47] Char B: 0.017437737111632944 %
+[48] Char I: 0.017437737111632944 %
+[49] Char k: 0.017437737111632944 %
+[50] Char v: 0.016390240536022484 %
+[51] Char f: 0.01632862309039834 %
+[52] Char S: 0.015958918416653468 %
+[53] Char M: 0.014418482276049855 %
+[54] Char D: 0.013432603146063538 %
+[55] Char T: 0.013186133363566959 %
+[56] Char L: 0.012754811244197948 %
+[57] Char P: 0.012508341461701369 %
+[58] Char R: 0.010906287875473607 %
+[59] Char E: 0.010598200647352883 %
+[60] Char z: 0.010536583201728738 %
+[61] Char w: 0.010474965756104595 %
+[62] Char N: 0.009304234289245846 %
+[63] Char G: 0.0086880598330044 %
-The first 64 characters have an accumulated ratio of 0.9994865613151579.
-The first 5 characters have an accumulated ratio of 0.44272429757612475.
-All characters whose order is over 21 have an accumulated ratio of 0.04068630985148685.
+The first 64 characters have an accumulated ratio of 0.9992796920606537.
+The first 5 characters have an accumulated ratio of 0.4412277399275502.
+All characters whose order is over 22 have an accumulated ratio of 0.031037939709794155.
-1571 sequences found.
+1640 sequences found.
-First 663 (typical positive ratio): 0.9950033744779837
-Next 339 (1002-663): 0.003997985590807662
-Rest: 0.0009986399312086336
+First 688 (typical positive ratio): 0.9950129360753337
+Next 328 (1016-688): 0.0039909002477918065
+Rest: 0.0009961636768744953
-- Processing end: 2022-12-14 23:55:24.184437
+- Processing end: 2022-12-15 15:27:25.183725
diff --git a/script/charsets/ibm862.py b/script/charsets/ibm862.py
new file mode 100644
index 0000000..762aa57
--- /dev/null
+++ b/script/charsets/ibm862.py
@@ -0,0 +1,71 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# ##### BEGIN LICENSE BLOCK #####
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Jehan <jehan@girinstud.io>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ##### END LICENSE BLOCK #####
+
+from codepoints import *
+
+name = 'IBM862'
+aliases = ['CP862', 'OEM 862 (Hebrew)', 'MS-DOS Hebrew']
+
+language = \
+{
+ 'complete': [ 'he' ],
+ 'incomplete': []
+}
+
+# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
+charmap = \
+[
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
+ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
+ SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
+ LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
+ SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
+ LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
+ LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 8X
+ LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,LET, # 9X
+ LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # AX
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # BX
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # CX
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # DX
+ LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,SYM, # EX
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # FX
+]
diff --git a/script/langs/he.py b/script/langs/he.py
index 23713cc..1aa77be 100644
--- a/script/langs/he.py
+++ b/script/langs/he.py
@@ -49,7 +49,7 @@ name = 'Hebrew'
code = 'he'
use_ascii = False
# The charsets we want to support and create data for.
-charsets = ['ISO-8859-8', 'WINDOWS-1255']
+charsets = ['ISO-8859-8', 'WINDOWS-1255', 'IBM862']
## Optional Properties ##
diff --git a/src/LangModels/LangHebrewModel.cpp b/src/LangModels/LangHebrewModel.cpp
index 33b857e..91327ec 100644
--- a/src/LangModels/LangHebrewModel.cpp
+++ b/src/LangModels/LangHebrewModel.cpp
@@ -42,7 +42,7 @@
/**
* Generated by BuildLangModel.py
- * On: 2022-12-14 23:55:24.014547
+ * On: 2022-12-15 15:27:25.018997
**/
/* Character Mapping Table:
@@ -68,18 +68,18 @@ static const unsigned char Iso_8859_8_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 45, 51, 42, 52, 57, 62, 58, 67, 48, 72, 70, 59, 53, 60, 64, /* 4X */
- 56, 78, 61, 46, 50, 65, 66, 69, 76, 74, 77,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 28, 47, 37, 38, 27, 55, 41, 40, 29, 73, 54, 34, 39, 31, 32, /* 6X */
- 44, 75, 30, 35, 33, 36, 43, 68, 71, 49, 63,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM, 46, 47, 45, 54, 59, 66, 63, 69, 48, 67, 72, 56, 53, 62, 68, /* 4X */
+ 57, 80, 58, 52, 55, 64, 65, 70, 74, 73, 77,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 28, 44, 37, 38, 26, 51, 41, 39, 29, 75, 49, 35, 40, 30, 33, /* 6X */
+ 42, 76, 31, 34, 32, 36, 50, 61, 71, 43, 60,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 80,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR, /* BX */
+ SYM,SYM,SYM,SYM,SYM, 94,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR, /* BX */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* CX */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,SYM, /* DX */
- 8, 5, 20, 12, 2, 1, 23, 16, 19, 0, 24, 18, 4, 11, 7, 22, /* EX */
- 10, 17, 13, 25, 15, 26, 21, 14, 3, 9, 6,CTR,CTR,SYM,SYM,CTR, /* FX */
+ 8, 6, 20, 12, 2, 1, 23, 16, 19, 0, 24, 18, 4, 11, 7, 22, /* EX */
+ 10, 17, 13, 25, 15, 27, 21, 14, 3, 9, 5,CTR,CTR,SYM,SYM,CTR, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@@ -89,173 +89,194 @@ static const unsigned char Windows_1255_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
- SYM, 45, 51, 42, 52, 57, 62, 58, 67, 48, 72, 70, 59, 53, 60, 64, /* 4X */
- 56, 78, 61, 46, 50, 65, 66, 69, 76, 74, 77,SYM,SYM,SYM,SYM,SYM, /* 5X */
- SYM, 28, 47, 37, 38, 27, 55, 41, 40, 29, 73, 54, 34, 39, 31, 32, /* 6X */
- 44, 75, 30, 35, 33, 36, 43, 68, 71, 49, 63,SYM,SYM,SYM,SYM,CTR, /* 7X */
- SYM,CTR,SYM, 81,SYM,SYM,SYM,SYM, 82,SYM,CTR,SYM,CTR,CTR,CTR,CTR, /* 8X */
+ SYM, 46, 47, 45, 54, 59, 66, 63, 69, 48, 67, 72, 56, 53, 62, 68, /* 4X */
+ 57, 80, 58, 52, 55, 64, 65, 70, 74, 73, 77,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 28, 44, 37, 38, 26, 51, 41, 39, 29, 75, 49, 35, 40, 30, 33, /* 6X */
+ 42, 76, 31, 34, 32, 36, 50, 61, 71, 43, 60,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ SYM,CTR,SYM, 95,SYM,SYM,SYM,SYM, 96,SYM,CTR,SYM,CTR,CTR,CTR,CTR, /* 8X */
CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR,SYM,CTR,CTR,CTR,CTR, /* 9X */
- SYM,SYM,SYM,SYM, 83,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
- SYM,SYM,SYM,SYM,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ SYM,SYM,SYM,SYM, 97,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 98,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
- SYM,SYM,SYM,SYM, 85, 86, 79,SYM,SYM,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* DX */
- 8, 5, 20, 12, 2, 1, 23, 16, 19, 0, 24, 18, 4, 11, 7, 22, /* EX */
- 10, 17, 13, 25, 15, 26, 21, 14, 3, 9, 6,CTR,CTR,SYM,SYM,CTR, /* FX */
+ SYM,SYM,SYM,SYM, 99,100, 88,SYM,SYM,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* DX */
+ 8, 6, 20, 12, 2, 1, 23, 16, 19, 0, 24, 18, 4, 11, 7, 22, /* EX */
+ 10, 17, 13, 25, 15, 27, 21, 14, 3, 9, 5,CTR,CTR,SYM,SYM,CTR, /* FX */
+};
+/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+
+static const unsigned char Ibm862_CharToOrderMap[] =
+{
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
+ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
+ SYM, 46, 47, 45, 54, 59, 66, 63, 69, 48, 67, 72, 56, 53, 62, 68, /* 4X */
+ 57, 80, 58, 52, 55, 64, 65, 70, 74, 73, 77,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 28, 44, 37, 38, 26, 51, 41, 39, 29, 75, 49, 35, 40, 30, 33, /* 6X */
+ 42, 76, 31, 34, 32, 36, 50, 61, 71, 43, 60,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 8, 6, 20, 12, 2, 1, 23, 16, 19, 0, 24, 18, 4, 11, 7, 22, /* 8X */
+ 10, 17, 13, 25, 15, 27, 21, 14, 3, 9, 5,SYM,SYM,SYM,SYM,101, /* 9X */
+ 81, 86, 78, 89, 82,102,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* DX */
+ 83, 79,103, 90, 92, 84,104, 85,105, 91,106, 93,SYM,107, 87,SYM, /* EX */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const int Unicode_Char_size = 64;
static const unsigned int Unicode_CharOrder[] =
{
- 65, 45, 66, 51, 67, 42, 68, 52, 69, 57, 70, 62, 71, 58, 73, 48,
- 76, 59, 77, 53, 78, 60, 80, 56, 82, 61, 83, 46, 84, 50, 97, 28,
- 98, 47, 99, 37, 100, 38, 101, 27, 102, 55, 103, 41, 104, 40, 105, 29,
- 107, 54, 108, 34, 109, 39, 110, 31, 111, 32, 112, 44, 114, 30, 115, 35,
- 116, 33, 117, 36, 118, 43, 121, 49, 122, 63, 1488, 8, 1489, 5,1490, 20,
+ 65, 46, 66, 47, 67, 45, 68, 54, 69, 59, 71, 63, 73, 48, 76, 56,
+ 77, 53, 78, 62, 80, 57, 82, 58, 83, 52, 84, 55, 97, 28, 98, 44,
+ 99, 37, 100, 38, 101, 26, 102, 51, 103, 41, 104, 39, 105, 29, 107, 49,
+ 108, 35, 109, 40, 110, 30, 111, 33, 112, 42, 114, 31, 115, 34, 116, 32,
+ 117, 36, 118, 50, 119, 61, 121, 43, 122, 60, 1488, 8, 1489, 6,1490, 20,
1491, 12, 1492, 2, 1493, 1, 1494, 23, 1495, 16, 1496, 19, 1497, 0,1498, 24,
1499, 18, 1500, 4, 1501, 11, 1502, 7, 1503, 22, 1504, 10, 1505, 17,1506, 13,
- 1507, 25, 1508, 15, 1509, 26, 1510, 21, 1511, 14, 1512, 3, 1513, 9,1514, 6,
+ 1507, 25, 1508, 15, 1509, 27, 1510, 21, 1511, 14, 1512, 3, 1513, 9,1514, 5,
};
/* Model Table:
- * Total considered sequences: 1571 / 4096
- * - Positive sequences: first 663 (0.9950033744779837)
- * - Probable sequences: next 339 (1002-663) (0.003997985590807662)
- * - Neutral sequences: last 3094 (0.0009986399312086336)
- * - Negative sequences: 2525 (off-ratio)
+ * Total considered sequences: 1640 / 4096
+ * - Positive sequences: first 688 (0.9950129360753337)
+ * - Probable sequences: next 328 (1016-688) (0.0039909002477918065)
+ * - Neutral sequences: last 3080 (0.0009961636768744953)
+ * - Negative sequences: 2456 (off-ratio)
* Negative sequences: TODO
*/
static const PRUint8 HebrewLangModel[] =
{
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,3,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,
+ 1,0,2,1,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,1,1,1,2,1,2,2,1,1,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,2,3,3,3,3,1,3,3,0,3,3,3,3,3,3,3,2,1,0,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,1,1,3,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,2,1,3,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,0,3,2,0,0,0,0,0,
+ 3,3,3,3,3,3,3,2,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,1,3,3,3,3,3,3,3,0,2,0,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,0,2,3,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,2,3,1,3,3,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,2,3,1,3,3,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,1,0,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,2,3,2,0,2,1,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,0,3,3,1,3,3,0,3,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,0,3,3,1,3,3,0,3,1,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,0,0,3,3,3,2,0,0,2,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,1,1,3,3,3,2,1,1,2,2,0,0,0,0,0,
+ 1,0,1,0,1,0,2,1,1,0,1,0,0,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,0,1,0,1,1,0,1,2,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,2,2,2,3,1,3,3,3,2,1,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,2,2,2,3,1,3,3,3,2,2,3,1,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,3,3,3,3,
+ 3,2,3,3,3,3,3,2,3,2,2,2,3,0,0,0,0,2,2,2,0,2,0,0,0,0,0,0,2,3,1,0,
+ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,3,3,3,
- 2,3,3,3,2,3,3,3,2,3,0,2,2,0,0,2,0,2,0,1,0,2,2,2,0,0,0,0,1,0,0,2,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1,3,3,3,
- 1,3,3,3,3,3,3,3,2,3,0,3,2,0,1,2,0,2,0,0,0,1,2,2,0,0,0,0,0,0,0,2,
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,2,3,3,
- 3,3,3,3,3,3,3,3,1,3,0,3,2,0,0,3,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,2,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,2,2,
- 3,3,2,3,3,2,3,2,1,3,1,2,1,0,0,2,0,2,0,0,0,1,2,2,0,0,0,1,0,0,0,2,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,2,3,
- 3,3,1,3,3,3,3,1,1,3,0,1,0,0,1,2,0,2,0,0,0,0,2,2,0,0,0,0,0,0,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,3,3,
- 3,3,3,3,3,3,2,3,1,3,0,3,3,0,0,2,0,2,2,0,0,1,2,3,0,0,0,0,0,0,0,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,3,1,
- 3,3,2,3,3,1,1,1,3,0,0,1,1,0,0,0,0,3,0,0,0,0,1,0,0,0,0,0,0,0,0,2,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,0,1,
- 3,3,3,2,3,1,2,2,0,2,0,2,2,0,0,2,0,2,0,0,0,1,1,2,0,0,0,0,0,0,0,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,3,2,1,
- 2,3,2,3,2,3,1,2,2,2,0,2,2,0,0,2,0,2,0,0,0,0,2,1,0,0,0,0,0,0,0,2,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,2,3,3,
- 2,3,3,3,1,2,2,3,0,2,0,2,2,0,0,2,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,2,0,
- 3,3,2,1,3,2,1,0,3,0,0,0,0,0,0,0,0,2,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,2,1,
- 2,1,1,2,2,1,1,2,0,1,1,0,0,0,0,1,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,1,2,
- 3,1,1,1,2,1,1,2,1,0,0,0,2,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,2,1,
- 2,2,2,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,2,2,
- 2,1,2,2,2,0,1,1,2,1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,2,2,0,
- 3,0,2,0,2,0,0,0,2,0,2,0,0,1,1,0,2,1,0,1,2,1,0,0,1,1,0,1,1,1,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,2,0,
- 2,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,2,2,0,
- 2,2,2,2,3,0,0,1,2,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
- 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,2,2,
- 0,1,2,1,2,1,1,1,1,2,2,1,1,1,1,1,1,0,2,1,1,0,1,2,1,0,0,2,2,2,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,
- 2,2,1,0,2,2,0,1,1,0,1,1,2,2,1,0,1,1,2,0,0,0,1,1,1,1,1,1,0,0,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,2,2,0,
- 2,0,2,1,3,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,1,2,
- 1,1,1,2,1,0,0,1,0,0,2,0,1,2,2,0,2,0,2,1,1,1,0,0,1,1,1,1,1,1,2,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,1,1,2,
- 1,2,2,2,2,2,0,2,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,3,0,
- 2,0,0,0,1,0,0,0,3,0,2,0,0,1,2,0,2,1,1,1,0,1,0,0,0,2,0,0,0,1,0,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,1,
- 2,0,2,0,2,0,0,0,1,0,1,0,0,2,1,0,2,1,1,1,1,1,0,0,0,1,1,0,2,1,1,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,2,1,0,
- 2,0,0,0,2,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,1,1,1,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,2,0,0,
- 2,0,0,0,2,0,0,0,0,0,1,0,0,2,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,0,0,0,
- 0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,2,2,1,
- 2,2,1,2,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0,
- 2,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,1,2,0,
- 2,0,2,1,1,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,1,1,0,1,1,0,1,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,2,
- 0,1,2,1,2,1,1,1,0,1,2,1,0,1,1,1,1,0,1,0,1,1,0,1,1,0,1,1,1,1,1,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,1,2,0,
- 2,0,1,0,1,0,0,0,1,0,0,0,0,1,1,0,1,2,0,1,1,1,0,0,1,1,0,1,2,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,
- 2,1,0,0,1,0,0,0,0,0,1,0,0,1,1,0,2,0,1,1,0,0,0,0,1,1,0,1,0,1,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,1,0,0,
- 2,0,0,0,0,0,0,0,0,0,1,0,0,2,1,0,2,0,1,2,2,1,0,0,0,1,0,0,1,0,1,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,1,0,
- 2,0,0,0,1,0,0,0,1,0,1,0,0,2,0,0,1,1,1,1,1,1,0,0,0,2,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0,
- 2,0,2,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,1,0,
- 2,1,0,1,1,0,1,0,0,0,0,1,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,3,3,3,
+ 3,1,3,3,3,3,3,2,3,3,2,3,3,0,0,0,0,2,3,2,1,0,0,0,0,0,1,0,2,2,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,2,3,3,
+ 3,3,3,3,3,3,3,1,3,3,2,1,3,0,0,1,0,2,3,2,0,0,0,0,0,0,0,0,2,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,3,3,2,
+ 3,3,3,2,3,3,3,2,1,3,0,2,2,0,0,0,0,3,1,2,0,0,0,0,0,0,0,0,2,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,3,2,2,
+ 3,3,3,2,3,2,3,1,2,3,2,3,2,0,1,0,0,3,2,2,0,0,1,0,0,0,0,0,2,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3,0,3,3,1,3,
+ 2,3,3,2,3,1,1,3,1,1,0,3,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,2,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,2,3,3,
+ 3,3,3,3,3,3,2,2,3,2,3,1,2,1,0,1,0,3,2,3,0,1,0,1,0,0,0,0,2,2,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,3,1,2,
+ 3,3,3,2,2,3,1,3,1,1,3,2,1,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,1,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,3,2,1,
+ 3,3,2,3,3,2,2,1,2,1,1,2,1,0,0,0,0,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,3,3,
+ 3,1,3,3,1,3,3,1,3,2,2,1,2,0,0,0,0,2,1,2,0,0,0,1,0,0,0,1,1,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,3,0,2,
+ 3,3,1,2,3,2,0,3,1,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,3,1,2,
+ 1,2,2,1,2,1,1,0,1,1,0,2,1,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,3,2,2,
+ 2,3,2,2,3,0,1,1,2,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,0,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,3,2,0,
+ 1,2,1,1,2,1,1,0,2,0,2,1,2,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,2,1,2,
+ 1,2,2,2,2,0,0,2,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,2,0,2,
+ 2,3,1,3,2,0,0,2,0,0,2,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,1,2,1,
+ 2,1,3,3,1,2,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,3,2,1,3,
+ 1,2,2,3,2,0,1,0,0,0,0,1,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,3,2,0,2,
+ 0,3,0,1,2,0,0,2,0,0,0,0,0,2,1,1,2,0,1,0,1,0,2,1,1,1,1,0,1,0,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,2,2,
+ 2,0,2,2,2,1,1,1,2,1,1,0,2,1,1,1,1,0,2,1,2,1,1,2,1,1,2,0,0,1,2,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,1,3,
+ 0,3,0,1,2,0,1,0,0,0,0,1,0,2,1,1,1,0,0,0,1,1,1,1,0,1,1,1,0,0,2,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,
+ 1,1,2,1,1,2,0,0,1,1,0,0,0,1,2,1,2,0,0,0,2,2,1,1,1,2,1,1,1,0,0,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,2,1,2,
+ 2,2,3,1,1,1,0,1,0,1,0,2,1,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,
+ 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,3,1,2,
+ 0,2,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,2,
+ 2,2,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,0,
+ 2,2,0,1,2,3,0,2,1,0,2,1,0,1,1,1,1,1,0,2,1,0,1,2,0,1,1,1,1,1,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,3,2,0,1,
+ 0,2,0,0,2,1,0,0,0,0,0,1,0,0,2,1,1,0,0,0,0,1,1,0,1,1,1,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,2,
+ 0,2,0,0,2,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,1,1,1,0,0,1,1,1,0,0,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,2,
+ 0,2,1,0,2,0,0,3,0,0,0,1,0,1,1,1,2,0,1,0,2,1,0,0,0,1,0,1,1,1,0,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,3,2,0,0,
+ 0,2,0,0,1,0,0,0,0,0,0,1,0,2,1,0,3,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,3,
+ 0,2,2,2,2,0,0,1,0,0,0,0,0,1,2,1,1,0,0,1,1,0,1,0,1,1,1,1,0,0,2,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,1,0,0,
+ 0,2,0,0,2,0,0,1,0,0,0,0,0,1,2,1,2,0,0,0,0,0,1,1,0,1,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,1,
+ 0,0,2,1,2,1,2,0,1,1,1,0,1,2,1,0,1,0,2,1,2,1,1,1,0,1,1,1,0,0,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,3,0,0,
+ 1,1,0,1,1,0,0,1,1,0,1,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,1,2,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,3,1,1,
+ 0,2,2,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,1,0,0,
+ 0,2,0,0,1,0,0,0,0,0,0,0,0,1,2,1,1,0,0,0,0,0,2,1,1,1,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,3,
+ 0,2,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,1,1,0,2,0,
};
@@ -264,7 +285,7 @@ const SequenceModel Iso_8859_8HebrewModel =
Iso_8859_8_CharToOrderMap,
HebrewLangModel,
64,
- (float)0.9990013600687914,
+ (float)0.9990038363231255,
PR_FALSE,
"ISO-8859-8",
"he"
@@ -275,12 +296,23 @@ const SequenceModel Windows_1255HebrewModel =
Windows_1255_CharToOrderMap,
HebrewLangModel,
64,
- (float)0.9990013600687914,
+ (float)0.9990038363231255,
PR_FALSE,
"WINDOWS-1255",
"he"
};
+const SequenceModel Ibm862HebrewModel =
+{
+ Ibm862_CharToOrderMap,
+ HebrewLangModel,
+ 64,
+ (float)0.9990038363231255,
+ PR_FALSE,
+ "IBM862",
+ "he"
+};
+
const LanguageModel HebrewModel =
{
"he",
@@ -289,7 +321,7 @@ const LanguageModel HebrewModel =
HebrewLangModel,
64,
5,
- (float)0.44272429757612475,
- 21,
- (float)0.04068630985148685,
+ (float)0.4412277399275502,
+ 22,
+ (float)0.031037939709794155,
};
diff --git a/src/nsSBCSGroupProber.cpp b/src/nsSBCSGroupProber.cpp
index ef8da36..04b8c67 100644
--- a/src/nsSBCSGroupProber.cpp
+++ b/src/nsSBCSGroupProber.cpp
@@ -46,159 +46,173 @@
nsSBCSGroupProber::nsSBCSGroupProber()
{
- mProbers[0] = new nsSingleByteCharSetProber(&Win1251RussianModel);
- mProbers[1] = new nsSingleByteCharSetProber(&Koi8rRussianModel);
- mProbers[2] = new nsSingleByteCharSetProber(&Latin5RussianModel);
- mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicRussianModel);
- mProbers[4] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
- mProbers[5] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
-
- mProbers[6] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
- mProbers[7] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
-
- mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
- mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
-
nsHebrewProber *hebprober = new nsHebrewProber();
- // Notice: Any change in these indexes - 10,11,12 must be reflected
- // in the code below as well.
- mProbers[10] = hebprober;
- mProbers[11] = new nsSingleByteCharSetProber(&Windows_1255HebrewModel, PR_FALSE, hebprober); // Logical Hebrew
- mProbers[12] = new nsSingleByteCharSetProber(&Windows_1255HebrewModel, PR_TRUE, hebprober); // Visual Hebrew
+ PRUint32 heb_prober_idx;
+ PRUint32 n = 0;
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Win1251RussianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Koi8rRussianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Latin5RussianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&MacCyrillicRussianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
+
+ heb_prober_idx = n;
+ mProbers[n++] = hebprober;
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1255HebrewModel, PR_FALSE, hebprober); // Logical Hebrew
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1255HebrewModel, PR_TRUE, hebprober); // Visual Hebrew
// Tell the Hebrew prober about the logical and visual probers
- if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
+ if (mProbers[heb_prober_idx] && mProbers[heb_prober_idx + 1] && mProbers[heb_prober_idx + 2]) // all are not null
{
- hebprober->SetModelProbers(mProbers[11], mProbers[12]);
+ hebprober->SetModelProbers(mProbers[heb_prober_idx + 1], mProbers[heb_prober_idx + 2]);
}
else // One or more is null. avoid any Hebrew probing, null them all
{
- for (PRUint32 i = 10; i <= 12; ++i)
+ for (PRUint32 i = heb_prober_idx; i <= heb_prober_idx + 2; ++i)
{
delete mProbers[i];
mProbers[i] = 0;
}
}
-
- mProbers[13] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
- mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_11ThaiModel);
-
- mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
- mProbers[16] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
- mProbers[17] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
-
- mProbers[18] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel);
- mProbers[19] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel);
- mProbers[20] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel);
-
- mProbers[21] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
- mProbers[22] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
-
- mProbers[23] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
- mProbers[24] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
-
- mProbers[25] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
-
- mProbers[26] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
- mProbers[27] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
-
- mProbers[28] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
- mProbers[29] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
-
- mProbers[30] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
- mProbers[31] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
-
- mProbers[32] = new nsSingleByteCharSetProber(&Iso_8859_15DanishModel);
- mProbers[33] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel);
- mProbers[34] = new nsSingleByteCharSetProber(&Windows_1252DanishModel);
- mProbers[35] = new nsSingleByteCharSetProber(&Ibm865DanishModel);
-
- mProbers[36] = new nsSingleByteCharSetProber(&Iso_8859_13LithuanianModel);
- mProbers[37] = new nsSingleByteCharSetProber(&Iso_8859_10LithuanianModel);
- mProbers[38] = new nsSingleByteCharSetProber(&Iso_8859_4LithuanianModel);
-
- mProbers[39] = new nsSingleByteCharSetProber(&Iso_8859_13LatvianModel);
- mProbers[40] = new nsSingleByteCharSetProber(&Iso_8859_10LatvianModel);
- mProbers[41] = new nsSingleByteCharSetProber(&Iso_8859_4LatvianModel);
-
- mProbers[42] = new nsSingleByteCharSetProber(&Iso_8859_1PortugueseModel);
- mProbers[43] = new nsSingleByteCharSetProber(&Iso_8859_9PortugueseModel);
- mProbers[44] = new nsSingleByteCharSetProber(&Iso_8859_15PortugueseModel);
- mProbers[45] = new nsSingleByteCharSetProber(&Windows_1252PortugueseModel);
-
- mProbers[46] = new nsSingleByteCharSetProber(&Iso_8859_3MalteseModel);
-
- mProbers[47] = new nsSingleByteCharSetProber(&Windows_1250CzechModel);
- mProbers[48] = new nsSingleByteCharSetProber(&Iso_8859_2CzechModel);
- mProbers[49] = new nsSingleByteCharSetProber(&Mac_CentraleuropeCzechModel);
- mProbers[50] = new nsSingleByteCharSetProber(&Ibm852CzechModel);
-
- mProbers[51] = new nsSingleByteCharSetProber(&Windows_1250SlovakModel);
- mProbers[52] = new nsSingleByteCharSetProber(&Iso_8859_2SlovakModel);
- mProbers[53] = new nsSingleByteCharSetProber(&Mac_CentraleuropeSlovakModel);
- mProbers[54] = new nsSingleByteCharSetProber(&Ibm852SlovakModel);
-
- mProbers[55] = new nsSingleByteCharSetProber(&Windows_1250PolishModel);
- mProbers[56] = new nsSingleByteCharSetProber(&Iso_8859_2PolishModel);
- mProbers[57] = new nsSingleByteCharSetProber(&Iso_8859_13PolishModel);
- mProbers[58] = new nsSingleByteCharSetProber(&Iso_8859_16PolishModel);
- mProbers[59] = new nsSingleByteCharSetProber(&Mac_CentraleuropePolishModel);
- mProbers[60] = new nsSingleByteCharSetProber(&Ibm852PolishModel);
-
- mProbers[61] = new nsSingleByteCharSetProber(&Iso_8859_1FinnishModel);
- mProbers[62] = new nsSingleByteCharSetProber(&Iso_8859_4FinnishModel);
- mProbers[63] = new nsSingleByteCharSetProber(&Iso_8859_9FinnishModel);
- mProbers[64] = new nsSingleByteCharSetProber(&Iso_8859_13FinnishModel);
- mProbers[65] = new nsSingleByteCharSetProber(&Iso_8859_15FinnishModel);
- mProbers[66] = new nsSingleByteCharSetProber(&Windows_1252FinnishModel);
-
- mProbers[67] = new nsSingleByteCharSetProber(&Iso_8859_1ItalianModel);
- mProbers[68] = new nsSingleByteCharSetProber(&Iso_8859_3ItalianModel);
- mProbers[69] = new nsSingleByteCharSetProber(&Iso_8859_9ItalianModel);
- mProbers[70] = new nsSingleByteCharSetProber(&Iso_8859_15ItalianModel);
- mProbers[71] = new nsSingleByteCharSetProber(&Windows_1252ItalianModel);
-
- mProbers[72] = new nsSingleByteCharSetProber(&Windows_1250CroatianModel);
- mProbers[73] = new nsSingleByteCharSetProber(&Iso_8859_2CroatianModel);
- mProbers[74] = new nsSingleByteCharSetProber(&Iso_8859_13CroatianModel);
- mProbers[75] = new nsSingleByteCharSetProber(&Iso_8859_16CroatianModel);
- mProbers[76] = new nsSingleByteCharSetProber(&Mac_CentraleuropeCroatianModel);
- mProbers[77] = new nsSingleByteCharSetProber(&Ibm852CroatianModel);
-
- mProbers[78] = new nsSingleByteCharSetProber(&Windows_1252EstonianModel);
- mProbers[79] = new nsSingleByteCharSetProber(&Windows_1257EstonianModel);
- mProbers[80] = new nsSingleByteCharSetProber(&Iso_8859_4EstonianModel);
- mProbers[81] = new nsSingleByteCharSetProber(&Iso_8859_13EstonianModel);
- mProbers[82] = new nsSingleByteCharSetProber(&Iso_8859_15EstonianModel);
-
- mProbers[83] = new nsSingleByteCharSetProber(&Iso_8859_1IrishModel);
- mProbers[84] = new nsSingleByteCharSetProber(&Iso_8859_9IrishModel);
- mProbers[85] = new nsSingleByteCharSetProber(&Iso_8859_15IrishModel);
- mProbers[86] = new nsSingleByteCharSetProber(&Windows_1252IrishModel);
-
- mProbers[87] = new nsSingleByteCharSetProber(&Windows_1250RomanianModel);
- mProbers[88] = new nsSingleByteCharSetProber(&Iso_8859_2RomanianModel);
- mProbers[89] = new nsSingleByteCharSetProber(&Iso_8859_16RomanianModel);
- mProbers[90] = new nsSingleByteCharSetProber(&Ibm852RomanianModel);
-
- mProbers[91] = new nsSingleByteCharSetProber(&Windows_1250SloveneModel);
- mProbers[92] = new nsSingleByteCharSetProber(&Iso_8859_2SloveneModel);
- mProbers[93] = new nsSingleByteCharSetProber(&Iso_8859_16SloveneModel);
- mProbers[94] = new nsSingleByteCharSetProber(&Mac_CentraleuropeSloveneModel);
- mProbers[95] = new nsSingleByteCharSetProber(&Ibm852SloveneModel);
-
- mProbers[96] = new nsSingleByteCharSetProber(&Iso_8859_1SwedishModel);
- mProbers[97] = new nsSingleByteCharSetProber(&Iso_8859_4SwedishModel);
- mProbers[98] = new nsSingleByteCharSetProber(&Iso_8859_9SwedishModel);
- mProbers[99] = new nsSingleByteCharSetProber(&Iso_8859_15SwedishModel);
- mProbers[100] = new nsSingleByteCharSetProber(&Windows_1252SwedishModel);
-
- mProbers[101] = new nsSingleByteCharSetProber(&Iso_8859_15NorwegianModel);
- mProbers[102] = new nsSingleByteCharSetProber(&Iso_8859_1NorwegianModel);
- mProbers[103] = new nsSingleByteCharSetProber(&Windows_1252NorwegianModel);
- mProbers[104] = new nsSingleByteCharSetProber(&Ibm865NorwegianModel);
-
- mProbers[105] = new nsSingleByteCharSetProber(&Iso_8859_1EnglishModel);
- mProbers[106] = new nsSingleByteCharSetProber(&Windows_1252EnglishModel);
+ /* XXX: The Hebrew case should be verified a bit more closely. It doesn't look
+ * to me like the additional data handling in nsHebrewProber is really needed
+ * ("Final letter analysis for logical-visual decision").
+ * For this new support of Hebrew with IBM-862, aka CP862, I use the sequence
+ * model directly, in 2 modes (reversed or not), so that it handles both the
+ * logical and visual Hebrew cases. Wikipedia says: "Hebrew text encoded using
+ * code page 862 was usually stored in visual order; nevertheless, a few DOS
+ * applications, notably a word processor named EinsteinWriter, stored Hebrew
+ * in logical order."
+ */
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm862HebrewModel, PR_FALSE, NULL); // Logical Hebrew
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm862HebrewModel, PR_TRUE, NULL); // Visual Hebrew
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_11ThaiModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15DanishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252DanishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm865DanishModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_13LithuanianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_10LithuanianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_4LithuanianModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_13LatvianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_10LatvianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_4LatvianModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1PortugueseModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_9PortugueseModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15PortugueseModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252PortugueseModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_3MalteseModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1250CzechModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_2CzechModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Mac_CentraleuropeCzechModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm852CzechModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1250SlovakModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_2SlovakModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Mac_CentraleuropeSlovakModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm852SlovakModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1250PolishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_2PolishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_13PolishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_16PolishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Mac_CentraleuropePolishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm852PolishModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1FinnishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_4FinnishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_9FinnishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_13FinnishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15FinnishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252FinnishModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1ItalianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_3ItalianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_9ItalianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15ItalianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252ItalianModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1250CroatianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_2CroatianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_13CroatianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_16CroatianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Mac_CentraleuropeCroatianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm852CroatianModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252EstonianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1257EstonianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_4EstonianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_13EstonianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15EstonianModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1IrishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_9IrishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15IrishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252IrishModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1250RomanianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_2RomanianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_16RomanianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm852RomanianModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1250SloveneModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_2SloveneModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_16SloveneModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Mac_CentraleuropeSloveneModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm852SloveneModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1SwedishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_4SwedishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_9SwedishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15SwedishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252SwedishModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_15NorwegianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1NorwegianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252NorwegianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm865NorwegianModel);
+
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1EnglishModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252EnglishModel);
Reset();
}
diff --git a/src/nsSBCSGroupProber.h b/src/nsSBCSGroupProber.h
index 1c473f2..57102a0 100644
--- a/src/nsSBCSGroupProber.h
+++ b/src/nsSBCSGroupProber.h
@@ -40,7 +40,7 @@
#define nsSBCSGroupProber_h__
-#define NUM_OF_SBCS_PROBERS 107
+#define NUM_OF_SBCS_PROBERS 109
class nsCharSetProber;
class nsSBCSGroupProber: public nsCharSetProber {
diff --git a/src/nsSBCharSetProber.h b/src/nsSBCharSetProber.h
index 2fad476..bccb9e1 100644
--- a/src/nsSBCharSetProber.h
+++ b/src/nsSBCharSetProber.h
@@ -151,6 +151,7 @@ extern const SequenceModel Iso_8859_2HungarianModel;
extern const SequenceModel Windows_1250HungarianModel;
extern const SequenceModel Windows_1255HebrewModel;
+extern const SequenceModel Ibm862HebrewModel;
extern const SequenceModel Tis_620ThaiModel;
extern const SequenceModel Iso_8859_11ThaiModel;