diff options
author | Jehan <jehan@girinstud.io> | 2022-12-14 20:16:44 +0100 |
---|---|---|
committer | Jehan <jehan@girinstud.io> | 2022-12-14 20:16:44 +0100 |
commit | 6bb1b3e101e1655a5ff22cd816def6a2e28de749 (patch) | |
tree | 2d18eb869906421912cb0db3569c04a3f4571b95 | |
parent | e311b64cd9d7255365cf35f07f4d4ac768c500cc (diff) |
scripts: all language models rebuilt with the new ratio data.
63 files changed, 11714 insertions, 8583 deletions
diff --git a/script/BuildLangModelLogs/LangArabicModel.log b/script/BuildLangModelLogs/LangArabicModel.log index b7e318f..0373ca2 100644 --- a/script/BuildLangModelLogs/LangArabicModel.log +++ b/script/BuildLangModelLogs/LangArabicModel.log @@ -1,192 +1,269 @@ = Logs of language model for Arabic (ar) = - Generated by BuildLangModel.py -- Started: 2021-03-16 11:33:00.432776 +- Started: 2022-12-14 17:50:01.519149 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -الصفحة_الرئيسية (revision 52017134) -1442 هـ (revision 53072582) -1521 (revision 51053075) -15 مارس (revision 53063546) -16 مارس (revision 53079323) -1775 (revision 50066071) -17 مارس (revision 52787393) -1977 (revision 52535026) -1988 (revision 52921343) -1989 (revision 52945821) -2021 (revision 53072089) -3 شعبان (revision 53076204) -آذار (revision 50305961) -آفة (كائن حي) (revision 50728417) -أبو الريحان البيروني (revision 52901629) -أبو موسى الأشعري (revision 52327088) -أتراك (revision 52923553) -أحلام الجريتلي (revision 53054581) -أستراليا المفتوحة 2021 (revision 52951662) -ألمان (revision 51707635) -أم (revision 52894160) -أمريكا الجنوبية (revision 52623681) -إسبان (revision 53023219) -إسبانيا (revision 52945464) -إسكندنافيا (revision 52901031) -إيران (revision 53077058) -اشتباكات الصحراء الغربية 2020 (revision 52776181) -اغتيال (revision 52605819) -الإسلام (revision 53061751) -الاحتجاجات الروسية 2021 (revision 52959948) -التفسير الموضوعي (تفسير) (revision 53063711) -الجزري (revision 52615628) -الحزب التقدمي الاشتراكي (revision 52719072) -الحضارة الغربية (revision 52663404) -الحملة الصليبية الثانية (revision 53028660) -الشمس (revision 53011313) -العراق (revision 53078113) -الفتح الإسلامي لفارس (revision 52960739) -الفتح الإسلامي للشام (revision 53000955) -الفتح الإسلامي للمغرب (revision 53015306) -الفتح الإسلامي لمصر (revision 52728321) -الفلبين (revision 53043940) -القرآن الكريم (revision 53047128) -القمر (revision 52920452) -القواعد الصاروخية الإيرانية تحت الأرض (revision 50043780) -اللغة العربية (revision 52929542) -المكثرون لرواية الحديث (revision 51989111) -الهجوم الكيماوي على حلبجة (revision 52723565) -انتهازية (revision 52279176) -انقراض العصر الطباشيري-الباليوجيني (revision 52688972) -باتا (revision 46639883) -باتريك أتشي (revision 53024512) -بحث علمي (revision 51195242) -برسفيرنس (مركبة جوالة) (revision 52965815) -برنامج أبولو (revision 52571274) -بعقلين (revision 48961465) -بلاد السند (revision 52279660) -بوتان (revision 52999635) -ترابط زوجي (revision 50219604) -تسمية ثنائية (revision 53076878) -تفجيرات باتا 2021 (revision 53043062) -تقويم هجري (revision 53077283) -توقيت عالمي منسق (revision 52887567) -تونس (revision 53047571) -جائحة فيروس كورونا 2019–20 (revision 52954613) -جائزة الملك فيصل العالمية (revision 52691751) -جائزة الملك فيصل العالمية في الدراسات الإسلامية (revision 53078767) -جواثم (revision 49526826) -جيرارد كايبر (revision 52265632) -جيفة (revision 52535861) -حامد باكايوكو (revision 53079223) -حديث نبوي (revision 53075245) -حرس الثورة الإسلامية (revision 53019030) -حرملة بن كاهل (revision 52891720) -حرية (revision 52761732) -حصار البصرة (1775) (revision 52323981) -خوسيه دي سان مارتين (revision 49958538) -خير الدين حسيب (revision 53076407) -دير دوريت (revision 32094684) -ذكاء (revision 52187723) -روما القديمة (revision 51009123) -ساحل العاج (revision 52429752) -ساعة الفيل (revision 52709413) -سالم بن عبد الله بن عمر بن الخطاب (revision 51752881) -سرب (revision 52828105) -سرعة الصوت (revision 52975385) -سرعة الضوء (revision 52244159) -سوسن ربيع (revision 53077510) -سيبيريا (revision 52919273) -سيمون بوليفار (revision 53011030) -شريعة إسلامية (revision 53070018) -شفق (revision 48963569) -صباح عبد الجليل (revision 52995683) -صحابة (revision 52201334) -صلاة الجمعة (revision 52577966) -صلاة العيد (revision 49726428) -طائر التعريشة الساتاني (revision 44913097) -طائر القيثارة (revision 52673886) -طابا (revision 53078953) -عبد الله الرضيع (revision 50505781) -عبد الله بن عباس (revision 52388329) +الصفحة_الرئيسية (revision 53908210) +ياسويشيرو ياماموتو (revision 53768811) +برلمان معلق (revision 59712805) +سعيد شيبان (revision 60044722) +الغزو الروسي لأوكرانيا 2022 (revision 59739336) +فلسطين (revision 60126849) +قناة يوميأوري (revision 58985377) +بلجيكا (revision 60055993) +دومينيكا (revision 60102624) +إدوارد الثامن ملك المملكة المتحدة (revision 59684159) +المسجد الأقصى (revision 60010716) +كارول الثاني (revision 60092680) +ديردفيل (مارفل) (revision 59309094) +حزب العمال (المملكة المتحدة) (revision 59455493) +أثينا (revision 60058774) +اللغة اليابانية (revision 59643213) +جورج أمير ويلز (revision 59463685) +مغارة الزوتية (revision 53848617) +المحقق كونان: المستهدف هو كوغورو!! (revision 53565135) +فرنسيسكانية (revision 53261183) +سرطان الحنجرة (revision 58991097) +رخصة القيادة في السويد (revision 57454687) +جبهة التحرير الوطني الجزائرية (revision 59793517) +قائمة مساطب المسجد الأقصى (revision 58574376) +تويتر (revision 60057712) +جمهورية أيرلندا (revision 59622911) +كورا (موقع إلكتروني) (revision 60121418) +31 أغسطس (revision 60018430) +جاك كيربي (revision 59305606) +غزوة حمراء الأسد (revision 59377317) +ستيفن هاربر (revision 58457881) +محراب (revision 59936640) +ضبط استنادي (revision 60153585) +جمعية العلماء المسلمين الجزائريين (revision 60138725) +المركز الوطني للأرصاد الجوية (revision 38328493) +13 فبراير (revision 60018277) +شين فين (revision 59033932) +مسجد المنصورة (revision 59851027) +جوليا غيلارد (revision 59864428) +نيلسون مانديلا (revision 60012201) +المحقق كونان: كونان وهيجي والطفل المفقود (revision 56129506) +كايا كالاس (revision 59812613) +شركة (revision 59840479) +أنيسة بنت الحارث (revision 56317507) +سوق العطارين (القدس) (revision 58366700) +رئيس وزراء المملكة المتحدة (revision 59945336) +منطقة حظر الطيران المقترحة خلال الغزو الروسي لأوكرانيا 2022 (revision 59702772) +عمارة (revision 59773216) +محامي (revision 59148036) +إيمانويل أمير بلجيكا (revision 42787317) +مريم بنت عمران (revision 60068097) +1958 (revision 60162727) +2007 (revision 60100863) +وليام هنري دوق غلوستر وأدنبرة (revision 59099200) +الفتح الإسلامي للأندلس (revision 60141779) +فاليري زالوجني (revision 58051942) +مولاي بلحميسي (revision 59275670) +اللغة الإنجليزية (revision 60106396) +كلاوس يوهانيس (revision 58796826) +هيروشي أغاسا (revision 58619633) +باب السلسلة (القدس) (revision 57899330) +قائمة حلقات دراما المحقق كونان (revision 53625091) +غانا (revision 59971577) +المحقق كونان (الموسم 21) (revision 56671783) +دير الكازانوفا (القدس) (revision 58136604) +الحركة الاشتراكية اليونانية (revision 49770738) +ساحل العاج (revision 60058640) +الدورة الاستثنائية الطارئة للجمعية العامة للأمم المتحدة (revision 60155519) +زيمبابوي (revision 60058638) +إنجليزية فلبينية (revision 57208588) +تونس (revision 60058567) +بيل إيفرت (revision 52801229) +تشيرنوبل (revision 59264887) +الحزب الوطني الأسترالي (revision 56681583) +أحمد عروة (revision 58410096) +كانساي (revision 53733839) +غينيا الجديدة (revision 58964634) +هارولد ويلسون (revision 59748438) +جون ميجر (revision 60042000) +واي باك مشين (revision 60022392) +بيزو كولومبي (revision 58027565) +الأمير كيريل من بلغاريا (revision 59227599) +إذاعة (revision 59769741) +المحقق كونان: البحث عن الجوهرة المفقودة!! (revision 54434659) +ستان لي (revision 59610508) +المنتقمون (revision 53566626) +أب (revision 59965717) +الديمقراطيون الليبراليون (المملكة المتحدة) (revision 60026877) +إدوارد هيث (revision 59172917) +7 يونيو (revision 60018465) +خوان إنريك فايفس سيسيليا (revision 59401968) +مطبخ الشرق الأوسط (revision 56657579) +منبر النبي محمد (revision 59431122) +وزارة التعليم العالي والبحث العلمي (الجزائر) (revision 59419639) +نينجا (revision 59605538) +مارفل كومكس (revision 60161839) +بطالمة (revision 60151733) +حكومة ائتلافية (revision 57251185) +بكالوريا (revision 57871364) +سونار (revision 53538810) +نيوزويك (revision 58347101) +جزر كوك (revision 60102613) +خلفية تاريخية عن الحرب الروسية الأوكرانية (revision 59375552) +إدوارد وستمنستر (أمير ويلز) (revision 51362743) +ويكيميديا كومنز (revision 59582200) +فلاندرز الغربية (revision 50429464) +يوكو أوكينو (revision 59853644) +اتحاد مالايا (revision 60084499) +ممثلية سويسرا في رام الله (revision 59653719) +بيعة الرضوان (revision 60068095) +إدوارد الخامس (revision 59636969) +جمهورية دونيتسك الشعبية (revision 59440442) +بلماح (revision 59554493) +المدافعون (قصص مصورة) (revision 58913200) +محطة تلفزيون (revision 60153468) +الرصد بالصدى عند الحيوانات (revision 50523152) +فؤاد السنيورة (revision 55640253) +أرمينيا (revision 59996421) +وزارة التربية الوطنية (الجزائر) (revision 59101448) +وزارة الشؤون الدينية (الجزائر) (revision 58251049) +شوكيتشي هانيدا (revision 59863214) +الحرب الباردة الثانية (revision 60135318) +25 يوليو (revision 60018383) +حدائق البهائيين (revision 59938837) +المحقق كونان (الموسم 17) (revision 60112839) +ليوبولد الثالث ملك بلجيكا (revision 56358591) +إميل كونستانتينسكو (revision 49789037) +المحقق كونان: إستراتيجية ما فوق الأعماق (revision 58049921) +حاييم وايزمان (revision 59916348) +هنري الثامن ملك إنجلترا (revision 59997067) +كهف كيلو (revision 53877153) +بلموت (revision 59078925) +الحرب الفرنسية والهندية (revision 59703093) +عالم مارفل (revision 53752274) +كهوف نهر كلاسيس (revision 60130561) +عمل تجاري (revision 59990514) +كريات موصقين (revision 55098781) +كولن ماثيو (revision 59770283) +15 أكتوبر (revision 60098594) +8 يونيو (revision 60018475) +كهف برونيكيل (revision 53884836) +ردود الفعل الدولية لضم الاتحاد الروسي شبه جزيرة القرم (revision 58066215) +زبابات (revision 50485164) +رجل دولة (revision 57909788) +أرشيبالد بريمروز (revision 58653820) +أدريان لامو (revision 59416691) +ماي سبيس (revision 60059133) +حرب النجوم (revision 60147918) +14 نوفمبر (revision 59982309) +الولايات المتحدة (revision 60011066) +عكا (revision 60046919) +جرجخيلة (revision 59768522) +إدارة المرض (revision 58362794) +رئيس وزراء إيطاليا (revision 60039114) +محكمة النقض (revision 59828395) +حكومة بومدين الثالثة (revision 57940832) +جامع كتشاوة (revision 59782057) +وكالة المخابرات المركزية (revision 59725458) +دير القديسة مريم (القدس) (revision 50310217) +يوري راتاس (revision 58701767) +مهنة (revision 58041911) +تايلاند (revision 60057966) +لوط (revision 59919480) +المحقق كونان (الموسم 11) (revision 56671725) +أليك دوغلاس هيوم (revision 59561312) +الأمير ليوبولد دوق ألباني (revision 52022607) +زينب بنت علي (revision 60141694) +ميريك توبولانيك (revision 50101074) +تعليم إعدادي (revision 60033035) +أشرف الوسائل إلى فهم الشمائل (revision 51950372) +قريش (revision 60139470) +بلاد الغال (revision 60124529) +ألبير الأول من بلجيكا (revision 58261671) +بكالوريوس (revision 59587135) +أكتوبر (revision 60018486) +تقويم ميلادي (revision 60025473) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 11:42:48.951707 +- Wikipedia parsing ended at: 2022-12-14 17:55:19.583192 -101 characters appeared 1520487 times. +100 characters appeared 1285053 times. -First 64 characters: -[ 0] Char ا: 14.550206611434364 % -[ 1] Char ل: 11.41772340046314 % -[ 2] Char ي: 7.748043883308441 % -[ 3] Char م: 6.294036055553254 % -[ 4] Char و: 5.778148711564124 % -[ 5] Char ن: 5.249304992413615 % -[ 6] Char ر: 4.93203822196441 % -[ 7] Char ت: 4.071261378755622 % -[ 8] Char ب: 3.8685631643019636 % -[ 9] Char ة: 3.2951942371095577 % -[10] Char ع: 3.235344991440243 % -[11] Char د: 2.921103567475421 % -[12] Char س: 2.615806646160079 % -[13] Char ف: 2.609032500771135 % -[14] Char ه: 2.300841769775079 % -[15] Char ق: 2.1174794654607374 % -[16] Char أ: 2.0117238753110023 % -[17] Char ك: 1.952006166445356 % -[18] Char ح: 1.788966298297848 % -[19] Char ج: 1.297939410202126 % -[20] Char ط: 0.9575221623072082 % -[21] Char ص: 0.8946475701535099 % -[22] Char ش: 0.8939898861351658 % -[23] Char إ: 0.8849796150838514 % -[24] Char ى: 0.8706421034839494 % -[25] Char خ: 0.7856693283138889 % -[26] Char ث: 0.6599201440064926 % -[27] Char ز: 0.6011231927665281 % -[28] Char ذ: 0.5680416866438187 % -[29] Char ض: 0.5665290134016273 % -[30] Char غ: 0.5086528197873444 % -[31] Char ئ: 0.3490329085352259 % -[32] Char ء: 0.29898315473923814 % -[33] Char ظ: 0.20197476203348005 % -[34] Char آ: 0.15396382869435912 % -[35] Char ؤ: 0.09148384695166746 % -[36] Char a: 0.05748158320327632 % -[37] Char e: 0.045972112882254175 % -[38] Char i: 0.042946766397871206 % -[39] Char t: 0.042223313977692675 % -[40] Char ـ: 0.03972411470798501 % -[41] Char r: 0.035778010597920275 % -[42] Char s: 0.034988789775907324 % -[43] Char n: 0.031437296076849065 % -[44] Char l: 0.030319233245664053 % -[45] Char o: 0.029661549227319933 % -[46] Char c: 0.0209143517833431 % -[47] Char m: 0.01861245771913867 % -[48] Char d: 0.018086310504463375 % -[49] Char y: 0.015060964020080407 % -[50] Char h: 0.01479789041274276 % -[51] Char p: 0.01479789041274276 % -[52] Char u: 0.014732122010908347 % -[53] Char f: 0.010194102284333902 % -[54] Char C: 0.008221050229301533 % -[55] Char b: 0.007892208220129471 % -[56] Char g: 0.007431829407288587 % -[57] Char v: 0.007234524201785348 % -[58] Char S: 0.007168755799950937 % -[59] Char E: 0.006905682192613288 % -[60] Char I: 0.006445303379772402 % -[61] Char T: 0.006379534977937989 % -[62] Char A: 0.005853387763262692 % -[63] Char B: 0.005458777352256218 % +Most Frequent characters: +[ 0] Char ا: 14.592316425859478 % +[ 1] Char ل: 11.363422364680678 % +[ 2] Char ي: 8.257947337580628 % +[ 3] Char م: 6.40028076662986 % +[ 4] Char و: 5.487322312776204 % +[ 5] Char ن: 5.255892169428032 % +[ 6] Char ر: 4.693892002897935 % +[ 7] Char ت: 4.279123117879185 % +[ 8] Char ب: 3.6930772505102905 % +[ 9] Char ة: 3.349667289987261 % +[10] Char ع: 3.138236321770386 % +[11] Char د: 3.0520920148818766 % +[12] Char س: 2.704946799859617 % +[13] Char ف: 2.4838664241863957 % +[14] Char ك: 2.0565688730348084 % +[15] Char ه: 2.0360249732890394 % +[16] Char ق: 2.0251304809996165 % +[17] Char أ: 1.988556114027982 % +[18] Char ح: 1.7175945272296163 % +[19] Char ج: 1.3692820451763468 % +[20] Char ط: 0.9084450213337505 % +[21] Char ش: 0.8991847028877408 % +[22] Char ى: 0.8644779631657216 % +[23] Char ص: 0.8512489368142793 % +[24] Char إ: 0.7971655643775003 % +[25] Char خ: 0.7792674699020197 % +[26] Char ز: 0.7196590335184618 % +[27] Char ث: 0.6349154470671639 % +[28] Char ض: 0.5499384072096637 % +[29] Char ذ: 0.5261261597770676 % +[30] Char غ: 0.4827038262235099 % +[31] Char ئ: 0.4028627613024521 % +[32] Char ء: 0.29749745730331745 % +[33] Char ظ: 0.20006956911504817 % +[34] Char ؤ: 0.1005406002709616 % +[35] Char آ: 0.09275882006423081 % +[36] Char e: 0.07991888272312503 % +[37] Char a: 0.07851816228591349 % +[38] Char i: 0.06980256845437503 % +[39] Char n: 0.06178733484144234 % +[40] Char o: 0.055639728478125025 % +[41] Char r: 0.05003684672927887 % +[42] Char t: 0.0442783293762981 % +[43] Char l: 0.03984271465846156 % +[44] Char s: 0.03914235443985579 % +[45] Char ـ: 0.032683476868269244 % +[46] Char c: 0.028092226546298088 % +[47] Char u: 0.028014408744230782 % +[48] Char d: 0.021555531172644242 % +[49] Char h: 0.020466081943701933 % +[50] Char m: 0.01852063689201924 % +[51] Char g: 0.014707564590721159 % +[52] Char M: 0.014318475580384621 % +[53] Char C: 0.014162839976250008 % +[54] Char A: 0.013773750965913469 % +[55] Char p: 0.01369593316384616 % +[56] Char S: 0.013618115361778852 % +[57] Char B: 0.013618115361778852 % +[58] Char b: 0.01338466195557693 % +[59] Char y: 0.011205763497692313 % +[60] Char D: 0.010349767674951929 % +[61] Char k: 0.010349767674951929 % +[62] Char I: 0.00996067866461539 % +[63] Char T: 0.009493771852211542 % -The first 64 characters have an accumulated ratio of 0.9992864128400966. +The first 64 characters have an accumulated ratio of 0.9988303984349284. +The first 4 characters have an accumulated ratio of 0.4061396689475064. +All characters whose order is over 27 have an accumulated ratio of 0.034834360917409636. -1820 sequences found. +1932 sequences found. -First 512 (typical positive ratio): 0.9644868613755061 -Next 512 (512-1024): 0.0774804388330844 -Rest: 0.0019191680534433112 +First 902 (typical positive ratio): 0.9950136374489401 +Next 424 (1326-902): 0.003987703013712651 +Rest: 0.0009986595373472351 -- Processing end: 2021-03-16 11:42:49.142159 +- Processing end: 2022-12-14 17:55:20.043871 diff --git a/script/BuildLangModelLogs/LangCroatianModel.log b/script/BuildLangModelLogs/LangCroatianModel.log index 542a251..c9ff3dd 100644 --- a/script/BuildLangModelLogs/LangCroatianModel.log +++ b/script/BuildLangModelLogs/LangCroatianModel.log @@ -1,157 +1,230 @@ = Logs of language model for Croatian (hr) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:09:36.740256 +- Started: 2022-12-14 17:50:26.432625 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Fizika čvrstog stanja (revision 5777686) -Agregatno stanje (revision 5764830) -Alnico (revision 3915185) -Aluminij (revision 5755266) -Amorfna tvar (revision 5392804) -Antimon (revision 5435171) -Antoine Henri Becquerel (revision 5556977) +Fizika čvrstog stanja (revision 6149421) +Ugljik (revision 6486622) +Pozitron (revision 6132287) +Dinastija Han (revision 6396262) +Feromagnetizam (revision 6361356) +Napon (revision 6411070) +Integrirani krug (revision 6244897) +Atom (revision 6527497) +Fotodioda (revision 6254765) +Električna vodljivost (revision 6258731) +Kondenzirana tvar (revision 6361399) +Ion (revision 6394254) +Magnetsko polje (revision 6227623) +Gibanje (revision 6392238) +Kadmij (revision 6238898) +Papir (revision 6537929) +Albert Einstein (revision 6494979) +Kovina (revision 6358135) +Električna zavojnica (revision 6258730) +Kulon (revision 5439477) +Električni induktivitet (revision 6258721) +Vanadij (revision 5827591) +Curiejeva temperatura (revision 6264723) +Dijamagnetizam (revision 6262267) +Robert Noyce (revision 6427781) +Električni napon (revision 6411070) +AT&T (revision 6336204) +Fizika (revision 6357726) +Donji kvark (revision 6261061) +Bjelančevine (revision 6492854) +Konstanta (revision 6447814) +Poluvodič (revision 6162819) +Felix Bloch (revision 5784793) +Niobij (revision 6215325) +Barij (revision 6326495) +Nanoamper (revision 6524895) +Aktinidi (revision 4540857) +Etika (revision 6468711) +Elektricitet (revision 6258749) +Kalcij (revision 6397219) +Hlađenje (revision 6411090) +Helij (revision 6328120) +Termodinamika (revision 6393710) +Lenzov zakon (revision 6230710) +Jack Kilby (revision 6242606) +Ferit (magnet) (revision 6256038) +Charles-Augustin de Coulomb (revision 6267297) +Masa (revision 6463862) +Magnetski tok (revision 6469693) +Lepton (revision 6525041) +Temperatura (revision 6541246) +Magnetska histereza (revision 6227636) +Gaugino (revision 6253122) +Mangan (revision 6226876) +Strujni krug (revision 6258728) +Diferencijalne jednadžbe (revision 6521533) +Kineski (revision 6290032) Apsolutna nula (revision 5482633) -Arsen (revision 5752189) -Arthur Holly Compton (revision 5313150) -Atom (revision 5730600) -Atomska jezgra (revision 5731544) -Bell Labs (revision 4769518) -Bor (element) (revision 5549612) -Brian Josephson (revision 5446101) -Cink (revision 5556719) -Comptonov učinak (revision 5313303) -Coulombov zakon (revision 5436283) -Dijamant (revision 5775412) -Dimenzija (revision 5379791) -Dinastija Han (revision 5772176) -Dislokacija (revision 5431109) -EV (revision 5430610) -Eksponencijalna funkcija (revision 5523460) -Električna struja (revision 5653050) -Električna vodljivost (revision 5376333) -Električni izolator (revision 5258197) -Električni luk (revision 5437134) -Električni naboj (revision 5774260) -Električni otpor (revision 4904596) -Električni vodič (revision 5334900) -Električno polje (revision 5247154) -Elektrolit (revision 4858367) -Elektromagnetsko zračenje (revision 5760956) -Elektron (revision 5774256) -Elektronika (revision 5556766) -Elektronska konfiguracija (revision 4949752) -Elektronski mikroskop (revision 5439229) -Elektrotehnika (revision 5254565) -Energetika (revision 4908587) -Energija (revision 5767106) -Fermi-Diracova statistika (revision 3934172) -Feromagnetizam (revision 5392729) -Fizika (revision 5777684) -Fizika kondenzirane tvari (revision 5455580) -Fizikalna veličina (revision 5497656) -Fosfor (revision 5556869) -Fotodioda (revision 5235215) -Fotoelektrični učinak (revision 5632628) -Foton (revision 5635311) -Fotonaponski sustavi (revision 5430012) -Francuski jezik (revision 5771033) -Galij (revision 5437600) -Genitiv (revision 5767472) -Germanij (revision 5437677) -Helij (revision 5556716) -Henri (revision 3922500) -Indij (revision 5439698) -Integrirani krug (revision 5500904) -Ion (revision 5750157) -Ioniziranje (revision 5318213) -John Bardeen (revision 5182165) -Kadmij (revision 5440736) -Kelvin (revision 5240179) -Keramika (revision 5655772) -Kinetička energija (revision 5753997) -Klasična mehanika (revision 5656259) -Kompas (revision 5750313) -Kondenzacija (revision 5492249) -Kondenzirana tvar (revision 5455580) -Konstrukcija (revision 4680450) -Kovalentna veza (revision 5751506) -Kristal (revision 5455704) -Kristalna rešetka (revision 5562348) -Kristalografija (revision 4105956) -Krutine (revision 5196995) -Kubični kristalni sustav (revision 5610803) -Kubični metar (revision 5082862) -Kvantna mehanika (revision 5777687) -Latinski jezik (revision 5663325) -Luminiscencija (revision 5052601) -Magnet (revision 5743549) -Magnetizam (revision 5728489) -Magnetska permeabilnost (revision 4675996) -Magnetska vodljivost (revision 4899860) -Magnetski moment (revision 5489691) -Magnetsko polje (revision 5671905) -Materijal (revision 5748275) -Mehanika (revision 5777691) -Metal (revision 5505185) -Metan (revision 5611051) -Metar (revision 5325605) -Mjerna veličina (revision 5497656) -Molekula (revision 5773190) -Molekule (revision 5773190) -Napon (revision 5556720) -Niskotemperaturna fizika (revision 4657522) -Njemački jezik (revision 5710175) -Optika (revision 5316843) +Organizam (biologija) (revision 6212218) +Kalkulator (revision 6238647) +Strani kvark (revision 6464690) +Dioda (revision 6359047) +Valna duljina (revision 6188667) +Terakota (revision 6446711) +Brzina (revision 6463857) +Ampèreov zakon za petlju (revision 6278215) +CMOS (revision 5477968) +VIA Technologies (revision 6117720) +Matematika (revision 6544669) +Tranzistor (revision 6184941) +Agregatna stanja (revision 6544085) +Prostor (revision 5395638) +Plutonij (revision 6504330) +Mikroamper (revision 6524895) +Elektromagnetsko zračenje (revision 6411063) +Tenis (revision 6518627) +Permeabilnost vakuuma (revision 6160213) +1963. (revision 6529530) +1940. (revision 6282936) +Električna struja (revision 6511049) +Živa (revision 6201024) +Sustav organa (revision 6180122) +Xiao He (revision 6441556) +Mjerna jedinica (revision 6221493) +Heinrich Hertz (revision 6424805) +Fizikalna veličina (revision 6221489) +Računalna memorija (revision 5744674) +Polumetali (revision 5752619) +Elektrostatika (revision 6258646) +Eric Allin Cornell (revision 6257930) +Gadolinij (revision 6253597) +Fermioni (revision 6256031) +Selenij (revision 6173522) +Indukcijski motor (revision 6364014) +Bolest (revision 6537459) +Simens (revision 6433292) +Silicij (revision 6407176) +Elektromagnetsko polje (revision 3924407) +Ceh (revision 5781422) +Carl Wieman (revision 6268101) +Raderfordij (revision 6166927) +Materijal (revision 6318671) +Fosfor (revision 6254775) +Patent (revision 6159416) +Luks (revision 6228377) +Intel 4004 (revision 6463989) +Paulijev princip (revision 6411162) +Zlato (revision 6463174) +Osciloskop (revision 6536779) +Energija (revision 6258189) +Delta barion (revision 6456713) +Žene (revision 6536365) +Fotootpornik (revision 6154304) +Uranij (revision 6374654) +Mikročip (revision 6244897) +Torij (revision 6184635) +Amerika (revision 6505695) +Brzina svjetlosti (revision 6513071) +Radioaktivnost (revision 6205644) +Elektromagnetski valovi (revision 6411063) +Magnetska susceptibilnost (revision 6502820) +Elektronika (revision 6544174) +Ana Konjuh (revision 6540157) +Titranje (revision 6446466) +3. kolovoza (revision 6530528) +Dražen Ladić (revision 6545182) +Aage Niels Bohr (revision 6281538) +Stanična membrana (revision 6177775) +Definicija (revision 6262967) +Mokraćni sustav (revision 6221072) +Elementarni naboj (revision 6258636) +Rudolf Ludwig Mössbauer (revision 6171168) +Indij (revision 6245163) +Fizičar (revision 6255555) +Stari vijek (revision 6150177) +Parni stroj (revision 6159080) +Germanij (revision 6252597) +Stara Grčka (revision 6542269) +Vlačno ispitivanje (revision 6191965) +Dekonstrukcija (revision 6392434) +Ajnštajnij (revision 6280140) +Sila (revision 6392237) +Količina elektriciteta (revision 6258714) +Morfologija (biologija) (revision 5874522) +Zakon očuvanja energije (revision 6357817) +Michael Faraday (revision 6223188) +Klor (revision 6235817) +Usluga (revision 6339178) +Željezo (revision 6392353) +Kositar (revision 6234387) +Olimpijske igre (revision 6466965) +Šećer (revision 6343298) +Kristalizacija (revision 6233584) +Oblik (revision 6321736) +Latinski jezik (revision 6537991) +Švedska (revision 6501885) +Profesor (revision 6165140) +Protaktinij (revision 6165375) +Elektronska konfiguracija (revision 6258661) +Električni naboj (revision 6258714) +Pravilo desne ruke (revision 6360671) +Toplina (revision 6184471) +Chipset (revision 5087057) +Okretno magnetsko polje (revision 5286306) +Vodik (revision 6458324) +Baterija (revision 6273256) +Višestanični organizmi (revision 6191476) +Moment sile (revision 6463811) +Stari Rim (revision 6503914) +Kuk (revision 6232853) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:18:55.485669 +- Wikipedia parsing ended at: 2022-12-14 18:02:00.880876 -49 characters appeared 643453 times. +50 characters appeared 898290 times. -First 31 characters: -[ 0] Char a: 10.677081309746011 % -[ 1] Char i: 9.900023777960474 % -[ 2] Char e: 9.741037806957152 % -[ 3] Char o: 8.583843730622128 % -[ 4] Char n: 6.852404138297591 % -[ 5] Char t: 5.517885533209108 % -[ 6] Char r: 5.292383437484944 % -[ 7] Char j: 5.03952891664193 % -[ 8] Char s: 4.730104607484929 % -[ 9] Char k: 4.032773178460587 % -[10] Char l: 3.9395262746463224 % -[11] Char m: 3.8557594727198414 % -[12] Char u: 3.7656207990327184 % -[13] Char v: 3.0636270248176634 % -[14] Char p: 2.654583940085756 % -[15] Char d: 2.6340696212466175 % -[16] Char z: 1.8657151338170777 % -[17] Char g: 1.5614194043698606 % -[18] Char č: 1.1537750231951673 % -[19] Char b: 1.1304632972416013 % -[20] Char c: 1.081042438220041 % -[21] Char h: 0.7697531909867543 % -[22] Char f: 0.4845730768214617 % -[23] Char š: 0.4174353060751912 % -[24] Char ž: 0.365217039939203 % -[25] Char ć: 0.35123000436706336 % -[26] Char đ: 0.22596833024323454 % -[27] Char y: 0.14857340007739495 % -[28] Char w: 0.06558365568269944 % -[29] Char x: 0.04988709354063157 % -[30] Char q: 0.030149832233278887 % +Most Frequent characters: +[ 0] Char a: 10.740852063364837 % +[ 1] Char i: 10.043304500773692 % +[ 2] Char e: 9.422124258313017 % +[ 3] Char o: 8.747509156285831 % +[ 4] Char n: 6.774092998920171 % +[ 5] Char r: 5.484086430885348 % +[ 6] Char t: 5.289049193467588 % +[ 7] Char j: 5.0933440203052465 % +[ 8] Char s: 4.709169644546861 % +[ 9] Char u: 3.9080920415456037 % +[10] Char l: 3.856327021340547 % +[11] Char k: 3.8457513720513417 % +[12] Char m: 3.492190717919603 % +[13] Char v: 3.127497801378174 % +[14] Char d: 2.828707878302107 % +[15] Char p: 2.820470004118937 % +[16] Char z: 1.9050640661701677 % +[17] Char g: 1.606162820470004 % +[18] Char b: 1.2042881474802125 % +[19] Char č: 1.092965523383317 % +[20] Char c: 1.0381947923276447 % +[21] Char h: 0.7429671932226787 % +[22] Char š: 0.46811163432744435 % +[23] Char f: 0.44996604659965045 % +[24] Char ž: 0.42870342539714346 % +[25] Char ć: 0.38027808391499407 % +[26] Char đ: 0.20772801656480647 % +[27] Char y: 0.1414910552271538 % +[28] Char w: 0.05911231339545135 % +[29] Char x: 0.038072337441138165 % +[30] Char q: 0.030613721626646183 % -The first 31 characters have an accumulated ratio of 0.9998103979622444. +The first 31 characters have an accumulated ratio of 0.9997628828106736. +The first 4 characters have an accumulated ratio of 0.3895378997873738. +All characters whose order is over 19 have an accumulated ratio of 0.039852386200447516. -725 sequences found. +809 sequences found. -First 512 (typical positive ratio): 0.9990568119867879 -Next 512 (512-1024): 0.00365217039939203 -Rest: -4.0440741033709315e-17 +First 410 (typical positive ratio): 0.9950584932401488 +Next 135 (545-410): 0.0039469726326971655 +Rest: 0.000994534127154001 -- Processing end: 2021-03-16 19:18:56.030353 +- Processing end: 2022-12-14 18:02:01.282903 diff --git a/script/BuildLangModelLogs/LangCzechModel.log b/script/BuildLangModelLogs/LangCzechModel.log index 7d7cbd3..b2083b5 100644 --- a/script/BuildLangModelLogs/LangCzechModel.log +++ b/script/BuildLangModelLogs/LangCzechModel.log @@ -1,158 +1,244 @@ = Logs of language model for Czech (cs) = - Generated by BuildLangModel.py -- Started: 2021-03-16 18:42:56.950279 +- Started: 2022-12-14 17:50:48.302160 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Sociální fobie (revision 19562865) -Adaptace (revision 18611473) -Agorafobie (revision 19426793) -Alkoholismus (revision 19586776) -Alprazolam (revision 19373957) -Americká psychiatrická společnost (revision 18200634) -Antidepresivum (revision 19057482) -Asertivita (revision 19469246) -Atenolol (revision 12051880) -Benzodiazepiny (revision 19464603) -Beta-blokátor (revision 19342461) -Blud (revision 18085659) -Bohatství (revision 16529725) -Bupropion (revision 12028550) -Citalopram (revision 17641873) -Clonazepam (revision 19414205) -Crohnova nemoc (revision 19441068) -DSM-IV (revision 18200634) -Deprese (psychologie) (revision 19554049) -Diagnostický a statistický manuál mentálních poruch (revision 18200634) -Diagnóza (medicína) (revision 18672900) -Dichotomické myšlení (revision 19472610) -Digital object identifier (revision 19452419) -Dopamin (revision 19339677) -Dystymie (revision 17683683) -Důkaz kruhem (revision 16799597) -Elektivní mutismus (revision 19334050) -Emoce (revision 19268819) -Escitalopram (revision 19342010) -Fluoxetin (revision 19342014) -Fluvoxamin (revision 19342014) -Gen (revision 18766924) -Generalizovaná úzkostná porucha (revision 19465410) -Halucinace (revision 19181320) -Hněv (revision 19602111) -Inteligence (revision 19472417) -International Standard Book Number (revision 19411852) -International Standard Serial Number (revision 17477154) -Interpersonální psychoterapie (revision 17446502) -Introverze (revision 19273893) -Iracionalita (revision 16731536) -Jana Vyskočilová (revision 19609212) -Ján Praško (revision 18740907) -Ján Praško Pavlov (revision 18740907) -Kognitivní omyl (revision 19618239) -Kognitivní psychologie (revision 16289048) -Kognitivní restrukturalizace (revision 19284546) -Kognitivně behaviorální terapie (revision 19475205) -Komorbidita (revision 17525950) -Lymská borelióza (revision 19051205) -Medical Subject Headings (revision 18009832) -Meditace (revision 18651670) -Mezinárodní klasifikace nemocí (revision 19575331) -Michael Liebowitz (revision 17336961) -Moclobemid (revision 19562865) -Moritova terapie (revision 16391634) -Musturbace (revision 19562865) -NDRI (revision 19412768) -Nervozita (revision 18799061) -Noradrenalin (revision 19376674) -Obsedantně kompulzivní porucha (revision 19461977) -Panická ataka (revision 18158083) -Panická porucha (revision 18158083) -Paranoia (revision 19271797) -Paroxetin (revision 19342014) -Pohlavnost (revision 19553039) -Pravděpodobnost (revision 19370061) -Predestinace (revision 15390515) -Profese (revision 19148432) -Propanolol (revision 19342521) -Psychiatr (revision 18661359) -Psychické trauma (revision 17566056) -Psychoaktivní droga (revision 19150920) -Psychodynamická léčba (revision 19562865) -Psychofarmaka (revision 19341820) -Psycholog (revision 18812730) -Psychoterapie (revision 18403501) +Sociální fobie (revision 22020472) +Agorafobie (revision 21926310) +DSM (revision 20942105) +Elektivní mutismus (revision 21924233) +Dopamin (revision 21859773) +Stres (revision 21742296) +Ján Praško (revision 22127468) +Escitalopram (revision 20547840) +Nervová soustava (revision 21341170) +Psychiatrie (revision 20502746) +Diagnostický a statistický manuál duševních poruch (revision 20942105) +Trávení (revision 21915709) +Emoce (revision 21879452) +22. prosinec (revision 21760502) +Hustota (revision 21244853) +Národní knihovna České republiky (revision 22177708) +Molární hmotnost (revision 19281991) +Mezinárodní nechráněný název (revision 21323694) +SSRI (revision 19342041) +Aminokyselina (revision 20868507) +Indikace (lékařství) (revision 20833751) +Virtual International Authority File (revision 21184542) +Slovenská národní knihovna (revision 20271931) +Pocení (revision 20339134) +Adrenalin (revision 21886991) +SNRI (revision 21517969) +Oxid uhelnatý (revision 21253816) +Psychologie (kniha) (revision 21019981) +Citalopram (revision 21240956) +Kortizon (revision 19998861) +Doktor medicíny (revision 22022290) +Druhá světová válka (revision 22212726) +Sebevědomí (sociální psychologie) (revision 21574634) +1956 (revision 22217131) +Hypothalamus (revision 21171603) +Farmakoterapie (revision 21795495) +Glutathion disulfid (revision 20827558) +PubChem (revision 20361169) +Ústa (revision 21522196) +Psychologie (revision 21840396) +Oktopamin (revision 20220450) +Mezinárodní standardní identifikátor jména (revision 20820441) PubMed (revision 17045891) -RIMA (revision 13950874) -Remise (revision 19427721) -Richard Heimberg (revision 19562865) -Schizofrenie (revision 19507435) -Sebevražda (revision 19464374) -Selektivní abstrakce (revision 17523049) -Selektivní inhibitor zpětného vychytávání serotoninu (revision 19342041) -Serotonin (revision 19186450) -Sertralin (revision 19342014) -Skupinová psychoterapie (revision 15430379) -Skupinová terapie (revision 15430379) -Sociální chování (revision 18867179) +The Lancet (revision 19241411) +Glycin (revision 21789768) +Glukóza (revision 21664603) +Afektivní poruchy (revision 21599448) +Terapie (revision 21310530) +Diagnóza (medicína) (revision 20229540) +2-arachidonoylglycerol (revision 20792362) +Organismus (revision 21881526) +Homeostáza (revision 20811648) +Hormon (revision 21350645) +Foniatrie (revision 17398936) +Puls (tep) (revision 20667506) +Úzkostná porucha (revision 21100459) +Library of Congress Control Number (revision 19355161) +Fototerapie (revision 21243206) +Neverbální komunikace (revision 21879485) +Ottův slovník naučný (revision 21909503) +Alkoholismus (revision 22196482) +Deprese (revision 22209864) +Autorita (knihovnictví) (revision 21919206) +Internet Archive (revision 21709929) +Serin (revision 20365965) +Tuky (revision 21806123) +Glukokortikoidy (revision 20998627) +Kognitivně behaviorální terapie (revision 21301071) +Sociální komunikace (revision 21879503) +Dohlížet a trestat (revision 20304185) +Mars (planeta) (revision 21861507) +Miroslav Štěpánek (historik) (revision 22080013) +Digital object identifier (revision 21882829) +Francouzská národní knihovna (revision 20503017) +Tlusté střevo (revision 21695653) +Polytematický strukturovaný heslář (revision 20359962) +Souborný katalog České republiky (revision 21215720) +Standardní teplota a tlak (revision 20345487) +Olomouc (revision 22219519) +2001 (revision 21769254) +International Standard Serial Number (revision 21989664) +Cenzura (revision 22170065) +Srdce (revision 21752723) +Přírodní vědy (revision 20697083) +Steroidy (revision 20599619) +Databáze (revision 21914035) +Farmakodynamika (revision 21795495) +Duševní porucha (revision 21595942) +Vitiligo (revision 22217229) +Synapse (revision 20882185) +Jan Otto (revision 22197603) +Praha (revision 22204542) +Polymerizace (revision 20878217) +Washington, D.C. (revision 22030685) +Česko (revision 22170403) +Rakousko-Uhersko (revision 22071686) +Dunningův–Krugerův efekt (revision 21506228) +Klášter Zlatá Koruna (revision 21558244) +Národní knihovna Izraele (revision 20491004) +Tachykardie (revision 20455710) +Bulimie (revision 22026405) +Svobodný a otevřený software (revision 22217209) +Gesto (revision 21276755) +Apatie (revision 22029703) +Václav Havel (revision 22193432) +1901 (revision 21865040) +Antipsychotikum (revision 20223009) +Obratlovci (revision 22222019) +Stavová rovnice (revision 20396700) +Relativní atomová hmotnost (revision 21403202) +Atom (revision 22205446) +Mozek (revision 22201687) +Hynek Bulín mladší (revision 22055619) +Hmotnost (revision 21195721) +Dobré jméno (revision 20229915) +Tyramin (revision 21871510) +Evropský fond pro regionální rozvoj (revision 21036326) +Národní parlamentní knihovna Japonska (revision 21018639) +International Standard Book Number (revision 21443136) +Mezinárodní organizace pro normalizaci (revision 21272544) +Paměť (revision 22110149) +Antidepresivum (revision 21517969) +Vágnost (revision 22029954) +Karen Horneyová (revision 22010958) +Glykogen (revision 21277115) +1994 (revision 21877310) +Molární objem (revision 20492685) +Voda (revision 21909723) +DNA (revision 21778968) +Termoterapie (revision 20180487) +Populace (revision 21355483) +Lege artis (lékařství) (revision 21317439) +Pórovitost (revision 21767560) +Měření krevního tlaku (revision 20380281) +Kontraindikace (revision 20849480) +Ministerstvo kultury České republiky (revision 21650558) +James Lovelock (revision 22094240) +Právní forma (revision 20943476) +27. červenec (revision 22198826) +Seznam národních knihoven (revision 20048304) +WorldCat (revision 21510754) +Puberta (revision 21909913) +Knihovnictví (revision 21783979) +Nobelova cena za fyziologii a lékařství (revision 22203422) +Kulturní průmysl (revision 20478874) +Elektrický náboj (revision 22028562) +Spojené státy americké (revision 22179989) +Masožravé rostliny (revision 20560168) +Psychoterapie (revision 21818892) +Nula (revision 20188675) +Lecitin (revision 21332399) +Zrak (revision 21329169) +Dolní končetina (revision 20862515) +Doktor teologie (revision 21466542) +Jazyk (lingvistika) (revision 22109800) +Aerobní organismy (revision 20312976) +Bezvědomí (revision 22201907) +Česká terminologická databáze knihovnictví a informační vědy (revision 22188254) +Smích (revision 21848703) +Těhotenství (revision 21789011) +Neobehaviorismus (revision 21269447) +Webová stránka (revision 21216729) +Latina (revision 21868129) +Sekunda (revision 22206940) +Lipidy ve sportovní výživě (revision 20171966) +Interneuron (revision 20425790) +Mirtazapin (revision 20332657) +Histamin (revision 21538976) +Trimethylglycin (revision 21738055) +Mozkomíšní mok (revision 21632570) +Denis McQuail (revision 22085880) +The New England Journal of Medicine (revision 21227571) +Prodloužená mícha (revision 21104807) +Kanabinoidní receptor 1 (revision 20631149) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 18:50:25.563305 +- Wikipedia parsing ended at: 2022-12-14 18:07:45.112214 -48 characters appeared 495093 times. +70 characters appeared 1502766 times. -First 41 characters: -[ 0] Char o: 8.197651754316865 % -[ 1] Char e: 8.02455296277669 % -[ 2] Char n: 6.99807914876599 % -[ 3] Char a: 6.436164518585397 % -[ 4] Char i: 5.469477451711093 % -[ 5] Char t: 5.3903004082061345 % -[ 6] Char s: 4.630443169263149 % -[ 7] Char v: 3.9471372045252107 % -[ 8] Char r: 3.7742403952388743 % -[ 9] Char p: 3.6326508352976106 % -[10] Char l: 3.626389385428596 % -[11] Char k: 3.4845978432334936 % -[12] Char í: 3.306247513093499 % -[13] Char d: 3.2319180436806825 % -[14] Char c: 3.084269016124243 % -[15] Char u: 3.0539716780483666 % -[16] Char m: 2.917835638960761 % -[17] Char h: 2.260989349475755 % -[18] Char z: 2.074559729182194 % -[19] Char á: 2.05597736182899 % -[20] Char y: 2.00184611780009 % -[21] Char j: 1.8560149305282037 % -[22] Char b: 1.743510815139782 % -[23] Char ě: 1.2797595603250298 % -[24] Char é: 1.2238104760115776 % -[25] Char č: 0.9543661493901145 % -[26] Char ž: 0.9283104386448606 % -[27] Char ř: 0.8905397571769345 % -[28] Char ý: 0.7972239559032344 % -[29] Char š: 0.6172577677325269 % -[30] Char g: 0.5201043036358826 % -[31] Char f: 0.5120250134823154 % -[32] Char ů: 0.5093992441824061 % -[33] Char ú: 0.18077411718606404 % -[34] Char x: 0.1575461579945586 % -[35] Char w: 0.07291559363594315 % -[36] Char ň: 0.052313403744347016 % -[37] Char ó: 0.050495563459794425 % -[38] Char ť: 0.027469586522128164 % -[39] Char q: 0.010301094945798063 % -[40] Char ď: 0.010099112691958885 % +Most Frequent characters: +[ 0] Char o: 8.127878858052417 % +[ 1] Char e: 7.5461515631841545 % +[ 2] Char a: 6.932283535826603 % +[ 3] Char n: 6.812437864577718 % +[ 4] Char t: 5.253645610826969 % +[ 5] Char i: 5.075307799085153 % +[ 6] Char s: 4.692147679678673 % +[ 7] Char r: 4.318702978374544 % +[ 8] Char v: 4.279375498247898 % +[ 9] Char l: 4.187012482315942 % +[10] Char k: 4.013133115867673 % +[11] Char m: 3.1770082634289034 % +[12] Char u: 3.10440880349968 % +[13] Char p: 3.088970604871284 % +[14] Char d: 3.0580942076144924 % +[15] Char í: 2.921679090423925 % +[16] Char c: 2.9060412599167136 % +[17] Char h: 2.2894449302153492 % +[18] Char á: 2.054012401132312 % +[19] Char z: 2.0474245491314016 % +[20] Char y: 1.9050870195359757 % +[21] Char j: 1.7528344399593814 % +[22] Char b: 1.5339713568180275 % +[23] Char ě: 1.320431790445086 % +[24] Char é: 1.2140279990364435 % +[25] Char ř: 0.976466063246041 % +[26] Char ý: 0.952842957586211 % +[27] Char č: 0.9467209133025367 % +[28] Char ž: 0.7350445777985395 % +[29] Char š: 0.6022228344266506 % +[30] Char g: 0.5773353935343227 % +[31] Char f: 0.5282924953053236 % +[32] Char ů: 0.5040039500494422 % +[33] Char ú: 0.14852611783870542 % +[34] Char x: 0.1266331551286095 % +[35] Char w: 0.11179385213666 % +[36] Char ň: 0.06787483879725786 % +[37] Char ó: 0.042122326430062966 % +[38] Char ť: 0.02821463887258562 % +[39] Char ď: 0.012909528163400024 % +[40] Char q: 0.01031431373879899 % -The first 41 characters have an accumulated ratio of 0.9999353656787715. +The first 41 characters have an accumulated ratio of 0.9998283165842187. +The first 6 characters have an accumulated ratio of 0.39747705231553015. +All characters whose order is over 27 have an accumulated ratio of 0.03495288022220359. -1037 sequences found. +1359 sequences found. -First 512 (typical positive ratio): 0.9751874547460189 -Next 512 (512-1024): 0.009283104386448606 -Rest: 3.158667139656693e-05 +First 747 (typical positive ratio): 0.995024712172107 +Next 199 (946-747): 0.003977666094002408 +Rest: 0.000997621733890619 -- Processing end: 2021-03-16 18:50:26.412061 +- Processing end: 2022-12-14 18:07:45.526609 diff --git a/script/BuildLangModelLogs/LangDanishModel.log b/script/BuildLangModelLogs/LangDanishModel.log index 1396f6e..e9dc7cb 100644 --- a/script/BuildLangModelLogs/LangDanishModel.log +++ b/script/BuildLangModelLogs/LangDanishModel.log @@ -1,240 +1,246 @@ = Logs of language model for Danish (da) = - Generated by BuildLangModel.py -- Started: 2022-11-30 20:49:10.182568 -- Maximum depth: 2 +- Started: 2022-12-14 17:51:19.203816 +- Maximum depth: 4 - Max number of pages: 200 == Parsed pages == Forside (revision 10000691) -15. januar (revision 10515606) -IC4 (revision 11317878) -VM i fodbold 2022 (mænd) (revision 11344039) -28. november (revision 9410945) -Forenede Nationer (revision 11199108) -Middelaldercentret (revision 11339897) -Vilhelm Erobreren (revision 11279565) -Casper & Mandrilaftalen (revision 11221713) -Nikolaj Lie Kaas (revision 11322663) -Stig Hoffmeyer (revision 11340274) -Rock and Roll Hall of Fame (revision 8408189) -Anwar Ibrahim (revision 11342876) -Afrikamesterskabet i håndbold 2022 (kvinder) (revision 11341917) -1940 (revision 11263756) -1937 (revision 11303923) -1934 (revision 11224625) -Danmarksdemokraterne (revision 11335570) -The Julekalender (revision 11341242) -Ruslands invasion af Ukraine 2022 (revision 11335164) -25. november (revision 10378454) -The Jimi Hendrix Experience (revision 10497780) -24. november (revision 6877891) -Vikingetidens rustning og våben (revision 11332607) -Torben Rechendorff (revision 11342962) -Thomas Edison (revision 11052704) -1947 (revision 11252357) -Eurovision Song Contest 2014 (revision 11333950) -29. november (revision 6877900) -Ukraine (revision 11334630) -1990 (revision 11340072) -Maurice Norman (revision 11342318) -Sergej Sjojgu (revision 11309097) -Færøerne (revision 11333678) -Fonograf (revision 11032483) -Folketingsvalget 2022 (revision 11339557) -Hans Magnus Enzensberger (revision 11341046) -Moderaterne (revision 11305861) -Hawaii (revision 11317011) -Mandan (indfødte amerikanere) (revision 11336303) -SI-præfiks (revision 11332802) -Encyklopædi (revision 11315276) -Storbritannien (revision 11329834) -1991 (revision 11250037) -Det Konservative Folkeparti (revision 11313857) -Wandsworth-skjoldet (revision 11341402) -Angolas håndboldlandshold (damer) (revision 11331888) -Shu-bi-dua (revision 11324736) -1877 (revision 11224901) -Kon-Tiki (revision 10615971) -Socialdemokratiet (revision 11325315) -Donatan (revision 10586146) -Adolf Hitler (revision 11317375) -Procent (revision 10764365) -1. juni (revision 10206137) -1863 (revision 11081613) -ISO 3166-1 alpha-3 (revision 11250626) -Senegals håndboldlandshold (herrer) (revision 8621578) -Billion (revision 11039345) -Lørdag (revision 11159889) -Sachsen (revision 11299889) -Vestindien (revision 11330329) -Folketingsvalget 1988 (revision 10970017) -Dogme 95 (revision 10973606) -Encyclopédie (revision 11314734) -Afrikamesterskabet i håndbold 2018 (mænd) (revision 11131830) -Mew (revision 11308840) -2. marts (revision 9423344) -Rajon (revision 11185598) -TheTVDB (revision 10969052) -Skueproces (revision 11322041) -New York Times (revision 10236433) -2006 (revision 11271490) -Jacinda Ardern (revision 11243495) -8. maj (revision 9423405) -7. juni (revision 10287352) -Ray Charles (revision 10893843) -Dansk Rock - fra pigtråd til punk (revision 10970784) -1950'erne (revision 10917112) -John Wesley Hyatt (revision 9405508) -Landsdel Hovedstaden (revision 10723037) -Zar-Rusland (revision 11328111) -1816 (revision 11198312) -Engelsk litteratur (revision 10817139) +1542 (revision 11188745) +1933 (revision 11307635) +Novozymes (revision 11354746) +Socialdemokratiet (revision 11347440) +Stenhugger (revision 8700817) +Joseph Kittinger (revision 11352089) +9. december (revision 10927383) +Serbien (revision 11346432) +Boeing 747 (revision 11352874) +Brandts (revision 11336216) +H.A. Brendekilde (revision 11353582) +Thomas Vinterberg (revision 11234643) +Dansk Folkeparti (revision 11353484) +Brandts Klædefabrik (revision 11236311) +DR (revision 11348788) 22. november (revision 10203064) -Maj (revision 11288718) -Progressiv rock (revision 11259601) -Maurice Setters (revision 10936371) -Minkkommissionen (revision 11337058) -Ragnhild Hveger (revision 11072132) -1961 (revision 11224941) -Montenegro (revision 11340028) -Socialkonservatisme (revision 8745187) -TV 2 (revision 11339141) -7. februar (revision 9423377) -Ar (enhed) (revision 11309905) -1881 (revision 11144791) -Etiopisk kalender (revision 9931290) -Ethelbert Nevin (revision 10591854) -The Moscow Times (revision 11329355) -1960'erne (revision 11261802) -15. november (revision 6877873) -Politikens Forlag (revision 11322941) -Island (revision 11219029) -Danmark (revision 11313400) -Det Kongelige Teater (revision 11319106) -20. juni (revision 10232768) -VM i fodbold 1958 (revision 11014260) -Næste folketingsvalg (revision 11338101) +Ronja Mannov Olesen (revision 11152936) +1936 (revision 11228066) +Mauretanien (revision 10951803) +Autoritetsdata (revision 11213971) +2. december (revision 6877732) +Robert for bedste kortfilm (revision 11335917) +1901 (revision 11156349) +2016 (revision 11247676) +Årets museum i Europa (revision 11179477) +Burundi (revision 11299715) +Fontæne (revision 8414688) +Jøde (revision 11265837) +Træskærer (revision 11068263) +1. Balkankrig (revision 10998772) +Boeing (revision 11343895) +Tiwanaku (revision 9882035) +Hydraulik (revision 11117680) +ISO 4217 (revision 11062181) +1560'erne (revision 6879801) +1547 (revision 11064387) +Rockwool International (revision 11344422) +2012 (revision 11307603) +14. december (revision 11027401) +Tjekkisk nationalbibliotek (revision 9639333) +Værktøj (revision 11168806) +Det Centrale Virksomhedsregister (revision 10893028) +Spildevand (revision 11179831) +Jyske Bank (revision 11344681) +Folketingsgruppe (revision 11218954) +Carlo Azeglio Ciampi (revision 11263151) +23. april (revision 10377526) +Robert (filmpris) (revision 11150101) +Persepolis (revision 11214098) +Mardy Fish (revision 11010900) +Flyulykken på Tenerife (revision 11315404) +14. århundrede (revision 10458876) +Folketingsvalg 1998 (revision 11229655) +Fredspolitisk Folkeparti (revision 11315698) +Angkor Wat (revision 11309772) +Christian 3. (revision 11313067) +Ambu (revision 11219384) +Vilhelm Lundstrøm (revision 11288776) +1587 (revision 11303772) +Erhvervspartiet (1918-24) (revision 7285252) +Faldskærm (revision 10593406) +1925 (revision 11269498) +Kunstindeks Danmark (revision 9867315) +Armensk kalender (revision 9393672) +Library of Congress Control Number (revision 8316539) +Kunsthal (revision 10915071) +Pyramide (revision 10817767) +Aarhus (revision 11354522) +Cannes Filmfestival (revision 10594651) +Fotografi (revision 11315556) +NASDAQ OMX (revision 9825397) +Folketingsvalg 1910 (revision 10579179) +Kali Yuga (revision 9981125) +Tertiærsektor (revision 11326422) +1921 (revision 11303917) +2019 (revision 11236823) +Airbus (revision 11343899) +Zar (revision 11185808) +Filmfestivalen i Berlin (revision 11172659) +Parthenon (revision 10855814) +15. december (revision 6877865) +Peter Ilsted (revision 10290102) +1768 (revision 10920509) +Filmproducer (revision 9510897) +Aeronaut (revision 10658273) +Maleri (revision 10591205) +Frise (revision 11056171) +Ab urbe condita (revision 11022075) Virtual International Authority File (revision 8702589) -Marmor (revision 11309004) -Oslo (revision 11290885) -1938 (revision 11336099) -Frie Grønne (revision 11294501) -Lottorp (revision 11223312) -1931 (revision 11236350) -1930 (revision 11252037) -Albanien (revision 11309379) -Holger Begtrup (revision 10289352) -1887 (revision 11250123) -Kristen Helveg Petersen (revision 10505239) -Benito Mussolini (revision 11311831) -Tamilrapporten (revision 10672604) -Internationale Valutafond (revision 10871884) -Ron Flowers (revision 10999963) -Scud-missil (revision 11072276) -1860'erne (revision 8151963) -11. november (revision 10903885) -10. november (revision 9286344) -1697 (revision 10865232) -Det Humanistiske Parti (revision 10898925) -1998 (revision 11342743) -Centrum-Demokraterne (revision 11201902) -Præstens Urskov (revision 10261164) -Kraghave (Tingsted Sogn) (revision 11124871) -Burkina Faso (revision 11309150) -Johannes Peter Frederik Königsfeldt (revision 10942128) -John Bardeen (revision 10622362) -Retsforbundet (revision 11333888) -Mykolaiv oblast (revision 11215109) -Folketingsvalget 1932 (revision 10529645) -Atassut (revision 11250468) -1780 (revision 10879041) -Pokalvindernes Europa Cup (revision 10533322) -Harmonium (revision 10648166) -Litra MA (revision 10707516) -14. oktober (revision 9764309) -Letland i Eurovision Song Contest (revision 11273114) -Den røde tråd (sang) (revision 11117198) -Peter A.G. Nielsen (revision 11311663) +5. april (revision 9423320) +1888 (revision 11334476) +Islamisk kalender (revision 11119207) +Tenochtitlán (revision 8126626) +1917 (revision 11188751) +15. århundrede (revision 9930209) +Novo Holdings A/S (revision 11347542) +Olaf Rude (revision 10261097) +Agern (revision 11006174) +Millennium (revision 11283969) +Fyns Kunstmuseum (revision 11336558) +Cambodja (revision 11312835) +Luftballon (revision 11177104) +Klassicisme (revision 11202332) +Cathay Pacific (revision 10948083) +Kähler Keramik (revision 11319553) +8. december (revision 10277754) +Jan Johansen (revision 11338910) +Los Angeles (revision 11320219) +1960 (revision 11280242) +Felix Baumgartner (revision 11311728) +Niels Vørsel (revision 10134831) +Lars Løkke Rasmussen (revision 11350812) +Genetisk modificeret organisme (revision 11291604) +Tresårig cyklus (revision 11318580) +Californien (revision 11283561) +Tampa (Florida) (revision 11192576) +Novo Nordisk A/S (revision 11347432) +Enhedslisten (revision 11339612) +Liberalt Centrum (revision 9533300) +1945 (revision 11336100) +Aalborg Tårnet (revision 10423446) +Demokratiske Republik Congo (revision 11311061) +Olaf Hansen (revision 9519311) +18. februar (revision 6877450) +Folketingsvalg 1950 (revision 11057309) +Gemeinsame Normdatei (revision 11281765) +Genmab (revision 11347556) +Lungekræft (revision 11050800) +Passagerfly (revision 9430888) +Weilbachs Kunstnerleksikon (revision 11254095) +Nobelprisen i fysiologi eller medicin (revision 11263700) +2010 (revision 11200088) +New York City (revision 11321497) +Peter Kropotkin (revision 11117737) +Sten (revision 11251764) +Jetfly (revision 11035140) +1537 (revision 11226704) +International Standard Name Identifier (revision 10880739) +Borobudur (revision 10236366) Internationalt Standardbognummer (revision 11037702) -Denys Sjmyhal (revision 11184932) -Souvenir (revision 10530474) -Kristendemokraterne (revision 11310458) -Edward Gibbon (revision 11316150) -19. november (revision 10910432) -Aarhus Hovedbanegård (revision 11254458) -Grækere (revision 11277065) -Moderaterna (revision 11275745) -Margrethe 2. (revision 11264709) -1978 (revision 11340075) -Demokratiske Republik Congos håndboldlandshold (damer) (revision 11330801) -Philip af Storbritannien (revision 11307679) -21. århundrede (revision 9838559) -Jørgen Christensen (handelsminister) (revision 9548745) -Holger Juul Hansen (revision 11316843) -Fodboldspiller (revision 11234361) -Parliamo italiano (revision 11322505) -Borgerlig (revision 10930991) -Mail (revision 10885336) -Disko (revision 10767773) -Tunesiens fodboldlandshold (revision 11334411) -6. december (revision 10378463) -Erhvervspartiet (1978-79) (revision 8449157) -Sovjetunionen (revision 11333771) -1567 (revision 10818742) -1875 (revision 11198318) -Hubble-teleskopet (revision 11304842) -Hærulfstenen (revision 11317806) -Frankrig (revision 11235194) -Coney Island (revision 11211594) -1952 (revision 11243498) +13. juli (revision 9999472) +Mogens Rukov (revision 10978255) +2000 (revision 11224916) +Frankrig (revision 11348467) +Kongeriget Serbien (revision 11346432) +Juli (revision 11210560) +Fritid (revision 10416768) +Gregorianske kalender (revision 11316358) +Den preussisk-østrigske krig (revision 11232782) +Middelalderen (revision 11303549) +Hardeknud (revision 10634401) +Dassault (revision 9635820) +Lars Lunøe (revision 9614843) +Siem Reap (revision 11292393) +La Francophonie (revision 11152531) +1860'erne (revision 8151963) +Nationale parlamentsbibliotek (revision 10001351) +Aktieselskab (revision 11170853) +Québec (revision 11341120) +Ungarsk (sprog) (revision 10541631) +Ballonskipper (revision 11023019) +Cigaret (revision 11200433) +1910'erne (revision 11187787) +Spaniens nationalbibliotek (revision 10986693) +Rømø (revision 11324180) +Radikale Venstre (revision 11350803) +Machu Picchu (revision 10066989) +Sven Ove Gade (revision 10357249) +1941 (revision 11340092) +Rumfartøj (revision 10730968) +Danskefilm.dk (revision 8887499) +Italiensk Østafrika (revision 11318135) +Kommuner i Burundi (revision 6658414) +Javnaðarflokkurin (revision 11340399) +1927 (revision 11276646) +Israel (revision 11338000) +Henri Dunant (revision 11035680) +1924 (revision 11303038) +GN Store Nord (revision 11343188) +1979 (revision 11307615) +Wiktionary (revision 8998237) +Danmark (revision 11313400) +Den Store Danske Encyklopædi (revision 11301417) +Filmdatabasen (revision 10594647) +Raps (revision 11323354) +Historisches Lexikon der Schweiz (revision 9045932) == End of Parsed pages == -- Wikipedia parsing ended at: 2022-11-30 20:52:37.002648 +- Wikipedia parsing ended at: 2022-12-14 17:54:15.597990 -63 characters appeared 1374958 times. +58 characters appeared 1099639 times. Most Frequent characters: -[ 0] Char e: 14.79056087531401 % -[ 1] Char r: 8.641427592697378 % -[ 2] Char n: 7.613105273033795 % -[ 3] Char t: 6.915483963873806 % -[ 4] Char a: 6.583692010955971 % -[ 5] Char i: 6.462524673480935 % -[ 6] Char s: 6.347902990491345 % -[ 7] Char d: 5.849924143137463 % -[ 8] Char l: 5.1523755634717565 % -[ 9] Char o: 4.9496784629057755 % -[10] Char g: 3.827389636628901 % -[11] Char m: 3.251226582921078 % -[12] Char k: 3.2378443559730554 % -[13] Char f: 2.605170485207548 % -[14] Char v: 2.205303725641074 % -[15] Char u: 1.978242244490377 % -[16] Char b: 1.8278376503136822 % -[17] Char p: 1.5923395478261881 % -[18] Char h: 1.5512473835564433 % -[19] Char ø: 0.88409973250092 % -[20] Char æ: 0.7078761678538544 % -[21] Char å: 0.7005304889312983 % -[22] Char y: 0.6576200873044848 % -[23] Char c: 0.648019794059164 % -[24] Char j: 0.646928851644923 % -[25] Char w: 0.14465896412835882 % -[26] Char z: 0.06814753614292218 % -[27] Char x: 0.03643747663564996 % -[28] Char é: 0.020946094353427522 % -[29] Char ó: 0.013600415430871343 % -[30] Char q: 0.013018579476609468 % +[ 0] Char e: 14.849327824858886 % +[ 1] Char r: 8.88082361575026 % +[ 2] Char n: 7.6897054396943 % +[ 3] Char t: 6.684466447625084 % +[ 4] Char a: 6.646726789428167 % +[ 5] Char i: 6.420288840246663 % +[ 6] Char s: 6.276696261227549 % +[ 7] Char d: 5.90411944283533 % +[ 8] Char l: 5.0308328460522045 % +[ 9] Char o: 4.926707765002878 % +[10] Char g: 3.798155576511928 % +[11] Char k: 3.3646496713921565 % +[12] Char m: 3.2427914979370502 % +[13] Char f: 2.589122430179359 % +[14] Char v: 2.0825016209865237 % +[15] Char u: 1.9628259819813594 % +[16] Char b: 1.8617018858007035 % +[17] Char p: 1.5523276275213957 % +[18] Char h: 1.4792127234483317 % +[19] Char ø: 0.9092984152071725 % +[20] Char y: 0.7653420804464011 % +[21] Char å: 0.6933184435983082 % +[22] Char c: 0.6798594811569979 % +[23] Char æ: 0.6712202822926433 % +[24] Char j: 0.6610351215262463 % +[25] Char w: 0.14322882327745742 % +[26] Char z: 0.08675574438520278 % +[27] Char x: 0.0398312537114453 % +[28] Char é: 0.03810341393857439 % +[29] Char q: 0.017460275599537667 % -The first 31 characters have an accumulated ratio of 0.9992516135038306. +The first 30 characters have an accumulated ratio of 0.999484376236201. +The first 4 characters have an accumulated ratio of 0.38104323327928524. +All characters whose order is over 20 have an accumulated ratio of 0.030308128394864135. -1079 sequences found. +1014 sequences found. -First 508 (typical positive ratio): 0.995012453333286 -Next 198 (706-508): 0.003993410296057376 -Rest: 0.0009941363706565953 +First 503 (typical positive ratio): 0.9950476836368553 +Next 175 (678-503): 0.003954982568821541 +Rest: 0.0009973337943232075 -- Processing end: 2022-11-30 20:52:37.084319 +- Processing end: 2022-12-14 17:54:15.676393 diff --git a/script/BuildLangModelLogs/LangEnglishModel.log b/script/BuildLangModelLogs/LangEnglishModel.log index 4348ae0..c3cc1c3 100644 --- a/script/BuildLangModelLogs/LangEnglishModel.log +++ b/script/BuildLangModelLogs/LangEnglishModel.log @@ -1,252 +1,255 @@ = Logs of language model for English (en) = - Generated by BuildLangModel.py -- Started: 2022-12-03 20:28:44.618364 -- Maximum depth: 2 +- Started: 2022-12-14 17:54:31.274511 +- Maximum depth: 4 - Max number of pages: 200 == Parsed pages == Marmot (revision 1116705550) -Hibernate (revision 1115607389) -JSTOR (identifier) (revision 1122926070) -Thirteen-lined ground squirrel (revision 1124658433) -French Alps (revision 1117472036) -INaturalist (revision 1122751314) -Texas antelope squirrel (revision 1121470154) -Himalayas (revision 1124238550) -Vancouver Island marmot (revision 1121598871) -Mount Rainier National Park (revision 1120235066) -Olympic marmot (revision 1121472039) -Root (revision 1117256593) -Durango chipmunk (revision 1121473683) -France (revision 1125268533) -Sciuromorpha (revision 1107286064) -Alps (revision 1124362400) -Yellow-cheeked chipmunk (revision 1121299976) -Washington ground squirrel (revision 1121468941) -Hopi chipmunk (revision 1121297258) -Mexican prairie dog (revision 1121472442) +Oxford English Dictionary (revision 1126934642) +Forrest's rock squirrel (revision 1121471379) +Johann Friedrich Blumenbach (revision 1124282696) +Black-tailed prairie dog (revision 1120101763) +Herodotus (revision 1126195293) +Gray-collared chipmunk (revision 1121473607) +Callospermophilus (revision 1015470924) +Siberia (revision 1125951683) +Asia Minor ground squirrel (revision 1121357197) +France (revision 1127134794) +Marmoset (revision 1110976265) +Eskimos (revision 1126440133) +Last Glacial Period (revision 1127412073) +Tropical ground squirrel (revision 1121471157) +Columbian ground squirrel (revision 1124139650) +Merriam's chipmunk (revision 1121301344) +Race in Singapore (revision 1118674650) +Buller's chipmunk (revision 1121473516) +Marca's marmoset (revision 1110797645) +Yellow ground squirrel (revision 1121469509) Antelope squirrel (revision 1089053714) -Deosai National Park (revision 1125376855) -Eutamias (revision 1010406609) -Eastern chipmunk (revision 1120765340) -Golden-mantled ground squirrel (revision 1121777526) -Tuolumne Meadows (revision 1094508214) -Cascade Range (revision 1114533492) -Mammal Species of the World (revision 1093112025) -Franklin's ground squirrel (revision 1121361872) -Ladakh (revision 1124124745) -Groundhog (revision 1117813429) -Natural reservoir (revision 1110806364) -Neotamias (revision 1117512650) -Yosemite National Park (revision 1125019703) -Ontario (revision 1125244433) -Russet ground squirrel (revision 1121469545) -Bat (revision 1125180714) -Wayback Machine (revision 1125067302) +Nail (anatomy) (revision 1123634076) +PMID (identifier) (revision 1125133244) +Siberian chipmunk (revision 1121472776) +List of ancient Greek playwrights (revision 1122231123) +Roy Harris (linguist) (revision 1087505131) +Wordnik (revision 1097144364) +Perseus Project (revision 1112478286) +Laüs (revision 1096210239) +Texas antelope squirrel (revision 1121470154) +AAVE (revision 1125269600) +Callimico (revision 1125614017) +Otospermophilus (revision 1093268410) +Sozopol (revision 1117454721) +Magnifying glass (revision 1126820579) +Oscar Peschel (revision 1086657308) +Ethnogenesis (revision 1123304848) +Hoary marmot (revision 1121363017) +Ictidomys (revision 1095023307) +Erechtheion (revision 1123163760) +Eastern Siberia (revision 1125951683) +Interim Register of Marine and Nonmarine Genera (revision 1093112130) +Spermophilus ralli (revision 1121469745) +Espíritu Santo antelope squirrel (revision 1121470113) +Oak (revision 1126949583) +Tamarin (revision 1120713888) +Alexandria (revision 1126013507) +Red Terror (revision 1125612607) +Göttingen (revision 1126739277) +1968 Winter Olympics (revision 1121518304) +Black-capped marmot (revision 1121471697) +Swift fox (revision 1123284998) +Anaximenes of Miletus (revision 1124180264) +Leninism (revision 1127169744) +July Revolution (revision 1118106788) +Don E. Wilson (revision 1101839421) +White-tailed prairie dog (revision 1121472368) +Concise Oxford English Dictionary (revision 1127006175) +Mountain plover (revision 1118572990) +Prisons in Russia (revision 1122751238) +Race in Brazil (revision 1124705196) +Pyrrhus of Epirus (revision 1126501014) +Turkey (revision 1127301063) +World Wide Web (revision 1119819405) +John F. Richards (revision 1091216340) +Mammal (revision 1127041212) +Schneider's marmoset (revision 1110797810) +Colorado chipmunk (revision 1121299151) +Aeschylus (revision 1117125663) +Uinta ground squirrel (revision 1121367964) Long-eared chipmunk (revision 1121298477) -Southern Idaho ground squirrel (revision 1121468339) -Moss (revision 1122019251) -Altai Mountains (revision 1124752508) -Townsend's ground squirrel (revision 1121468829) -Richardson's ground squirrel (revision 1122297225) -Utah prairie dog (revision 1125084849) -Yersinia pestis (revision 1121719480) +St. Bartholomew's Day massacre (revision 1120328739) +Long-tailed marmot (revision 1122462086) +Flag of Russia (revision 1126574449) +Johann Christian Polycarp Erxleben (revision 1123183621) +William Chester Minor (revision 1101752889) +Antarctic (revision 1124702474) +Anu Garg (revision 1082253113) +Commonwealth of Independent States (revision 1127257304) +Potential enlargement of the European Union (revision 1127421947) +Common marmoset (revision 1110796571) +Thirteen-lined ground squirrel (revision 1127159966) +Mill Hill (revision 1127109155) +California ground squirrel (revision 1121359049) +Round-tailed ground squirrel (revision 1121470819) +Golden lion tamarin (revision 1126498136) +Polyandry in nature (revision 1125188329) +October Revolution (revision 1126331974) +Spruce-fir forests (revision 1105538592) +Piute ground squirrel (revision 1121468740) +Flemish art (revision 1029307027) +Received Pronunciation (revision 1126918653) +Groundhog (revision 1126432353) +Taxonomy (biology) (revision 1126106840) +Menzbier's marmot (revision 1121471953) +Group of Seven (revision 1127242480) +Hopi chipmunk (revision 1121297258) +Global warming potential (revision 1124849235) +Monogenism (revision 1117320065) +Daurian ground squirrel (revision 1121469422) +Gray marmot (revision 1122462225) +Nuclear weapon (revision 1124805455) +Wordhunt (revision 1107342769) +Palmer's chipmunk (revision 1121473732) +Vendian (revision 1124816391) +Alberta (revision 1127108924) European ground squirrel (revision 1121469378) -Spermophilus relictus (revision 1121469745) +Gaullism (revision 1127187176) +Diccionario de la lengua española (revision 1122640046) +Franklin's ground squirrel (revision 1121361872) +Xerospermophilus (revision 1095542738) +Alípio de Miranda-Ribeiro (revision 1118371411) +Prairie dog town (revision 1125350300) +Charles Caldwell (physician) (revision 1124873172) +Timeline of French history (revision 1124169293) +Lodgepole chipmunk (revision 1121296771) +Geoffroy's tamarin (revision 1120714387) +Selinunte (revision 1127066618) +LibriVox (revision 1123211025) +Nominal GDP (revision 1126535284) +Caspian race (revision 1096592610) +Eastern chipmunk (revision 1120765340) +Natural history (revision 1125550833) +Encyclopedia of Life (revision 1123215390) +The Passing of the Great Race (revision 1125593849) +Least Concern (revision 1114094351) +New International Encyclopedia (revision 1122804470) +Pandosia (Lucania) (revision 1095669257) +Falcon (revision 1127211661) Least chipmunk (revision 1120765536) -Panamint chipmunk (revision 1121299808) -Catalogue of Life (revision 1118132647) -Gray marmot (revision 1122462225) -Columbian ground squirrel (revision 1124139650) -Alberni-Clayoquot Regional District (revision 1109499216) -La Tania (revision 1115267378) -Populus tremuloides (revision 1120966005) -Paradise River Waterfalls (revision 1054159583) -Long-tongued nectar bat (revision 1123039710) -Happy Isles (revision 1113517959) -Tourism in France (revision 1120671901) -Otospermophilus (revision 1093268410) -History of Canada (revision 1123782373) +Etymology (revision 1126207231) +Caucasus Mountains (revision 1112109454) +Cascade golden-mantled ground squirrel (revision 1121471310) +Ontario (revision 1126506635) +The Australian National Dictionary (revision 1117901185) +Cliff chipmunk (revision 1121473647) +Emile Berliner (revision 1124364621) +Henry II, Holy Roman Emperor (revision 1125851826) +Teat (revision 1113585530) +Deforestation of the Amazon rainforest (revision 1127204853) +Russian Far East (revision 1125785102) +FOSS (revision 1123158325) +Google Books (revision 1123968126) +The American Heritage Dictionary of the English Language (revision 1125678949) +Family (biology) (revision 1115174458) +Shared decision-making (revision 1105303711) +1968 Summer Paralympics (revision 1110266388) California chipmunk (revision 1121299691) -Mexican ground squirrel (revision 1121470340) -White-tailed antelope squirrel (revision 1121470211) -Sedentism (revision 1110063134) -Terabyte (revision 1123174616) -Tamias (revision 1121473202) -RECAP US Federal Court Documents (collection) (revision 1122929164) -Belding's ground squirrel (revision 1121468288) -Cannibalism (revision 1125092745) -Yellow-pine chipmunk (revision 1121473478) -Monoclonal antibody therapy (revision 1114372687) -Menzbier's marmot (revision 1121471953) -Black-footed ferret (revision 1123500226) -Floods in Bihar (revision 1119748410) -Mammal (revision 1124779293) -Alaska marmot (revision 1124026979) +Chromista (revision 1104191406) +List of heads of state of the Soviet Union (revision 1113841810) +Scrub plane (revision 895337184) +Dog (revision 1125590248) +Platypus (revision 1126576847) +Palo Alto, California (revision 1124234189) +Geoffrey Nunberg (revision 1101933840) +Soviet submarine K-431 (revision 1123390956) +African-American (revision 1127390519) +Varnish (revision 1104965513) +Barcode of Life Data System (revision 1090221883) +Lesson's saddle-back tamarin (revision 1120721208) +Molina's hog-nosed skunk (revision 1119016570) +Race Life of the Aryan Peoples (revision 1072651965) +Joan of Arc (revision 1127287571) +Talcott Williams (revision 1124147325) +Supreme Court of the Soviet Union (revision 1029671037) +DNA sequence (revision 1126520849) +Human science (revision 1125850795) +Luxembourgish phonology (revision 1073415697) +Karl Pearson (revision 1126931733) +Conservation status (revision 1126423906) Sierra Madre ground squirrel (revision 1121471267) -Computer security (revision 1125370428) -Kedarnath Temple (revision 1122647471) -Frog Creek Cabin (revision 1048164755) -Outline of botany (revision 1100540741) -Agriculture in Nepal (revision 1088978356) -Plant evolution (revision 1116709561) -Little ground squirrel (revision 1121469707) -Dicranales (revision 1110407415) -Ultrasound (revision 1117397225) -White-tailed prairie dog (revision 1121472368) -Espíritu Santo antelope squirrel (revision 1121470113) -Brown County, Wisconsin (revision 1122831345) -Timeline of audio formats (revision 1120236679) -List of mountain peaks of Uttarakhand (revision 1121014571) -Antiviral drug (revision 1118217791) -California ground squirrel (revision 1121359049) -Red-tailed chipmunk (revision 1121297616) -Bobak marmot (revision 1121471769) -National Register of Historic Places listings in the Northern Mariana Islands (revision 1115478435) -Spermophilus pallidicauda (revision 1121469669) -Yellow-bellied marmot (revision 1121472145) -Sexually transmitted infection (revision 1122774900) -List of Yosemite destinations (revision 1119350249) -Baitarani River (revision 1118320499) -Baja California rock squirrel (revision 1121471079) -Years of Lead (Italy) (revision 1123769084) -Snow leopard (revision 1122462489) -Coyote (revision 1125069820) -Villard-Reculas (revision 1077275360) -Vancouver Island (revision 1121908258) -Sciurotamias (revision 1120570732) -Canada 2021 Census (revision 1114664828) -Time in Canada (revision 1120998431) -Forrest's rock squirrel (revision 1121471379) -Via Lattea (revision 1110201667) -Phylogenetic tree (revision 1117394267) -Hibernation (revision 1115607389) -Altai wapiti (revision 1111750851) -Alpine chipmunk (revision 1121473423) -Schist (revision 1116202480) -Rodent (revision 1123634696) -Nepalese literature (revision 1117603265) -Unification of Nepal (revision 1125350055) -CBC News (revision 1124984918) -Harris's antelope squirrel (revision 1121470079) -Alpine meadow (revision 1114658726) -Himalayan marmot (revision 1113552191) -Merriam's ground squirrel (revision 1121468396) -Heliscomyidae (revision 1010405407) -Siberian chipmunk (revision 1121472776) -1980 eruption of Mount St. Helens (revision 1123425632) -Tarbagan marmot (revision 1121488248) -Uinta chipmunk (revision 1121367930) -Asia Minor ground squirrel (revision 1121357197) -San Bernardino National Forest (revision 1113614977) -British Columbia (revision 1124903693) -List of Web archiving initiatives (revision 1120507741) -2011 Kashgar attacks (revision 1124413350) -Genus (revision 1125331312) -IUCN Red List (revision 1123293379) -Attack rate (revision 1118026995) -Atlas of Living Australia (revision 1069034125) -Riparian zone (revision 1100819694) -Natural History Museum of Los Angeles County (revision 1118638991) -Flying squirrel typhus (revision 1108887986) -New Scientist (revision 1121186695) -Sonoma chipmunk (revision 1121298317) -Basic reproduction number (revision 1122698892) -Homeothermic (revision 1082125124) -Library Genesis (revision 1123879366) -Ecological succession (revision 1116584234) -Taurus ground squirrel (revision 1121469893) -Edmund Jaeger (revision 1042985886) -Wolverine (revision 1123904337) -Puget Sound (revision 1124438931) -List of highest points of European countries (revision 1125124917) -Amburiq Mosque (revision 1101963105) -Mohave ground squirrel (revision 1121470764) -Kali Gandaki Gorge (revision 1091465924) -Palmer's chipmunk (revision 1121473732) -Citizen Science Association (revision 1076637865) -Alpha male (revision 1123599649) -Thermotogota (revision 1108216914) Gray-footed chipmunk (revision 1121473564) -ISSN (identifier) (revision 1117323780) -The Daily Excelsior (revision 1073376573) -National Center for Biotechnology Information (revision 1117911694) -Haridwar (revision 1124587996) -Ground squirrel (revision 1106618817) +ISNI (identifier) (revision 1116919527) +New Mexico (revision 1127238323) +Archipelago (revision 1116401445) +2022 SCO summit (revision 1120811549) +The New York Times (revision 1127291077) +Partition of the Ottoman Empire (revision 1126544087) +Idaho (revision 1127080022) ISBN (identifier) (revision 1124259962) -Breton language (revision 1123193740) -Notocitellus (revision 1092528025) -Wayback Machine (Peabody's Improbable History) (revision 1125111405) -Social animal (revision 1118899517) -Conservation status (revision 1124721586) -Doi (identifier) (revision 1121872952) -Drop (liquid) (revision 1115117361) -Monogamy in animals (revision 1115061008) -Grand Slam (tennis) (revision 1125138113) -Synonym (taxonomy) (revision 1115465643) -Encyclopedia of Life (revision 1123215390) -Algonquian languages (revision 1118973728) -Circulatory system (revision 1123361226) -Kenneth Oppel (revision 1115838353) -Red-cheeked ground squirrel (revision 1121469468) -Prairie dog (revision 1125350300) -Zygomasseteric system (revision 1093682242) -Black-tailed prairie dog (revision 1120101763) -Scenic Beach State Park (revision 1085870429) -Fashion capital (revision 1122240170) -Herbivory (revision 1124405692) -Artemisia tridentata (revision 1097902309) -ARKive (revision 1028182358) -Emblem of Uttarakhand (revision 1085229611) -Northern Italy (revision 1122409316) -Bibcode (identifier) (revision 1119780351) -Squirrel (revision 1121741651) -Birch Bay State Park (revision 1068937174) -Whistling (revision 1124843854) -Gobiomyidae (revision 1090208761) +Spermophilus brevicauda (revision 1010428942) +Taurus ground squirrel (revision 1121469893) +Tajikistani somoni (revision 1120621502) +9/11 Commission Report (revision 1123122065) +Spermophilus (revision 1089055218) +Gatun Lake (revision 1124617227) +Canadian Oxford Dictionary (revision 1021304609) +Alexandre Dumas, père (revision 1127252593) +Crusafontia (revision 1045515255) +Mercosur (revision 1125969034) +Missionary (revision 1124709979) +Materialism (revision 1126420363) +Primate (revision 1127035196) +Agesilaus II (revision 1122309607) +Roosmalens' dwarf marmoset (revision 1110797884) +VIAF (identifier) (revision 1122669300) +Agostinho Neto (revision 1113402334) +Soviet Empire (revision 1124525265) +Rivers in Russia (revision 1120330182) == End of Parsed pages == -- Wikipedia parsing ended at: 2022-12-03 20:32:27.933336 +- Wikipedia parsing ended at: 2022-12-14 17:58:33.140597 -58 characters appeared 2027474 times. +59 characters appeared 2649878 times. Most Frequent characters: -[ 0] Char e: 11.847648847778073 % -[ 1] Char a: 8.861519309248848 % -[ 2] Char t: 8.523956410785045 % -[ 3] Char i: 7.880199696765532 % -[ 4] Char n: 7.477629799445023 % -[ 5] Char o: 7.206405606187798 % -[ 6] Char s: 6.8668698094278895 % -[ 7] Char r: 6.763489938711914 % -[ 8] Char l: 4.301066252884131 % -[ 9] Char h: 4.232754649381447 % -[10] Char d: 3.7247333381340524 % -[11] Char c: 3.556839693135399 % -[12] Char u: 2.763981190387645 % -[13] Char m: 2.7244739020081146 % -[14] Char p: 2.17398595493703 % -[15] Char f: 2.1424195821993277 % -[16] Char g: 2.0356364619225698 % -[17] Char b: 1.575457934355755 % -[18] Char y: 1.572005362337569 % -[19] Char w: 1.3260835897279077 % -[20] Char v: 1.1594230061643207 % -[21] Char k: 0.6102667654431081 % -[22] Char x: 0.2356133790125052 % -[23] Char z: 0.13746168878121248 % -[24] Char j: 0.1346503087092609 % -[25] Char q: 0.1320855409243226 % +[ 0] Char e: 11.955833438369616 % +[ 1] Char a: 8.65764386134003 % +[ 2] Char t: 8.565752838432562 % +[ 3] Char i: 7.92813857845531 % +[ 4] Char n: 7.520383957299166 % +[ 5] Char o: 7.336677386657047 % +[ 6] Char s: 6.807558687607505 % +[ 7] Char r: 6.7380083158545405 % +[ 8] Char h: 4.371823910383799 % +[ 9] Char l: 4.20785409743392 % +[10] Char d: 3.74643662840327 % +[11] Char c: 3.560050689126065 % +[12] Char u: 2.780052515625248 % +[13] Char m: 2.632498552763561 % +[14] Char p: 2.203950521495707 % +[15] Char f: 2.1648921195617308 % +[16] Char g: 1.993563477261972 % +[17] Char y: 1.5471278300359488 % +[18] Char b: 1.5069750380960933 % +[19] Char w: 1.383950506400672 % +[20] Char v: 1.0527654480696849 % +[21] Char k: 0.604707084628047 % +[22] Char x: 0.25389848136404775 % +[23] Char j: 0.14932762942293948 % +[24] Char z: 0.14351604111585514 % +[25] Char q: 0.12242073031286722 % -The first 26 characters have an accumulated ratio of 0.9996665801879581. +The first 26 characters have an accumulated ratio of 0.9993580836551719. +The first 4 characters have an accumulated ratio of 0.3710736871659752. +All characters whose order is over 18 have an accumulated ratio of 0.037105859213141135. -863 sequences found. +1047 sequences found. -First 369 (typical positive ratio): 0.9950424985513596 -Next 125 (494-369): 0.003963798368833871 -Rest: 0.0009937030798065072 +First 377 (typical positive ratio): 0.9950075198967843 +Next 160 (537-377): 0.003999516176216855 +Rest: 0.00099296392699888 -- Processing end: 2022-12-03 20:32:28.010953 +- Processing end: 2022-12-14 17:58:33.195443 diff --git a/script/BuildLangModelLogs/LangEsperantoModel.log b/script/BuildLangModelLogs/LangEsperantoModel.log index 05d0464..1bf27c1 100644 --- a/script/BuildLangModelLogs/LangEsperantoModel.log +++ b/script/BuildLangModelLogs/LangEsperantoModel.log @@ -1,157 +1,244 @@ = Logs of language model for Esperanto (eo) = - Generated by BuildLangModel.py -- Started: 2021-03-16 18:50:26.592918 +- Started: 2022-12-14 17:55:49.995383 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Vikipedio:Ĉefpaĝo (revision 7070684) -1-a de marto (revision 7133709) -10-a de marto (revision 7140053) -1812 (revision 6759865) -1836 (revision 6759900) -1870 (revision 6759944) -2-a de marto (revision 7134407) -2013 (revision 7120546) -2021 (revision 7133381) -20a jarcento (revision 6911173) -4-a de aprilo (revision 7095124) -7-a de februaro (revision 7126938) -7-a de marto (revision 7140031) -9-a de junio (revision 7096958) -Advokato (revision 7015897) -Alĝerio (revision 7136438) -Amazona arbaro (revision 7057380) -Anglio (revision 6910536) -Antikva Egiptio (revision 6715674) -Batao (revision 6348833) -Biero en Germanio (revision 5158902) -Bjalistoko (revision 7095427) -Charles Dickens (revision 7139853) -David Copperfield (romano) (revision 6728487) -Decembro de 2020 (revision 7115650) -Demotika lingvo (revision 6581652) -Duolingo (revision 6996800) -Eduko (revision 7064206) -Ekvatora Gvineo (revision 7111153) -El Greco (revision 7130251) -Emmanuel Macron (revision 7076767) -Esperantisto (revision 6583368) -Esperanto (revision 7125932) -Esperanto kaj Libera Scio (revision 7106401) -Eŭropa Kosma Agentejo (revision 6998003) -Fabriko (revision 6775703) -Februaro de 2021 (revision 7139991) -Fluganta Spagetmonstro (revision 7072467) -Fondaĵo Vikimedio (revision 7097854) -Francaj Armitaj Fortoj (revision 6521662) -Francio (revision 7035760) -Grandduklando Flandrensis (revision 7064691) -Hieroglifoj (revision 6475302) -Honkongo (revision 7022513) -Infanlaboro (revision 7043683) -Internacia Fonetika Alfabeto (revision 6826202) -Irlanda lingvo (revision 7108415) -Januaro de 2021 (revision 7119168) -Kreismo (revision 7029678) -Landport (revision 6722661) -Libera scio (revision 6432924) -Listen to Wikipedia (revision 6980163) -Listo de originalaj romanoj en Esperanto (revision 7134297) -Marto de 2021 (revision 7140759) -Metroo de Parizo (revision 7129616) -Monda Komerca Organizaĵo (revision 7135765) -Mutzig (revision 7085274) -Namacu (revision 6342288) -Ngozi Okonjo-Iweala (revision 7138302) -Niĝerio (revision 7135950) -Novelo (revision 7099911) -Oktobrofesto (revision 6860497) -Oseta Vikipedio (revision 7061966) -Portsmouth (revision 6756801) -Rolulo (revision 7078410) -Romano (revision 7102617) -San-Marino (revision 7075794) -Sismo (revision 6757493) -Slovaka Vikipedio (revision 6973132) -Strasburgo (revision 7139993) -Svahila Vikipedio (revision 6655220) -Telegram (aplikaĵo) (revision 6982939) -Teodoro Obiang Nguema Mbasogo (revision 6521358) -Verkisto (revision 6694998) -Vikio (revision 6761946) -Vikipedio (revision 7075981) -Vikipedio en Esperanto (revision 7075983) +Vikipedio:Ĉefpaĝo (revision 7731360) Ĉeĥa Vikipedio (revision 5571847) -Ĉinio (revision 7133172) -Ĵurnalisto (revision 7129724) --771 (revision 6917193) --86 (revision 7120146) -1058 (revision 6758857) -11-a de marto (revision 7140194) -1101 (revision 6758901) -1105 (revision 6758905) -1131 (revision 6758935) -1157 (revision 6758962) -12-a de marto (revision 7141381) -1290 (revision 6759097) -13-a de marto (revision 7142227) -1389 (revision 6759315) -14-a de marto (revision 7142231) -1420 (revision 6759383) -1445 (revision 6759438) -1456 (revision 6759463) -1457 (revision 6759465) -1459 (revision 6759469) +Septembro de 2022 (revision 7751838) +Josef von Sternberg (revision 6628019) +Duolingo (revision 7583415) +Tertremo (revision 7614767) +Kataro (revision 7817970) +Hieroglifoj (revision 7802012) +Novembro de 2022 (revision 7832261) +Prezidanto de Peruo (revision 7831028) +Parizo (revision 7784770) +Heinrich XIII. Prinz Reuß (revision 7830303) +11-a arondismento de Parizo (revision 7688747) +Januaro de 2021 (revision 7552896) +Germana Regno (revision 7699806) +Norma datumaro (revision 6513828) +Ŝtatoj de Batalhalto (revision 7735826) +1969 (revision 6760056) +Hamad bin Ĥalifa Al Thani (revision 7802234) +Serblingva Vikipedio (revision 6932117) +Ŝtatestro (revision 7812797) +1993 (revision 7108703) +Bengallingva Vikipedio (revision 7316128) +.qa (revision 5919748) +Dana lingvo (revision 6773327) +José de la Riva Agüero (revision 7828689) +1951 (revision 7822348) +Ĵaŭdo (revision 6739560) +Taĝikio (revision 7730013) +Ŝtalo (revision 7611953) +Hieratika skribo (revision 7372130) +19-a de novembro (revision 7834686) +Azerbajĝano (revision 7765205) +Chuck Smith (revision 7672657) +Futbala Mondpokalo 2022 (revision 7836965) +José Bernardo de Tagle (revision 7828685) +Ĉinio (revision 7795007) +Historia urboparto de Limo (revision 7829362) +Francisco Pizarro (revision 7624618) +2020 (revision 7724069) +Martiniko (revision 7676941) +1950-aj jaroj (revision 7836779) +Aprilo de 2020 (revision 6961405) +Kontrolnumero de la Kongresa Biblioteko de Usono (revision 6510248) +Framasono (revision 7777142) +2027 (revision 7234078) +Peruo (revision 7837496) +2011 (revision 7534568) +Alma mater (revision 6740009) +Havaja lingvo (revision 7491016) +Moderna norma araba (revision 7490429) +Baluĉa lingvo (revision 7285034) +Angla lingvo (revision 7562773) +San-Paŭlo (revision 7676139) +Subdukcio (revision 7011255) +Gaŭloj (revision 7244672) +Ptolemeo (revision 7117561) +Norveglingva (bokmål) Vikipedio (revision 7325731) +Jemeno (revision 7664252) +Septembro de 2021 (revision 7552905) +Latvlingva Vikipedio (revision 7317460) +Latinlingva Vikipedio (revision 7254212) +Nacia Biblioteko de Hispanio (revision 7600762) +Meurthe-et-Moselle (revision 7291290) +Tertremo de Sendajo en 2011 (revision 7695661) +2016 (revision 7629713) +Antisemitismo (revision 7798917) +Oskar-premio (revision 7597140) +Shanghai-Express (revision 7744450) +28-a de julio (revision 7672933) +Android (revision 7828605) +Blanka Teleki (revision 6607101) +2019 (revision 7534565) +Okcitanlingva Vikipedio (revision 7317521) +Kantono Los-Anĝeleso (revision 7768299) +Finna lingvo (revision 7793584) +Konstruiganto (revision 6705572) +Danio (revision 7792103) +Sumero (revision 7803120) +Papiro (revision 7571097) +Akademio de Esperanto (revision 7822366) +Pola lingvo (revision 7820090) +Teodiceo (revision 7778171) +Februaro de 2020 (revision 7003017) +Natura fenomeno (revision 5879936) +Junio de 2022 (revision 7647823) +Roko (revision 7568259) +Isère (revision 7135564) +Krimoj kontraŭ la homeco (revision 7608270) +Decembro de 2020 (revision 7115650) +Judo (revision 7573815) +Holivudo (revision 7432115) +Hindia lingvo (revision 7757284) +Sismologio (revision 7106981) +Ideogramoj (revision 7704324) +Hispana (revision 7788151) +Système universitaire de documentation (revision 6131485) +2025 (revision 7120606) +22-a de novembro (revision 7828389) +Piramido de la Luvro (revision 7630641) +1906 (revision 7331467) +Rozeta ŝtono (revision 7267771) +10-a arondismento de Parizo (revision 7667691) +Jean-François Champollion (revision 7729249) +Ukraina lingvo (revision 7680312) +VIAF (revision 6620334) +Horzono (revision 7708921) +2024 (revision 7466694) +Reuß älterer Linie (revision 7830307) +Arablingva Vikipedio (revision 7830562) +Angla (revision 7562773) +2008 (revision 7534573) +Jura (departemento) (revision 7250513) +Egipta civilizo (revision 7799968) +Pedro Castillo (revision 7831022) +Thomas Young (revision 7203308) +Beloruslingva Vikipedio (revision 7810971) +Japanio (revision 7822972) +Momant-magnituda skalo (revision 4993757) +Dina Boluarte (revision 7832299) +Kirgizio (revision 7740383) +Demotika skribo (revision 7096660) +Lundo (revision 7400575) +Egipta lingvo (revision 6473910) +Anna Löwenstein (revision 7361028) +Nov-Ĵerzejo (revision 7193354) +Turingio (revision 7466586) +Enciklopedio (revision 7663790) +Ukrainlingva Vikipedio (revision 6926624) +Makedonlingva Vikipedio (revision 7316121) +Florido (revision 7546947) +Tibeto (revision 7730713) +Sklavo (revision 7570090) +Konferenco pri Aplikoj de Esperanto en Scienco kaj Tekniko (revision 7761527) +Slovaklingva Vikipedio (revision 6973132) +Gaston WARINGHIEN (revision 7472160) +1990-aj jaroj (revision 7836793) +Folio (revision 7722236) +Tajlanda suna kalendaro (revision 5091531) +Lipo (lando) (revision 7362353) +Juda demando (revision 7073445) +Gare de l'Est (revision 7792626) +Kalifornio (revision 7832672) +Afrikanslingva Vikipedio (revision 7127376) +1647 (revision 7816836) +NPIV (revision 7786706) +Esperanta Antologio (revision 7425395) +Faŭlto (revision 7034371) +32-a jarcento a.K. (revision 7121166) +Harry S. Truman (revision 7459878) +Rumanio (revision 7794309) +Huascarán (revision 7603050) +Kotono (revision 7768542) +Estonlingva Vikipedio (revision 6932120) +1998 (revision 6760088) +Zenono el Elajo (revision 7334004) +Okcidentazio (revision 7730075) +Nonio Marcelo (revision 7617559) +Valentin Melnikov (revision 7758466) +1825 (revision 6759885) +Politikisto (revision 7644750) +4-a jarcento (revision 7804795) +Municipo (revision 7795836) +Limo (revision 7760530) +Burgonjo-Franĉkonteo (revision 7778470) +Andamanoj kaj Nikobaroj (revision 7670688) +Egiptio (revision 7306335) +23-a de junio (revision 7836496) +Greka lingvo (revision 7682811) +Majoto (revision 7676958) +Karl Barthel (revision 7251507) +Place de la Bastille (revision 7181764) +Christine Lieberknecht (revision 7624363) +1842 (revision 6759907) +Luksemburgo (revision 7760190) +Ŝangrilao (revision 6275321) +Slovakio (revision 7790988) +Hebrea kalendaro (revision 7072955) +Germane (revision 7774973) +Astronomo (revision 5758622) +1685 (revision 7127859) +Moldavio (revision 7792125) +Afganio (revision 7804514) +August Robert Wolff (revision 6085672) +1712 (revision 6759741) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 18:54:42.162702 +- Wikipedia parsing ended at: 2022-12-14 17:59:23.879681 -55 characters appeared 738091 times. +61 characters appeared 907830 times. -First 32 characters: -[ 0] Char a: 12.443858548607151 % -[ 1] Char o: 9.828462886012701 % -[ 2] Char e: 9.238969178597218 % -[ 3] Char i: 8.570894374812863 % -[ 4] Char n: 7.557604685601098 % -[ 5] Char r: 6.426172382538196 % -[ 6] Char t: 5.784923539238386 % -[ 7] Char l: 5.684935868341437 % -[ 8] Char s: 5.134326255163659 % -[ 9] Char k: 4.062778166919797 % -[10] Char d: 3.544278415534128 % -[11] Char j: 3.39619369427347 % -[12] Char u: 2.807783864049284 % -[13] Char m: 2.731370522062998 % -[14] Char p: 2.685847680028614 % -[15] Char g: 1.6155189536249595 % -[16] Char v: 1.417033942969092 % -[17] Char c: 1.328968921176386 % -[18] Char b: 1.1882003709569686 % -[19] Char f: 1.1564969631115947 % -[20] Char h: 0.6592683016050866 % -[21] Char z: 0.6408423893530744 % -[22] Char ĝ: 0.5576548149211953 % -[23] Char ŭ: 0.44980903438735875 % -[24] Char ĉ: 0.3391180762263732 % -[25] Char w: 0.15404604581277917 % -[26] Char y: 0.13819434189009214 % -[27] Char ŝ: 0.12938783971082157 % -[28] Char ĵ: 0.1166522827131072 % -[29] Char á: 0.04579381133220701 % -[30] Char é: 0.039155063535526106 % -[31] Char ĥ: 0.031025984600814804 % +Most Frequent characters: +[ 0] Char a: 13.19993831444213 % +[ 1] Char o: 9.679565557428154 % +[ 2] Char e: 9.106220327594373 % +[ 3] Char i: 8.34440368791514 % +[ 4] Char n: 7.355121553594836 % +[ 5] Char r: 6.154125772446383 % +[ 6] Char l: 5.914984082923015 % +[ 7] Char t: 5.666259101373605 % +[ 8] Char s: 5.044336494718174 % +[ 9] Char k: 4.068713305354526 % +[10] Char d: 3.571373495037617 % +[11] Char j: 3.538878424374608 % +[12] Char u: 2.9623387638654815 % +[13] Char m: 2.7548109227498543 % +[14] Char p: 2.654131280085479 % +[15] Char g: 1.579590892567992 % +[16] Char v: 1.5284800017624445 % +[17] Char c: 1.2349228379762731 % +[18] Char b: 1.139310223279689 % +[19] Char f: 1.0745403875174868 % +[20] Char z: 0.6510029410792769 % +[21] Char h: 0.6175164953790908 % +[22] Char ĝ: 0.5909696749391405 % +[23] Char ŭ: 0.4724452816055869 % +[24] Char ĉ: 0.3694524305211328 % +[25] Char ŝ: 0.17602414548979434 % +[26] Char y: 0.1180837822059196 % +[27] Char w: 0.10596697619598384 % +[28] Char ĵ: 0.09858673980811385 % +[29] Char ĥ: 0.042298668252866725 % -The first 32 characters have an accumulated ratio of 0.9990556719970846. +The first 30 characters have an accumulated ratio of 0.9981439256248417. +The first 3 characters have an accumulated ratio of 0.31985724199464655. +All characters whose order is over 19 have an accumulated ratio of 0.032423471354769064. -1066 sequences found. +1198 sequences found. -First 512 (typical positive ratio): 0.995442680189542 -Next 512 (512-1024): 0.004498090343873587 -Rest: 6.983124116715766e-05 +First 496 (typical positive ratio): 0.9950012527506046 +Next 287 (783-496): 0.004000631822044021 +Rest: 0.0009981154273513981 -- Processing end: 2021-03-16 18:54:42.252378 +- Processing end: 2022-12-14 17:59:24.035529 diff --git a/script/BuildLangModelLogs/LangEstonianModel.log b/script/BuildLangModelLogs/LangEstonianModel.log index 31acf96..6cda38d 100644 --- a/script/BuildLangModelLogs/LangEstonianModel.log +++ b/script/BuildLangModelLogs/LangEstonianModel.log @@ -1,160 +1,226 @@ = Logs of language model for Estonian (et) = - Generated by BuildLangModel.py -- Started: 2021-03-16 18:58:31.291439 +- Started: 2022-12-14 17:59:35.360086 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Harilik pohl (revision 5703478) -A-vitamiin (revision 5556956) -Aasta keskmine sademete hulk (revision 5284375) -Aasta keskmine õhutemperatuur (revision 5542687) -Ahm (revision 5513665) -Ain Raal (revision 5662146) -Alalehed (revision 4983554) -Alamliik (revision 5278935) -Alaska (revision 5844590) -Aleksander Heintalu (revision 5754094) -Aleuudid (revision 4704649) -Ameerika jänes (revision 5843342) -Ameerika valgejänes (revision 5411720) -Anneli Sihvart (revision 3546469) -Arbutiin (revision 4451788) -Baribal (revision 5793838) -Bensoehape (revision 5172889) -Binaarne nomenklatuur (revision 5719069) -C-vitamiin (revision 5487089) -Droog (revision 5051359) -E-vitamiin (revision 5553995) -Eesti (revision 5807277) -Eesti Entsüklopeediakirjastus (revision 5697753) -Eesti köök (revision 5622964) -Ellips (revision 5425749) -Emakakael (botaanika) (revision 3521516) -Euraasia (revision 5843444) -Fenoloogia (revision 3512905) -Folaadid (revision 5695132) -Fosfor (revision 5817280) -Fotosüntees (revision 5849350) -Fruktoos (revision 5580398) -Glükoos (revision 5398752) -Gneiss (revision 4333338) -Graniit (revision 5788916) -Gröönimaa (revision 5704662) -Halljänes (revision 5844682) -Haned (revision 5655933) -Happeline keskkond (revision 2966453) -Heilongjiang (revision 5573413) -Hendrik Relve (revision 5776793) -Hiina (revision 5842572) -Holland (revision 5563481) -Hunt (revision 5833431) -Hõimkond (revision 5594301) -Hüdrofiilsus (revision 4309797) -Ida-Euroopa (revision 5852084) -Ida-sinilind (revision 3944751) -Ida-vöötorav (revision 5772003) -Igihaljus (revision 5718075) -Ilves (revision 5810469) -Imetaja (revision 5817468) -Immuunsus (revision 5465129) -Indiaanlased (revision 5715264) -Indrek Rohtmets (revision 5460729) -Itaalia (revision 5821960) -Jaapan (revision 5848576) -Jilin (revision 5551781) -Jood (revision 5506157) -Juurestik (revision 3341159) -Jääkaru (revision 5798648) -Jõhvikas (revision 5765158) -Kaalium (revision 5506158) -Kaheidulehelised (revision 4551109) -Kaheli õiekate (revision 3063362) -Kahesuguline õis (revision 3383221) -Kaitsestaatus (revision 5622492) -Kajakas (revision 5799897) -Kalorsus (revision 5843070) -Kaltsium (revision 5506160) -Kanada (revision 5846973) -Kanalised (revision 4824603) +Harilik pohl (revision 6214729) +Taiga (revision 5484072) Kanarbikulaadsed (revision 4318215) -Kanarbikulised (revision 5479568) -Karboksüülhapped (revision 5328337) -Karoteen (revision 5479578) -Kasvuperiood (revision 5279042) -Katteseemnetaimed (revision 5315975) -Kaukasus (revision 4476003) -Kesk-Euroopa (revision 5381871) -Kimalane (revision 5643935) -Kiudained (toit) (revision 5762236) +Happeline keskkond (revision 2966453) +Lipiidid (revision 6039358) +Rootsi (revision 6230381) +Tuhk (revision 5757120) +Euraasia (revision 5979562) +Skandinaavia poolsaar (revision 4991435) +Kaukasus (revision 5875774) +E-vitamiin (revision 6259630) +Rahvameditsiin (revision 6232590) +Lumepüü (revision 6104785) +Arktika (revision 6173211) +Armeenia (revision 6255780) +Aasia (revision 5829266) +Tundra (revision 6073824) +Manner (revision 5960299) +Kilomeeter (revision 5864020) +Põhja-Euroopa (revision 6205189) +Vulkaaniline tuhk (revision 5623096) +Euroopa (revision 6164900) +Sugukond (bioloogia) (revision 5633781) +Must meri (revision 6209529) +Kanalised (revision 5958228) +Mükoheterotroof (revision 3527932) +Skandinaavia (revision 6068693) +Antioksüdant (revision 5283121) +Mineraal (revision 6017794) +Maa (planeet) (revision 6267326) +Itaalia (revision 6249954) +Kaspia meri (revision 5599349) +Rapsiõli (revision 6220266) +Hulkrakne (revision 5976930) +Valgevene (revision 6249613) +Västmanlandi lään (revision 6039606) +Skogskyrkogården (revision 6000925) +Hüpofüüs (revision 6144291) +Metsatulekahju (revision 6201635) +Igijää (revision 5305291) +Aasovi meri (revision 5443749) +Skåne maakond (revision 5898845) +Soodsas seisundis (revision 5579863) +Läänemeri (revision 6160779) +Aafrika manner (revision 6197378) +Orgaaniline aine (revision 5443322) +Barentsi meri (revision 5544511) +Iraan (revision 6168382) +Süsinik (revision 5951019) +19. veebruar (revision 6058910) +USA dollar (revision 6104587) +Fennoskandia kilp (revision 6177694) +Väike-Kaukasus (revision 5482811) +Küttepuud (revision 6212731) +Rebane (revision 6247485) +Põlevkivi (revision 6259723) Klass (bioloogia) (revision 3489567) +Elbrus (revision 5852104) +Kask (revision 6202151) +Austraalia manner (revision 5442365) +Kivisüsi (revision 6130479) +Eulitoraal (revision 4240852) +Venemaa (revision 6252977) +Isomeer (revision 5258491) +Nurmenukulised (revision 5842619) +Meä keel (revision 6212690) +Binaarne nomenklatuur (revision 5719069) +Riik (bioloogia) (revision 6055648) +Linnud (revision 6218411) +Skulptor (revision 5941312) +Soome keel (revision 6212800) +Lipoproteiinid (revision 5758152) +Kaitsestaatus (revision 5622492) +Antioksüdandid (revision 5283121) +Pelagiaal (revision 3524195) +Suur-Kaukasus (revision 5162874) +Mälaren (revision 5790265) +Magma (revision 5876846) +Tahm (revision 5293173) +Hõimkond (revision 6055579) +Peaahelik (revision 5557886) +Mangaan (revision 6167758) +Gruusia (revision 6261387) +Miljard (revision 6192929) +Antarktis (revision 6241944) +Poolsaar (revision 5285634) +Põhja-Ameerika manner (revision 5482041) +Liik (bioloogia) (revision 6203064) +Selts (bioloogia) (revision 5233008) +Riigikirik (revision 5749017) +Maailmajagu (revision 5713103) +Steroidid (revision 5315278) +Vingugaas (revision 6037399) +Tuhaplokk (revision 6141202) +Litoraal (revision 3529895) +Rahvaarv (revision 5572845) +Lõuna-Ameerika manner (revision 5969300) +Triviaalnimetus (revision 5987887) +Norra meri (revision 5635004) +Parasvöötme okasmetsad (revision 3680619) +Hüdrotermid (revision 2106404) +Faasanlased (revision 5332197) +Atlandi ookean (revision 6248758) +Ruutkilomeeter (revision 5300255) +Vahad (revision 5657711) +Süda (revision 6226792) +Perekond (bioloogia) (revision 6200916) +Põhja-Ameerika (revision 6057090) +USGS (revision 6227151) +Alamhõimkond (revision 5416587) +Bentaal (revision 5842654) +Kaasani khaaniriik (revision 6023584) +Xaafuuni neem (revision 3164564) +Galenos (revision 6256167) +Medicina (revision 6133657) +Mootor (revision 6164617) +Maailmameri (revision 6100027) +Alamperekond (revision 3562833) +Virmalised (revision 6034517) +Süsinik-14 (revision 5438227) +Valgus (revision 6111722) +Naatriumkloriid (revision 5735729) +Söödav rannakarp (revision 5849789) +Mosambiigi väin (revision 5456772) +Saamid (revision 6267441) Kliima (revision 5719219) -Korea (revision 5555270) -Kroom (revision 5506123) -Kroonlehed (revision 3543291) -Kuusepüü (revision 5715613) -Kvertsetiin (revision 5610539) -Laanemets (revision 5751227) -Laanepüü (revision 5747330) -Laiuskraad (revision 4993978) -Leesikas (revision 5842030) -Lehed (revision 5725384) -Leheroots (revision 5532086) -Liik (bioloogia) (revision 5791564) -Liiv (revision 5675176) -Liivakivi (revision 5548801) -Linnaeus (revision 5635181) +Berüllium (revision 6265103) +Eesti (revision 6265105) +Läänepoolkera (revision 5702463) +Vahemeri (revision 6242683) +322 eKr (revision 5491330) +Kalorsus (revision 5843070) +Aafrika (revision 6267646) +Viskoossus (revision 5965904) +Tahkis (revision 5641786) +Sölkupi keel (revision 6212668) +Domeen (bioloogia) (revision 5948946) +Põder (revision 6137122) +Edela-Aasia (revision 5297944) +Zooloogia (revision 5707518) +Rohumaa (revision 5416639) +Vaikne ookean (revision 5614735) +Alepõllundus (revision 4784483) +Benguela hoovus (revision 5538476) +Ubikinoonid (revision 5553988) +Marss (revision 6220107) +Juriidiline isik (revision 6019028) +NCI (revision 5398920) +Rumeenia (revision 6177876) +Põldpisikas (revision 5427796) +Aserbaidžaani keel (revision 6224462) +Kesk-Euroopa (revision 6011387) +Västerås (revision 6261231) +Orgaaniline keemia (revision 6188582) +Alamselts (revision 3769559) +Tihumeeter (revision 5916725) +Soojusenergia (revision 5677639) +Lõuna-Aasia (revision 6239881) +Šelf (revision 5747738) +Svalbard (revision 6210637) +Ida-sinivutt (revision 4824435) +Plastilisus (revision 5162815) +Nafta (revision 6219123) +Keemiline ühend (revision 6245974) +Rannavöönd (revision 5285661) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:01:52.570995 +- Wikipedia parsing ended at: 2022-12-14 18:02:05.521306 -55 characters appeared 482798 times. +60 characters appeared 605812 times. -First 34 characters: -[ 0] Char a: 12.61500669016856 % -[ 1] Char i: 10.380117564695794 % -[ 2] Char e: 10.063007717513328 % -[ 3] Char s: 8.719795856652263 % -[ 4] Char t: 6.619538606207979 % -[ 5] Char l: 6.04559256666349 % -[ 6] Char u: 5.504372429049002 % -[ 7] Char n: 5.077278696266347 % -[ 8] Char k: 4.702380705802427 % -[ 9] Char o: 4.470606754791859 % -[10] Char d: 4.163438953765343 % -[11] Char r: 3.6719290469306007 % -[12] Char m: 3.5747869709485123 % -[13] Char v: 2.4621063053285224 % -[14] Char p: 1.8848462503987176 % -[15] Char g: 1.8341003898110597 % -[16] Char h: 1.7551853984482124 % -[17] Char j: 1.7216309926718836 % -[18] Char ä: 1.033972800218725 % -[19] Char õ: 0.9384877319292955 % -[20] Char b: 0.8972696655744226 % -[21] Char ü: 0.6507897712915132 % -[22] Char f: 0.34610748180398426 % -[23] Char c: 0.30426803756436444 % -[24] Char ö: 0.24275162697442823 % -[25] Char y: 0.1056342404069611 % -[26] Char x: 0.05550975770405014 % -[27] Char w: 0.035211413468987034 % -[28] Char z: 0.025476493274620024 % -[29] Char q: 0.019884092311898558 % -[30] Char š: 0.017605706734493517 % -[31] Char é: 0.009527794232784725 % -[32] Char ō: 0.009113542309620172 % -[33] Char ž: 0.00869929038645562 % +Most Frequent characters: +[ 0] Char a: 12.587733488276893 % +[ 1] Char i: 10.41016024773362 % +[ 2] Char e: 10.314751110905695 % +[ 3] Char s: 8.609106455468032 % +[ 4] Char t: 6.5521977115012575 % +[ 5] Char l: 5.932203389830509 % +[ 6] Char u: 5.334163073692829 % +[ 7] Char n: 5.233141634698554 % +[ 8] Char k: 4.634110912296223 % +[ 9] Char o: 4.480763008986286 % +[10] Char r: 4.1257023631093475 % +[11] Char d: 4.057859533980839 % +[12] Char m: 3.8244537909450456 % +[13] Char v: 2.4172515565885124 % +[14] Char g: 1.8652651317570466 % +[15] Char p: 1.7193452754319822 % +[16] Char j: 1.6229457323394056 % +[17] Char h: 1.546684449961374 % +[18] Char ä: 1.1340151730239745 % +[19] Char õ: 0.9801720665817119 % +[20] Char b: 0.9091929509484792 % +[21] Char ü: 0.6977412134457555 % +[22] Char f: 0.2794596343420071 % +[23] Char c: 0.22878384713409441 % +[24] Char ö: 0.21475309171822282 % +[25] Char y: 0.07147431876555763 % +[26] Char w: 0.044403214198464214 % +[27] Char z: 0.04291760480148957 % +[28] Char š: 0.03879091203211557 % +[29] Char x: 0.021458802400744784 % +[30] Char ž: 0.020468396136095024 % +[31] Char q: 0.01436089083742151 % -The first 34 characters have an accumulated ratio of 0.9996603134230051. +The first 32 characters have an accumulated ratio of 0.9996583098386959. +The first 3 characters have an accumulated ratio of 0.33312644846916206. +All characters whose order is over 18 have an accumulated ratio of 0.03563976943342159. -869 sequences found. +915 sequences found. -First 512 (typical positive ratio): 0.9973685549586747 -Next 512 (512-1024): 8.69929038645562e-05 -Rest: -3.122502256758253e-17 +First 453 (typical positive ratio): 0.9950155799281164 +Next 161 (614-453): 0.003991392475357514 +Rest: 0.000993027596526086 -- Processing end: 2021-03-16 19:01:52.649852 +- Processing end: 2022-12-14 18:02:05.599161 diff --git a/script/BuildLangModelLogs/LangFinnishModel.log b/script/BuildLangModelLogs/LangFinnishModel.log index f7247f3..bc7bbef 100644 --- a/script/BuildLangModelLogs/LangFinnishModel.log +++ b/script/BuildLangModelLogs/LangFinnishModel.log @@ -1,157 +1,243 @@ = Logs of language model for Finnish (fi) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:01:52.812613 +- Started: 2022-12-14 17:58:36.527994 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Yhdistynyt kuningaskunta (revision 19524940) -2014 (revision 19539649) -Aasia (revision 19400161) -Abhasia (revision 19547259) -Adolf Hitler (revision 19547632) -Advanced Level (revision 18652085) -Afrikka (revision 19621405) -Agatha Christie (revision 19512386) -Aikavyöhyke (revision 19555749) -Ajoneuvon kansallisuustunnus (revision 18193201) -Akrotiri ja Dhekelia (revision 18855449) -Alamaat (revision 19549275) -Alan Turing (revision 19549334) -Alankomaat (revision 19640525) -Albania (revision 19549481) -Alec Guinness (revision 19544530) -Alexander Fleming (revision 19522285) -Alfred Hitchcock (revision 19402710) -Alfred Tennyson (revision 19481118) -Allen Jones (revision 19591974) -Andorra (revision 19511981) -Andrew Lloyd Webber (revision 18151455) -Anglit (revision 19065858) -Anguilla (revision 19591105) -Anne Brontë (revision 19340812) -Anthony Eden (revision 19341665) -Antigua ja Barbuda (revision 18868418) -Arabian Lawrence (revision 19429776) -Argentiina (revision 19507062) -Armenia (revision 19633290) -Arthur Conan Doyle (revision 19393798) -Arts and crafts (revision 19285842) -Artsakhin tasavalta (revision 19508669) -Atlantin valtameri (revision 19417172) -Aurinko (revision 19558951) -Australia (revision 19585414) -Avara luonto (revision 19570513) -Azerbaidžan (revision 19618379) -BBC (revision 19151226) -BKT (revision 19395273) -Bahama (revision 19614968) -Bangladesh (revision 19529050) -Bank of England (revision 17954121) -Barbados (revision 19193877) -Barbara Hepworth (revision 19016157) -Bath (revision 19316232) -Beatrix Potter (revision 19590080) -Belfast (revision 19638226) -Belgia (revision 19623003) -Belize (revision 18839172) -Ben Nevis (revision 19287404) -Bengalin kieli (revision 19361714) -Benjamin Britten (revision 19284581) -Bermuda (revision 19508737) -Bertrand Russell (revision 19418381) -Bhutan (revision 19609977) -Big Ben (revision 19521754) -Big Brother (revision 19638747) -Birmingham (revision 19638219) -Birminghamin kansainvälinen lentoasema (revision 19638219) -Black Sabbath (revision 19637531) -Bodiamin linna (revision 19288333) -Boris Johnson (revision 18896646) -Bosnia ja Hertsegovina (revision 19317622) -Botswana (revision 19174485) -Brexit (revision 19428746) -Bristol (revision 19316368) -Bristolin kansainvälinen lentoasema (revision 19316368) -Britannia (provinssi) (revision 19620168) -Britannia (täsmennyssivu) (revision 19524940) -Britannian alahuone (revision 19561351) -Britannian avoin golfturnaus (revision 18803777) -Britannian kuninkaallinen perhe (revision 18877640) -Britannian merentakaiset alueet (revision 18985200) -Britannian talous (revision 19363886) -Britannian väestö (revision 19334304) -Britannian ylähuone (revision 19561348) -Britteinsaaret (revision 19149527) -Brittiläinen Antarktiksen alue (revision 19065469) -Brittiläinen Intia (revision 19532682) -Brittiläinen Intian valtameren alue (revision 19386472) -Brittiläinen imperiumi (revision 18932562) -Brittiläinen keittiö (revision 18379105) -Brittiläinen kulttuuri (revision 19490255) -Brittiläiset Neitsytsaaret (revision 19078289) -Brittiläiset merentakaiset alueet (revision 18985200) -Brunei (revision 19566565) -Bruttokansantuote (revision 19395273) -Bulgaria (revision 19361771) -Burma (revision 19618164) -Cambridge (revision 19030154) -Cambridgen yliopisto (revision 18847878) -Canterburyn tarinoita (revision 19505844) -Cardiff (revision 18124102) -Caymansaaret (revision 19078996) -Ceylonin dominio (revision 18848736) -Channel 4 (revision 19210598) -Charles Babbage (revision 19265262) -Charles Chaplin (revision 19446083) -Charles Darwin (revision 19338522) +Yhdistynyt kuningaskunta (revision 21066772) +Viidakkokirja (revision 20635233) +Jalkapallon EM (revision 21042497) +Sierra Leone (revision 21069376) +Suezin kriisi (revision 20508267) +Rooman valtakunta (revision 21050659) +Liettua (revision 21045326) +Avara luonto (revision 20695283) +Sambia (revision 21095544) +Itävalta (revision 20830864) +Vanguard-luokka (sukellusvene) (revision 20477212) +Luettelo lentokoneiden maatunnuksista (revision 20734195) +Eduardo Paolozzi (revision 20995322) +Asian Football Confederation (revision 20988614) +Bagheera (revision 16479904) +Euro (revision 20949849) +Juhani Rajalin (revision 19931124) +Riemukaari (revision 20935977) +Yhdysvallat (revision 21055552) +Vytautas Suuri (revision 20970704) +Egypti (revision 21070115) +LIBRIS (revision 20939446) +Vatikaanivaltio (revision 21017236) +Kansainvaellus (revision 21080725) +Saudi-Arabia (revision 20944836) +Suuri koalitio (revision 20232862) +Mosaiikki (revision 20906833) +Baloo (revision 20763454) +Kuwait (revision 21106233) +Eteläinen maakunta (Sambia) (revision 20422262) +Meri-ilmasto (revision 20901423) +Hellenismi (revision 20694245) +Kesäolympialaiset 1952 (revision 21066831) +Guatemala (revision 21008328) +Kuvataide (revision 20715591) +Luettelo valtioiden motoista (revision 19248110) +Televisio (revision 21039488) +Jimmy Carter (revision 20860817) +Yhdysvaltain presidentti (revision 20887192) +Viidakkokirja 2 (revision 20767875) +Madagaskar (revision 21095571) +Iranin panttivankikriisi (revision 21067629) +Kansallislaulu (revision 20872469) +Maanosaliittojen cup (revision 18212691) +Yle TV1 (revision 20803186) +Intia (revision 21059702) +Jugoslavia (revision 20463252) +Wings (revision 20640965) +YK:n vuosituhattavoitteet (revision 20974997) +Trove (revision 20444535) +Jarmo Heikkinen (revision 19570516) +Viidakkokirja (vuoden 1967 elokuva) (revision 20185382) +Malaria (revision 20928866) +Rein (revision 20504590) +Israel (revision 21100117) +Kaa (revision 15234624) +Australia (revision 21043933) +Eisenhowerin oppi (revision 20672159) +Malediivit (revision 21059705) +Kansainvälinen suuntanumero (revision 21106282) +Toinen maailmansota (revision 21060840) +Liettuan ilmavoimat (revision 21034693) +Royal Navy (revision 20975712) +Norja (revision 21098726) +Portugali (revision 21086736) +Siperia (revision 20948582) +Jalkapallon naisten maailmanmestaruuskilpailut (revision 20929745) +Botswana (revision 20996095) +Ilkka Rekiaro (revision 20852225) +Mesopotamia (revision 21039948) +Tšekin kansalliskirjasto (revision 20963608) +Avara luonto (albumi) (revision 17791872) +HMS Vengeance (S31) (revision 20088779) +Netflix (revision 21073023) +Uusi-Seelanti (revision 20832556) +Strategic Defense Initiative (revision 21072042) +1998 (revision 21076497) +Astute-luokka (revision 17775821) +Mosambik (revision 21095560) +Swazimaa (revision 21064923) +Camp Davidin kokous 2000 (revision 21077236) +Nisäkkäät (revision 21024984) +Via Appia (revision 19888295) +SNAC (revision 20340511) +Suomen kieli (revision 21076647) +Linnut (revision 21108366) +Uzbekistan (revision 20965691) +Assuanin pato (revision 20818327) +Kulta (revision 21029759) +Nato (revision 21049954) +Vanuatu (revision 20974348) +BBC (revision 20873802) +Karsinnat jalkapallon Euroopan-mestaruuskilpailuihin 2004 (revision 18159826) +Egon Schiele (revision 20539180) +Gootit (revision 20838853) +Solidus (revision 21048510) +HMS Vigilant (S30) (revision 20088769) +Margaret Thatcher (revision 20827653) +Dubbaus (revision 20867977) +Kupiškisin kunnallispiiri (revision 20885068) +Transnistria (revision 21035858) +Gemssi (revision 20173455) +Diocletianus (revision 20415313) +Britannian pääministeri (revision 20997931) +2005 (revision 21082318) +Joseph Haydn (revision 20916394) +Englannin kieli (revision 20829497) +UEFA (revision 20678496) +Tuomo Holopainen (revision 18251717) +Tasavalta (revision 20955758) +Moyamban piirikunta (revision 20907223) +Eliöyhteisö (revision 19996015) +Saksan jalkapallomaajoukkue (revision 21093030) +Valentine Strasser (revision 20928633) +Filippiinit (revision 21108354) +Sambian historia (revision 21066392) +Jalkapallo (revision 21048960) +Liettuan rautatiet (revision 20888119) +Galerius (revision 16059965) +Mongu (revision 20330931) +CANTIC (revision 20519473) +Yleisradio (revision 21019825) +Rudyard Kiplingin viidakkokirja (revision 20264888) +Kyproksen tasavalta (revision 21109502) +Trident (revision 20011988) +Frans II (Saksa) (revision 20111793) +Togo (revision 21069236) +Jamaika (revision 21055658) +Jalkapallon alle 20-vuotiaiden naisten maailmanmestaruuskilpailut (revision 19733995) +Maailmanpankki (revision 20797276) +BZÖ (revision 20746980) +Kongressin kirjaston nimitunniste (revision 20255813) +Kongon demokraattinen tasavalta (revision 21068165) +Upholder-luokka (revision 20081478) +Sierra Leonen talous (revision 20947953) +Benito Mussolini (revision 20954243) +USD (revision 21093482) +1940-luku (revision 20593365) +Gaius Marius (revision 20807306) +Malta (revision 21066820) +Maxentius (revision 19071853) +Baselin satama (revision 20652635) +Tunisia (revision 21075200) +Brittiläinen imperiumi (revision 21053583) +Kalulushi (revision 20327301) +Rolf Klemmt (revision 20592379) +Dag Hammarskjöld (revision 21022318) +Probus (revision 21058679) +Uskontunnustus (revision 20995790) +Veri (revision 21060523) +Vilhelm III Oranialainen (revision 20454645) +Confédération africaine de football (revision 20180632) +MTV3 HD (revision 21061331) +Caymansaaret (revision 20964785) +Pekka Lehtosaari (revision 20805538) +Radiomafia (revision 21009531) +Humlegården (revision 19328994) +WorldCat Identities (revision 19693265) +Gibraltar Football League (revision 21007057) +Brasilia (revision 21075071) +Malesia (revision 21044093) +Feodalismi (revision 20594613) +101 dalmatialaista (vuoden 1961 elokuva) (revision 19610112) +Ranskan kansalliskirjasto (revision 21096977) +Jenisei (revision 19985711) +Ranskan lähiöiden väkivaltaisuudet 2005 (revision 20199546) +Krusifiksi (revision 20852091) +Felipe González (revision 20832000) +Israelin kansalliskirjasto (revision 20854961) +Guadeloupe (revision 20300349) +Johann Friedrich Gmelin (revision 19923689) +Riemukas Robinsonin perhe (revision 20067208) +Kiinan naisten jalkapallomaajoukkue (revision 20879478) +Tonni (revision 20959628) +Koripallo (revision 20966291) +Pietari (kaupunki) (revision 21066797) +Lentotukialus (revision 21033679) +Miguel Asturias (revision 20902807) +Uhanalaisuusluokitus (revision 21085039) +Ranska (revision 21070595) +Allianssi (Ruotsi) (revision 20258184) +Operaatio Barbarossa (revision 21019632) +Somalia (revision 21069870) +National and University Library in Zagreb (revision 19632374) +Riimukivi G 319 (revision 20825382) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:06:31.128554 +- Wikipedia parsing ended at: 2022-12-14 18:01:43.322869 -65 characters appeared 1138649 times. +77 characters appeared 2016658 times. -First 30 characters: -[ 0] Char a: 12.546096294819561 % -[ 1] Char i: 10.975375203420896 % -[ 2] Char n: 8.908627680698793 % -[ 3] Char t: 8.82282424171101 % -[ 4] Char e: 7.780448584243256 % -[ 5] Char s: 7.584865924442036 % -[ 6] Char l: 5.942217487566405 % -[ 7] Char o: 5.487731513398773 % -[ 8] Char u: 5.063017663915746 % -[ 9] Char k: 4.558296718303885 % -[10] Char r: 3.1709508373519846 % -[11] Char m: 3.0275352632813095 % -[12] Char ä: 2.9864339230087587 % -[13] Char v: 2.178195387691905 % -[14] Char j: 1.9307969356667418 % -[15] Char h: 1.9113001460502754 % -[16] Char p: 1.6500256005142937 % -[17] Char y: 1.593203875821258 % -[18] Char d: 1.2042341406350858 % -[19] Char b: 0.6837049872260899 % -[20] Char g: 0.5634747845912129 % -[21] Char c: 0.4858389196319498 % -[22] Char ö: 0.38018739752109737 % -[23] Char f: 0.21982191175682764 % -[24] Char w: 0.19382619226820558 % -[25] Char z: 0.0598955428758116 % -[26] Char x: 0.02564442598201904 % -[27] Char ž: 0.009484924678281015 % -[28] Char š: 0.009309277924979516 % -[29] Char q: 0.007201516885361511 % +Most Frequent characters: +[ 0] Char a: 12.938435768484293 % +[ 1] Char i: 11.156576871239448 % +[ 2] Char t: 8.911426726792545 % +[ 3] Char n: 8.601309691578841 % +[ 4] Char s: 7.843570897990635 % +[ 5] Char e: 7.611255850025141 % +[ 6] Char l: 6.102274158533573 % +[ 7] Char o: 5.542337867898276 % +[ 8] Char u: 5.199840528240287 % +[ 9] Char k: 4.71309463478686 % +[10] Char m: 3.0828231658516216 % +[11] Char ä: 3.0396328975959235 % +[12] Char r: 2.9510705335262597 % +[13] Char v: 2.383894542356711 % +[14] Char j: 2.031182282766835 % +[15] Char p: 1.7508174415295008 % +[16] Char h: 1.5967506637218607 % +[17] Char y: 1.5530149385765957 % +[18] Char d: 1.1181370366219756 % +[19] Char g: 0.4179191513880886 % +[20] Char ö: 0.4126629304522631 % +[21] Char b: 0.36823298744754934 % +[22] Char c: 0.23935640053990315 % +[23] Char f: 0.1879842789407029 % +[24] Char w: 0.08717392835076647 % +[25] Char z: 0.05424816701691611 % +[26] Char x: 0.023305885281490465 % +[27] Char š: 0.01398353116889428 % +[28] Char q: 0.013735596219091189 % -The first 30 characters have an accumulated ratio of 0.9996056730388382. +The first 29 characters have an accumulated ratio of 0.9994604935492284. +The first 3 characters have an accumulated ratio of 0.3300643936651628. +All characters whose order is over 16 have an accumulated ratio of 0.04489754832004237. -940 sequences found. +1166 sequences found. -First 512 (typical positive ratio): 0.9985812031154878 -Next 512 (512-1024): 9.484924678281016e-05 -Rest: 2.7321894746634712e-17 +First 398 (typical positive ratio): 0.9950171899546914 +Next 196 (594-398): 0.003983288176498734 +Rest: 0.0009995218688099161 -- Processing end: 2021-03-16 19:06:31.204594 +- Processing end: 2022-12-14 18:01:43.436524 diff --git a/script/BuildLangModelLogs/LangFrenchModel.log b/script/BuildLangModelLogs/LangFrenchModel.log index 6328353..3988325 100644 --- a/script/BuildLangModelLogs/LangFrenchModel.log +++ b/script/BuildLangModelLogs/LangFrenchModel.log @@ -1,159 +1,262 @@ = Logs of language model for French (fr) = - Generated by BuildLangModel.py -- Started: 2021-03-16 01:17:58.545030 +- Started: 2022-12-14 17:24:07.769026 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Wikipédia:Accueil_principal (revision 164303621) -Bœuf (animal) (revision 178255345) -10 mars (revision 180841287) -12 mars (revision 180798998) -13 mars (revision 180904703) -1493 (revision 163870551) -14 mars (revision 180901488) -15 mars (revision 180904428) -1891 (revision 180890066) -1917 (revision 178369116) -1939 (revision 178458019) -2011 (revision 176114496) -45e parallèle nord (revision 180910832) -6 mars (revision 180750121) -7 mars (revision 180750121) -Absolutisme (revision 179767600) -Alassane Ouattara (revision 180842696) -Ambassadeur (revision 180674153) -Amiral de France (revision 177268292) -Amirautés de Bretagne (revision 175194082) -Aurora Cornu (revision 180901231) -Bata (Guinée équatoriale) (revision 180763894) -Bob Walkup (revision 180908319) -Bourgogne-Franche-Comté (revision 180662628) -Centre de données (revision 180741567) -Championnats du monde de ski acrobatique 2021 (revision 180882257) -Christophe Colomb (revision 180494940) -Claude Debussy (revision 179962158) -Couronne solaire (revision 180875717) -Crise présidentielle depuis 2019 au Venezuela (revision 180336636) -Critique musical (revision 174352172) -Côte d'Ivoire (revision 180838790) -Daniel Vachez (revision 180915214) -Degré Celsius (revision 179948881) -Deuxième République (Tchécoslovaquie) (revision 180896689) -Deuxième guerre civile libyenne (revision 180269091) -Empire romain (revision 180843240) -Empire russe (revision 179593986) -Excommunication (revision 178073962) -Explosions de Bata (revision 180862772) -Fatima Aziz (revision 180862495) -Fort du Lomont (revision 180886100) -Frankie de la Cruz (revision 180903250) -GINK (revision 179590111) -Giovanni Gastel (revision 180881061) -Goodwill Zwelithini kaBhekuzulu (revision 180806403) -Gouvernement de l'Église catholique (revision 176961659) -Guerre civile syrienne (revision 180897321) -Guerre civile yéménite (revision 180691885) -Guerre du Tigré (revision 180793174) -Guinée équatoriale (revision 180759310) -Hamed Bakayoko (revision 180904779) -Helena Fuchsová (revision 180909783) -Henri-Charles de Beaumanoir de Lavardin (revision 180903071) -Henry Darrow (revision 180905848) -Heure en France (revision 180854115) -Incendie du centre de données d'OVHcloud à Strasbourg (revision 180901025) -Innocent XI (revision 180108629) -Ivo Trumbić (revision 180827381) -Jean-Claude Fasquelle (revision 180871354) -Jean-Jacques Viton (revision 180889491) -Jean Frydman (revision 180909934) -Le Mans (revision 180520548) -Lieutenant général (revision 180899945) -Liste des ambassadeurs de France près le Saint-Siège (revision 180150184) -Manifestation des agriculteurs indiens de 2020-2021 (revision 180901643) -Manifestations de 2020-2021 en Arménie (revision 180901656) -Manifestations de 2020-2021 en Biélorussie (revision 180901634) -Manifestations de 2021 au Sénégal (revision 180900196) -Manifestations de 2021 en Birmanie (revision 180901671) -Manifestations de 2021 en Russie (revision 180897927) -Manifestations de Deraa (revision 180914771) -Mars 1891 (revision 155220626) -Mars 2021 (revision 180914744) -Marvin Hagler (revision 180908678) -Militaire (revision 178062901) -Murray Walker (revision 180862148) -OVHcloud (revision 180900746) -Obren Joksimović (revision 180901629) -Palais Farnèse (revision 180885444) -Pandémie de Covid-19 (revision 180845115) -Pays-Bas (revision 180853920) -Photosphère (revision 179722426) -Premier ministre ivoirien (revision 180838804) -Province de Bretagne (revision 176523092) -Président de la république de Côte d'Ivoire (revision 180747416) -Pôle Nord (revision 178839482) -Querelle des Franchises (revision 180092394) -Raoul Casadei (revision 180910155) -Rassemblement des houphouëtistes pour la démocratie et la paix (revision 180912125) -Roi des Français (revision 180882393) -Ronald DeFeo Jr. (revision 180915749) -Royaume de France (revision 180809662) -Révolte du Papier timbré (revision 180903105) +Bœuf (animal) (revision 197762352) +XXe siècle (revision 197314968) +Japon (revision 199440474) +Promenade du Bœuf Gras au Carnaval de Paris (revision 198921694) +Wagyu (revision 199077695) +Bœuf Gras (revision 188284665) +Hypoxie (revision 197200033) +Animal de trait (revision 192624489) +Chapon (revision 192387218) +Viande (revision 199230345) +Taureau (revision 197805532) +Bien-être animal (revision 197276347) +Charrue (revision 198345893) +Auvergne-Rhône-Alpes (revision 199364003) +Liste de races bovines (revision 197898563) +Accident nucléaire de Fukushima (revision 199486893) +Droit de veto au Conseil de sécurité des Nations unies (revision 196947109) +Élevage laitier (revision 190532865) +Emploi (revision 197176214) +Plantation (revision 199480678) +Promenade des bœufs gras de Bazas (revision 189976543) +Cardiopathie congénitale (revision 192423847) +Culture sur billons (revision 195101325) +Asthme (revision 198323744) +Viande de sanglier (revision 179794689) +Vous avez deux vaches (revision 194794493) +Moumou (revision 186126702) +Beefalo (revision 182751471) +Cancer du poumon (revision 199432202) +Année bissextile commençant un jeudi (revision 189317542) +Encyclopaedia Judaica (revision 198629701) +Vache de réforme (revision 198505707) +Roue (revision 198945438) +Intestin (revision 187979684) +Paris (revision 199407080) +Chien d'attelage (revision 199091106) +Poularde (revision 199412191) +Éléphant (revision 199431027) +Chartres (revision 199133376) +Freguesia (revision 196252400) +SIDA (revision 199056794) +Yves Klein (revision 199006394) +Voltaire (revision 199380285) +Anderlecht (revision 199480150) +Histoire de Bos taurus (revision 198033845) +Avril 1908 (revision 148523682) +Liste des villes du Japon par nombre d'habitants (revision 187069501) +Hori hori (revision 170153454) +Organisation mondiale du commerce (revision 198469590) +Juridique (revision 197813353) +Rochefort (Charente-Maritime) (revision 199467149) +Enjambeur (revision 185177634) +San Damiano d'Asti (revision 189018885) +Sarcloir (revision 198197352) +Équinoxe (revision 198251311) +Traîneau (revision 192374700) +The Atlantic (revision 194397713) +Madagascar (revision 199420783) +21 février (revision 199267739) +Henri Bergson (revision 199055378) +La traviata (revision 199487782) +Vache (revision 198433944) +Propagande nazie (revision 199398929) +Devon (race bovine) (revision 197767757) +Bâton fouisseur (revision 189798346) +Cas chrom (revision 111673866) +Freamunde (revision 177742159) +Préfecture d'Okayama (revision 196247485) +Bœuf Ōmi (revision 149332924) +Économie (activité humaine) (revision 199467073) +Octobre 1965 (revision 197623359) +Réception des ondes radioélectriques (revision 196887230) +Élections législatives japonaises de 1946 (revision 199224897) +Zezengorri (revision 142037030) +Infection urinaire (revision 198957431) +Végétarisme bouddhique (revision 185852036) +Taurillon (revision 191875389) +Bresse (revision 199356510) +Muscles (revision 199047991) +Mexico (revision 199324440) +Les Plaideurs (revision 198167903) +Semis direct sur couverture végétale (revision 170722304) +Grands Boulevards (revision 197923148) +Inventaire du patrimoine culturel immatériel en France (revision 198148530) +XIXe siècle (revision 199139537) +Élevage bovin (revision 198420240) +Insurrection de Budapest (revision 198673864) +Tauromachie (revision 199434508) +Grec ancien (revision 198516929) +Automutilation (revision 197397777) +Inflammatoire (revision 193742153) +Bos taurus (revision 199412667) +Pulvériseur (revision 186839949) +Labour (revision 196168038) +Machinisme agricole (revision 199031460) +Années 1980 (revision 199333913) +Respiration végétale (revision 197432202) +Avril 1915 (revision 158258956) +Bœuf de Matsusaka (revision 149332920) +Reille (agriculture) (revision 197676435) +Université du Colorado (revision 196119303) +Achille Millien (revision 198229129) +1940 (revision 199107985) +Fondation droit animal, éthique et sciences (revision 194147804) +Earthlings (revision 199150202) +Bergerac (Dordogne) (revision 199488125) +Gary Francione (revision 195818325) +Locomobile (revision 199386864) +Herse rotative (revision 198382851) +Dialogue du chapon et de la poularde (revision 165086172) +Lipide (revision 197487379) +Judaïsme (revision 199257400) +Promenade de la Vache enragée (revision 193039892) +Louis Pierre Louvel (revision 196412232) +5 mai (revision 197943843) +Années 1950 (revision 199051303) +Douleur (revision 199311261) +Épuration des eaux (revision 198836164) +Afeitado (revision 198850517) +Fièvre aphteuse (revision 192288205) +Liste des groupes de défense des animaux (revision 195785382) +Castration (revision 199351966) +Altitude (revision 197083929) +Décembre 2012 (revision 189589506) +Accident de décompression (revision 197317404) +Bœuf de Kobe (revision 192878601) +Moine bouddhique (revision 192276444) +28 octobre (revision 199307936) +Appellation d'origine contrôlée (revision 195566459) +Hyperoxie (revision 189893734) +Wolof (langue) (revision 198972423) +Poumons (revision 199475476) +Issoudun (revision 198654033) +Chiba (revision 194584167) +Turin (revision 199355679) +Symptôme (revision 194372998) +Loiret (département) (revision 198732270) +Sidney Poitier (revision 197881821) +Silhouette (art) (revision 198119381) +Domestication de Bos taurus (revision 198033845) +Les Échos (revision 195999786) +International Standard Book Number (revision 198513156) +Race Normande (revision 197806831) +Fumier (revision 198412120) +Espagne (revision 199335267) +Basic Books (revision 194697820) +IndyCar Series (revision 199173849) +Serfouette (revision 186632129) +Académie royale de danse (revision 198168760) +Il trovatore (revision 199375571) +Féminisme (revision 199415746) +Panzerkampfwagen VI Königstiger (revision 197699673) +Ur (Mésopotamie) (revision 199308474) +30 avril (revision 198763782) +Juifs (revision 196788531) +Mulet (revision 199468728) +Belgique (revision 199414084) +Djèliya (revision 195806261) +Lot (département) (revision 199454152) +Pensées (Pascal) (revision 199088482) +Thibaud Vaneck (revision 198822937) +Jeu du sabot (revision 193665949) +Faibles doses d'irradiation (revision 197214963) +Hippolyte et Aricie (revision 197873522) +Troc (revision 197420848) +Cerf élaphe (revision 198090991) +Œuf séminal (revision 186883685) +David Graeber (revision 199310008) +Medical Subject Headings (revision 190971390) +Voûte (revision 194124794) +Recensement de la population (revision 199399818) +Puy-de-Dôme (revision 199423242) +Semaine 13 (revision 169860242) +Slovénie (revision 199126837) +Pharmacogénétique (revision 188648659) +Équateur céleste (revision 181903536) +Carl Friedrich Cramer (revision 193538766) +Subdivisions du Cap-Vert (revision 185491136) +Rynn Berry (revision 195828915) +Herd-book (revision 195636266) +Allier (département) (revision 199092275) +European Underwater Federation (revision 194698355) +14 juin (revision 198391679) +CMAS (revision 186732686) +Bassin osseux (revision 198256708) +Pruneau (revision 196595161) +Serpentin (fête) (revision 179535119) +DORIS (système satellitaire) (revision 192256505) +Veau d'or (revision 196918081) +Noble Chemin octuple (revision 197438183) +Race bovine canadienne (revision 195317201) +Route nationale 23 (France) (revision 199357272) +Syndicat (revision 195521623) +Fils porteurs (revision 112617611) +Tadjikistan (revision 199420685) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 01:24:27.092152 +- Wikipedia parsing ended at: 2022-12-14 17:27:44.115205 -57 characters appeared 1900431 times. +59 characters appeared 3416318 times. -First 38 characters: -[ 0] Char e: 14.210092342210793 % -[ 1] Char a: 8.0327567799094 % -[ 2] Char s: 7.818647454182762 % -[ 3] Char i: 7.531554684174274 % -[ 4] Char n: 7.491616375443256 % -[ 5] Char r: 7.05650455080979 % -[ 6] Char t: 6.771779664718161 % -[ 7] Char l: 5.854461435327039 % -[ 8] Char o: 5.412772155368966 % -[ 9] Char u: 5.014546700195903 % -[10] Char d: 4.239248886173716 % -[11] Char c: 3.238896860764742 % -[12] Char m: 2.8875028875028876 % -[13] Char p: 2.787104609428072 % -[14] Char é: 2.546790701688196 % -[15] Char v: 1.3356443880361877 % -[16] Char g: 1.1728392138414918 % -[17] Char f: 1.1096956427252553 % -[18] Char b: 1.084859171419536 % -[19] Char h: 0.9054261901642312 % -[20] Char q: 0.7540920980556516 % -[21] Char y: 0.42858698895145364 % -[22] Char x: 0.4087493836924361 % -[23] Char à: 0.39127966235027745 % -[24] Char è: 0.3704422838819194 % -[25] Char j: 0.35176231076003284 % -[26] Char k: 0.17332910271406854 % -[27] Char z: 0.11539487621492178 % -[28] Char ê: 0.10397641377140239 % -[29] Char ç: 0.09292628882606103 % -[30] Char ô: 0.07540394784130547 % -[31] Char w: 0.06340666932922058 % -[32] Char î: 0.031729644485908724 % -[33] Char û: 0.029309140926453 % -[34] Char â: 0.02504694987610705 % -[35] Char ï: 0.019942844544211285 % -[36] Char ù: 0.016259469562430837 % -[37] Char œ: 0.010839646374953892 % +Most Frequent characters: +[ 0] Char e: 14.285496841921624 % +[ 1] Char s: 7.930819086513609 % +[ 2] Char a: 7.75545484934365 % +[ 3] Char i: 7.446701390210161 % +[ 4] Char n: 7.317117434618206 % +[ 5] Char t: 6.9079342145549685 % +[ 6] Char r: 6.74972294733687 % +[ 7] Char l: 5.854490126504617 % +[ 8] Char o: 5.4769198886052175 % +[ 9] Char u: 5.377543893747596 % +[10] Char d: 4.170513400684596 % +[11] Char c: 3.40398639705086 % +[12] Char p: 2.9246106480719885 % +[13] Char m: 2.8418607401301634 % +[14] Char é: 2.4772869504536756 % +[15] Char v: 1.2920635608277684 % +[16] Char g: 1.2142604991689885 % +[17] Char f: 1.1114597645769508 % +[18] Char b: 0.9878471500603867 % +[19] Char h: 0.9766655211839179 % +[20] Char q: 0.8268258399832803 % +[21] Char x: 0.4753948549286103 % +[22] Char à: 0.3941377822556331 % +[23] Char è: 0.3814925893900978 % +[24] Char y: 0.3756383334338314 % +[25] Char j: 0.3037187990110991 % +[26] Char k: 0.13953619071760884 % +[27] Char ê: 0.11614843817232472 % +[28] Char z: 0.11450924650457013 % +[29] Char ç: 0.06255272489270612 % +[30] Char w: 0.05219069185011466 % +[31] Char ô: 0.04639497845341095 % +[32] Char â: 0.04455088782718705 % +[33] Char î: 0.038462461632670024 % +[34] Char œ: 0.03290091847421698 % +[35] Char ï: 0.02546601340975869 % +[36] Char û: 0.02057770968627628 % +[37] Char ù: 0.019084874417428354 % -The first 38 characters have an accumulated ratio of 0.9996521841624343. +The first 38 characters have an accumulated ratio of 0.9997233864060664. +The first 4 characters have an accumulated ratio of 0.3741847216798905. +All characters whose order is over 19 have an accumulated ratio of 0.03469583335040825. -1049 sequences found. +1255 sequences found. -First 512 (typical positive ratio): 0.997006678170155 -Next 512 (512-1024): 0.00010839646374953892 -Rest: 1.646491655585584e-05 +First 465 (typical positive ratio): 0.995025880940353 +Next 182 (647-465): 0.0039795148879469 +Rest: 0.000994604171700053 -- Processing end: 2021-03-16 01:24:27.266283 +- Processing end: 2022-12-14 17:27:44.209885 diff --git a/script/BuildLangModelLogs/LangGermanModel.log b/script/BuildLangModelLogs/LangGermanModel.log index 8d52ec6..d7f1c25 100644 --- a/script/BuildLangModelLogs/LangGermanModel.log +++ b/script/BuildLangModelLogs/LangGermanModel.log @@ -1,150 +1,257 @@ = Logs of language model for German (de) = - Generated by BuildLangModel.py -- Started: 2021-03-16 01:05:29.301622 +- Started: 2022-12-14 18:01:47.695976 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Wikipedia:Hauptseite (revision 201839754) -1021 (revision 209824844) -1521 (revision 209838003) -16. März (revision 209315535) -1861 (revision 209842356) -1946 (revision 209524711) -1951 (revision 209835290) -Beyoncé (revision 209832932) -Bolivien (revision 209448707) -Bund der Schweizerinnen gegen das Frauenstimmrecht (revision 209693790) -Bundesgrenzschutz (revision 208691250) -Clara Weaver Parrish (revision 209287165) -Dornmühle (Fränkisch-Crumbach) (revision 209842366) -Edmund Weiskopf (revision 209843848) -Enrico Letta (revision 209811620) -Enzyklopädie (revision 209393223) -Ferdinand Magellan (revision 209566955) -Freie Inhalte (revision 207460431) -Geschichte der Bundesrepublik Deutschland (bis 1990) (revision 209662112) -Giovanni Gastel (revision 209840651) -Henry Darrow (revision 209836134) -Heribert von Köln (revision 208577962) -Homonhon (revision 207392862) -Internationales Olympisches Komitee (revision 209815926) -Jeanine Áñez (revision 209843969) -Jeanne d’Arc Mujawamariya (revision 209842628) -Kommunalwahlen in Hessen 2021 (revision 209834340) -Landtagswahl in Baden-Württemberg 2021 (revision 209842530) -Mark Lubotsky (revision 209830272) -Marvelous Marvin Hagler (revision 209843820) -Max Blokzijl (revision 209843982) -Molly Pitcher (revision 209843994) -Murray Walker (revision 209841073) -März 2021 (revision 209804897) -Nekrolog 2021 (revision 207237920) -Oscarverleihung 2021 (revision 209715006) -Thomas Bach (revision 209739384) -1. Dezember (revision 209839074) -1. Januar (revision 209777781) -1. November (revision 209796293) -10. Februar (revision 209675106) -10. Mai (revision 208810425) -10. März (revision 209821650) -11. Juli (revision 209510718) -11. März (revision 209819434) -11. November (revision 209630921) -12. Dezember (revision 209724301) -12. Mai (revision 208883973) -12. März (revision 209795040) -12. September (revision 209262794) -13. Dezember (revision 209710424) -13. Januar (revision 209629276) -13. März (revision 209795132) -13. Oktober (revision 209183744) -14. Februar (revision 209414444) -14. September (revision 209562392) -16. April (revision 209621904) -19. August (revision 208018991) -1920 (revision 209819215) -1921 (revision 209733600) -1923 (revision 209799201) -1924 (revision 209534204) -1925 (revision 209632533) -1926 (revision 209684778) -1927 (revision 209374750) -1929 (revision 209747684) -1930 (revision 209715589) -1931 (revision 209767120) -1933 (revision 209704894) -1934 (revision 209767120) -1936 (revision 209834629) -1939 (revision 209524711) -1940 (revision 209524711) -1941 (revision 209524711) -1942 (revision 209524711) -1944 (revision 209505481) -1945 (revision 209524711) -1947 (revision 209505481) -1948 (revision 209767120) -1950 (revision 209655464) -1952 (revision 209572541) -1954 (revision 209187815) -1955 (revision 209259419) -1957 (revision 209842142) -1965 (revision 209593366) -1980er (revision 209258403) -1990er (revision 209258403) -2. März (revision 209835819) -2. September (revision 209803579) -20. April (revision 209655478) -20. Jahrhundert (revision 207914301) -20. Januar (revision 209517100) +Deutschland (revision 228606777) +Europäische Weltraumorganisation (revision 228631712) +Drittes Deutschland (revision 228606789) +Nord-Ostsee-Kanal (revision 228177589) +Mobiles Einsatzkommando (revision 228450531) +Kap Verde (revision 228678091) +Carl Friedrich Gauß (revision 228774636) +Landgericht (revision 226082390) +Führerbunker (revision 228712614) +Gleichgeschlechtliche Ehe (revision 228511612) +Adolf Hitler (revision 228703587) +Braunkohle (revision 226387804) +Alleinvertretungsanspruch (revision 225813016) +Yukon (Territorium) (revision 225628885) +Ukraine (revision 228845131) +Reichstagswahl März 1933 (revision 227850752) +Diplomatie (revision 227049338) +Wolfgang Sartorius von Waltershausen (revision 226100655) +Eingetragene Partnerschaft-Gesetz (revision 223769401) +Kokerei (revision 228349930) +Arbeitsstelle Schacht (revision 216781123) +The Economist (revision 223242881) +Dresden (revision 228085168) +Prager Frieden (1866) (revision 228310704) +Souveränität (revision 228606540) +Verdeckter Ermittler (revision 228598315) +Kudensee (Gemeinde) (revision 223999678) +Präsidialkabinett (revision 223203158) +Heinrich Köhler (Architekt) (revision 228323082) +Nordsee (revision 228806907) +Pedra Badejo (revision 218295942) +Frankreich (revision 228628493) +Fritz Fischer (Historiker) (revision 224583221) +Magda Goebbels (revision 228021587) +Kilowattstunde (revision 228120231) +Rekonstruktionismus (revision 224403087) +Nordrhein-Westfalen (revision 228643403) +Der Standard (revision 228424043) +Wirbelschicht (revision 215906776) +Deutsches Reich (revision 228296021) +Legislaturperiode (revision 227735892) +The Times of Israel (revision 219517881) +Jens Frahm (revision 228560907) +Innenministerkonferenz (revision 226787756) +Mord (Deutschland) (revision 227089296) +Matthias Maurer (revision 227875909) +Bundesrepublik Deutschland (revision 228606777) +Berghof (Obersalzberg) (revision 228474755) +Niedersachsen (revision 228703722) +Beschuldigter (revision 225097643) +Magnetometer (revision 218588734) +Villeseen (revision 223042618) +Königreich Preußen (revision 228606517) +Espargos (revision 219889004) +Reichsritterschaft (revision 228363926) +Friedensrede vom 17. Mai 1933 (revision 225886430) +Hessen (revision 228606788) +Jütland (revision 228394509) +Volkspartij voor Vrijheid en Democratie (revision 226870900) +Österreichische Forschungsförderungsgesellschaft (revision 224880189) +Grundrechte (revision 226071146) +Land (Deutschland) (revision 228675273) +Vietnam (revision 226752482) +Wattstunde (revision 228120231) +Cyanwasserstoff (revision 228235306) +Meteosat (revision 228834931) +Baden-Württemberg (revision 228806297) +Weltkrieg (revision 228580588) +Todesfolge (revision 145499017) +Tunis (revision 228800561) +Süddeutscher Bund (revision 227461496) +Territorialkommando Schleswig-Holstein (revision 221409368) +Totschlag (Deutschland) (revision 227541298) +Erik Quistgaard (revision 223686958) +Bremen (revision 228615505) +Wetterau (revision 227117387) +Internet Archive (revision 228310076) +Hoher Meißner (revision 226631071) +Liste der Mitgliedstaaten im Deutschen Bund (revision 224823406) +Xylit (Kohle) (revision 206643095) +MacTutor History of Mathematics archive (revision 228718820) +Berlin-Tempelhof (revision 228805360) +Mars Express (revision 222991893) +Peilung (revision 225351085) +Gaußstein (Garlste) (revision 203812700) +Gaußturm (revision 225175634) +Norwegische Kirche (revision 226534082) +Sicherungsverwahrung (revision 228641206) +Deutsche Staatsangehörigkeit (revision 228705305) +Adjutant (revision 219998309) +Afrika-Cup 2013 (revision 227673847) +Liste der Straßen und Plätze in Berlin-Tempelhof (revision 228847763) +Ceuta (revision 226278282) +Ian Kershaw (revision 228835386) +Krimkrieg (revision 228783275) +Gasplanet (revision 227221507) +Unionspolitik Preußens (revision 228606763) +Asche (revision 226787217) +United States Army Air Forces (revision 228309083) +Beschwerde (deutsches Recht) (revision 214028503) +Leipziger Neuseenland (revision 221106485) +Baden (Land) (revision 227966043) +Bernd Freytag von Loringhoven (revision 226707396) +Radium (revision 228320807) +Peter Altmaier (revision 227105204) +Hans Krebs (Offizier) (revision 228713044) +Nordkorea (revision 228189588) +Helmuth Weidling (revision 227613393) +Bosnien und Herzegowina (revision 228852382) +Parthenstein (revision 227664873) +Wolfram Siemann (revision 228568083) +Panamakanal (revision 228117717) +Polizeipräsidium Frankfurt (revision 226038807) +Ordentliche Gerichtsbarkeit (Deutschland) (revision 225830061) +Gesamtdeutschland (revision 219527044) +Tirpitz (Schiff, 1941) (revision 228794616) +European Communications Satellite (revision 217651630) +Hauptverfahren (revision 218232627) +Deutsche Reichsgründung (revision 228606592) +Jeremy Gray (revision 213540756) +Legitimation (Politikwissenschaft) (revision 225956199) +Sergipe (revision 224297586) +Jean-Jacques Dordain (revision 222991624) +Sven Felix Kellerhoff (revision 228772596) +Bundesregierung (Deutschland) (revision 228697382) +Ochse (revision 228719196) +Saint Charles (Missouri) (revision 224936649) +Deutsch-Dänischer Krieg (revision 228197313) +Voßstraße (revision 228686339) +Partido Africano da Independência de Cabo Verde (revision 227072061) +Deutschsozialistische Partei (revision 187114420) +Schweizer Radio und Fernsehen (revision 226501521) +Saarland (revision 228639211) +Rheinland-Pfalz (revision 228606703) +Elektromagnetische Induktion (revision 228108340) +Motorradbau in Zschopau (revision 228827895) +Ostermoor (Brunsbüttel) (revision 219522219) +Die Mitte (revision 228380591) +Johann Benedict Listing (revision 226896760) +Rassentheorie (revision 228579154) +Petschaft (revision 228247141) +Albrecht von Roon (revision 228247856) +Transnistrien (revision 228611915) +Eheurkunde (revision 202526493) +Freiheitsstrafe (Deutschland) (revision 223745601) +Gemeinsame Normdatei (revision 228265239) +José Maria Neves (revision 228300549) +Tito Paris (revision 185252119) +Republik Venedig (revision 228315027) +Satellitenrundfunk (revision 227353304) +Deutscher Bund (revision 228606520) +Hera (Weltraummission) (revision 227275109) +Pazifismus (revision 228017031) +Brandenburg (revision 228781446) +Verteidigungsbezirkskommando 72 (revision 194243774) +Andros (Griechenland) (revision 228178371) +Zeitschrift des Architekten- und Ingenieur-Vereins zu Hannover (revision 211997745) +Potsdamer Abkommen (revision 228673933) +Geomagnetik (revision 217876160) +Waldfriedhof Blumenthal (revision 224504027) +Gagausen (revision 228276606) +Chinesische Sprache (revision 227498878) +Alliierter Kontrollrat (revision 228771218) +Tag (revision 227420267) +Volkssouveränität (revision 214749567) +Neuzeit (revision 226916078) +Strafrecht (revision 226756439) +Coladeira (revision 219447528) +Wärme (revision 228787931) +Mars Telecommunications Orbiter (revision 193518640) +Deutschlandradio Kultur (revision 226585090) +Satellitenfernsehempfänger (revision 219434329) +Wilhelm Keitel (revision 228713047) +Deutsches Filminstitut (revision 226886230) +Bad Nauheim (revision 228590902) +Lotharii Regnum (revision 207527682) +Glyptik (revision 226675680) +Kraft (revision 228696624) +Louvre (revision 228822258) +Erwachsenenstrafrecht (revision 199484874) +Archive.today (revision 227419265) +England (revision 228567914) +Untersee (Brühl) (revision 225655608) +Ära Adenauer (revision 226970446) +Schlitten (revision 228844670) +Michael Kotulla (revision 227619359) +Menschenwürde (revision 228266910) +Sevdalinka (revision 227976551) +NEEMO (revision 219617837) +Palais Batthyány (revision 203778661) +Rhein (revision 228807016) +Santa Cruz (Concelho) (revision 204607767) +Richard Friedländer (revision 221137334) +Hürth-Knapsack (revision 226821270) +Oberthal (Saar) (revision 228328803) +Marke (Recht) (revision 225403469) +Otto Neugebauer (revision 222692022) +Hecksche Formel (revision 224432126) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 01:10:34.749053 +- Wikipedia parsing ended at: 2022-12-14 18:05:20.300233 -59 characters appeared 3848604 times. +60 characters appeared 4279054 times. -First 31 characters: -[ 0] Char e: 13.62925362027374 % -[ 1] Char r: 9.404189155340482 % -[ 2] Char i: 8.18457809636949 % -[ 3] Char n: 7.829540269666611 % -[ 4] Char s: 6.804155480792516 % -[ 5] Char a: 6.737923673103287 % -[ 6] Char t: 5.6408765360115 % -[ 7] Char h: 4.424695292111114 % -[ 8] Char u: 4.194118178955279 % -[ 9] Char l: 4.1823216937881895 % -[10] Char d: 4.112010484840737 % -[11] Char o: 3.6970808116397533 % -[12] Char c: 3.4451453046351355 % -[13] Char m: 2.8236732072200725 % -[14] Char g: 2.3015618130626065 % -[15] Char b: 2.0475736137051253 % -[16] Char k: 1.9373258459431004 % -[17] Char p: 1.6796479970399656 % -[18] Char f: 1.6060368902594293 % -[19] Char z: 1.0385064298639195 % -[20] Char w: 0.9370410673584499 % -[21] Char v: 0.7894031186373033 % -[22] Char j: 0.6687879553209424 % -[23] Char ä: 0.5280616036360197 % -[24] Char y: 0.35885739348605367 % -[25] Char ü: 0.33731711550473886 % -[26] Char ö: 0.27194276158316105 % -[27] Char ß: 0.13979094757475696 % -[28] Char x: 0.09044838076351841 % -[29] Char é: 0.04185933392991329 % -[30] Char q: 0.02814007364748361 % +Most Frequent characters: +[ 0] Char e: 15.976334956277718 % +[ 1] Char n: 9.628436565652128 % +[ 2] Char i: 7.824299483016574 % +[ 3] Char r: 7.8062814818415465 % +[ 4] Char s: 6.514266938440132 % +[ 5] Char t: 6.32656189896178 % +[ 6] Char a: 5.785998494059668 % +[ 7] Char d: 5.297806477786913 % +[ 8] Char h: 4.162158271431022 % +[ 9] Char u: 3.9845956606296626 % +[10] Char l: 3.6868429330408077 % +[11] Char g: 3.0796292825470304 % +[12] Char c: 2.77853469481806 % +[13] Char o: 2.7206714381262773 % +[14] Char m: 2.5168179695792574 % +[15] Char b: 2.0866761672089207 % +[16] Char f: 1.6615822095257502 % +[17] Char k: 1.465861379641388 % +[18] Char w: 1.3530327030226774 % +[19] Char z: 1.1850516492664034 % +[20] Char p: 1.0030020654097844 % +[21] Char v: 1.0002911858555654 % +[22] Char ä: 0.5755477729423373 % +[23] Char ü: 0.5585580364258081 % +[24] Char ö: 0.30097773947232265 % +[25] Char j: 0.2529764756415787 % +[26] Char ß: 0.20866761672089204 % +[27] Char y: 0.13566082596760873 % +[28] Char x: 0.06228012079305379 % +[29] Char q: 0.028604453227278736 % -The first 31 characters have an accumulated ratio of 0.9991186414606439. +The first 30 characters have an accumulated ratio of 0.9996800694732993. +The first 3 characters have an accumulated ratio of 0.3342907100494642. +All characters whose order is over 20 have an accumulated ratio of 0.031235642270464454. -1337 sequences found. +1194 sequences found. -First 512 (typical positive ratio): 0.9936565191798025 -Next 512 (512-1024): 0.0033731711550473885 -Rest: 0.00017862552962171364 +First 494 (typical positive ratio): 0.9950332887709427 +Next 153 (647-494): 0.003970985671306937 +Rest: 0.0009957255577504043 -- Processing end: 2021-03-16 01:10:34.853392 +- Processing end: 2022-12-14 18:05:20.372064 diff --git a/script/BuildLangModelLogs/LangGreekModel.log b/script/BuildLangModelLogs/LangGreekModel.log index ee210e2..3f69b67 100644 --- a/script/BuildLangModelLogs/LangGreekModel.log +++ b/script/BuildLangModelLogs/LangGreekModel.log @@ -1,174 +1,255 @@ = Logs of language model for Greek (el) = - Generated by BuildLangModel.py -- Started: 2021-03-16 18:54:42.415198 +- Started: 2022-12-14 18:02:21.368215 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Πύλη:Κύρια (revision 7950664) -16 Μαρτίου (revision 8737120) -1797 (revision 8019834) -1839 (revision 8019704) -1900 (revision 7952521) -1901 (revision 7905277) -1935 (revision 8290828) -Mars 2020 (revision 8718725) -Perseverance (ρόβερ) (revision 8718754) -The Economist (revision 8341010) -Wiki (revision 8595867) -Wikimedia (revision 8518678) -Άρθουρ Έβανς (revision 8502931) -Άρθρουρ Γουέλσλεϋ, Δούκας του Ουέλλινγκτον (revision 8423158) -Αγγλική γλώσσα (revision 8702613) -Αδόλφος Χίτλερ (revision 8722090) -Αντισφαίριση (revision 8557812) -Αρειανό ελικόπτερο Ingenuity (revision 8718783) -Αυστραλιανό Όπεν (revision 8078988) -Βέρμαχτ (revision 8711795) -Βραβεία Νόμπελ Λογοτεχνίας (revision 8519145) -Γαλλία (revision 8680274) -Γενικός Διευθυντής του Παγκόσμιου Οργανισμού Εμπορίου (revision 8694448) -Γερμανία (revision 8724575) -Εγκυκλοπαίδεια (revision 8687200) -Ελεύθερο περιεχόμενο (revision 8707719) -Ελληνική Βικιπαίδεια (revision 8731090) -Κνωσός (revision 8697910) -Κρήτη (revision 8735869) -Λονδίνο (revision 8666776) -Ναόμι Οσάκα (revision 8736512) -Νγκόζι Οκόντζο-Ιουεάλα (revision 8716446) -Νόβακ Τζόκοβιτς (revision 8735633) -Ουίλιαμ Μπάντινγκ (revision 8298356) -Παγκόσμιος Οργανισμός Εμπορίου (revision 8694448) -Πατριάρχης Σερβίας Πορφύριος (revision 8716966) -Σερβική Ορθόδοξη Εκκλησία (revision 8703081) -Συλί Προυντόμ (revision 8736464) -Συνθήκη των Βερσαλλιών (revision 7991516) -10 Μαρτίου (revision 8726574) -1185 (revision 8532989) -1190 (revision 8729267) -11 Μαρτίου (revision 8730381) -1244 (revision 7906151) -12 Μαρτίου (revision 8730152) -13 Μαρτίου (revision 8544014) -1405 (revision 7906083) -1410 (revision 7906088) -1465 (revision 7905889) -1473 (revision 8687951) -1478 (revision 7905905) -14 Μαρτίου (revision 8096796) -15 Μαρτίου (revision 8734431) -1670 (revision 8120689) -1751 (revision 8019900) -1782 (revision 8019823) -1789 (revision 8019786) -1792 (revision 8019828) -1794 (revision 8019829) -17 Μαρτίου (revision 8233521) -1802 (revision 8019791) -1812 (revision 8019794) -1815 (revision 8728979) -1859 (revision 8019719) -1872 (revision 8019620) -1888 (revision 8678352) -1892 (revision 8019578) -1894 (revision 8019646) -1898 (revision 7905275) -18 Μαρτίου (revision 8666328) -1906 (revision 8019564) -1908 (revision 8110859) -1911 (revision 8234911) -1912 (revision 7905254) -1919 (revision 8188234) -1920 (revision 8689556) -1921 (revision 8019599) -1923 (revision 8640393) -1924 (revision 8019604) -1925 (revision 8424340) -1926 (revision 8019613) -1927 (revision 7905236) -1930 (revision 8019616) -1937 (revision 7905218) -1939 (revision 8731642) -1940 (revision 8503734) -1944 (revision 8556801) -1945 (revision 8699418) -1948 (revision 8707830) -1953 (revision 8660010) -1955 (revision 8733996) -1956 (revision 8637553) -1957 (revision 8582051) -1959 (revision 8621124) -1964 (revision 8701289) -1966 (revision 8596642) -1967 (revision 8657263) -1968 (revision 8640882) -1969 (revision 8709383) -1970 (revision 8645926) +Πύλη:Κύρια (revision 9720674) +Ελληνική γλώσσα (revision 9770893) +Μεσαιωνική ελληνική γλώσσα (revision 9633519) +Ελλάδα (revision 9814581) +Λογοτεχνία (revision 9710342) +Αμπέμπε Μπικίλα (revision 9629134) +Σι Τζινπίνγκ (revision 9813216) +Αναγέννηση (revision 9814535) +1927 (revision 9796714) +Ρίσι Σούνακ (revision 9807035) +Χιτζάμπ (revision 9689462) +Προφορά της κλασικής αρχαίας ελληνικής γλώσσας (revision 9656423) +Σουηδικές γενικές εκλογές 2022 (revision 9744475) +24 Φεβρουαρίου (revision 9778301) +Σουηδία (revision 9724663) +Système universitaire de documentation (revision 9519040) +17 Φεβρουαρίου (revision 9792444) +National Diet Library (revision 9533181) +Σουηδικές γενικές εκλογές (2018) (revision 9722309) +Σαν Φρανσίσκο (revision 9695407) +Αθλητισμός (revision 9775022) +Sveriges Television (revision 8951040) +Ελληνικό σύστημα αρίθμησης (revision 8810370) +Ανανεώστε την Ευρώπη (revision 9774364) +Μάρμαρο (revision 9388577) +Ελληνικό φωνητικό αλφάβητο (revision 9069946) +Φιλελεύθεροι (Σουηδία) (revision 9744487) +Αιθιοπία (revision 9690129) +Κατάλογος χωρών κατά δείκτη ανθρώπινης ανάπτυξης (revision 9782519) +Ινδία (revision 9739007) +Νικόλαος Κουζάνος (revision 8518280) +ISO 639-1 (revision 9336842) +Κομμουνιστικό Κόμμα της Κίνας (revision 9172509) +Αττική διάλεκτος (revision 8901811) +1908 (revision 9742502) +Περιφέρεια Βορείου Αιγαίου (revision 9621030) +Γένοβα (revision 9771381) +Κίνα (revision 9794230) +Φουμίο Κισίντα (revision 9559868) +Μεσόγειος (revision 9815947) +The New York Times (revision 9532084) +Χοντρός και Λιγνός (revision 9758430) +Ελληνικό Ίδρυμα Πολιτισμού (revision 9772666) +Αρχαία ελληνική λογοτεχνία (revision 8735369) +Ξενοφών Στρατηγός (revision 8585203) +Χριστιανοδημοκράτες (Σουηδία) (revision 9745040) +Ντονάτο Μπραμάντε (revision 9502757) +Γλωσσικό ζήτημα (revision 9715638) +Θεοτόκος (revision 9620713) +Νόρλαντ (revision 9701764) +Αθηναϊκή δημοκρατία (revision 9752241) +Μονή Οσίου Λουκά (revision 9745951) +Σαρία (revision 9196257) +Θερινή Ώρα Κεντρικής Ευρώπης (revision 9158347) +Φρυγική γλώσσα (revision 9496315) +Τονισμός (revision 8997917) +Διεθνής πρότυπος αριθμός βιβλίου (revision 9525547) +International Standard Serial Number (revision 9426410) +Βόρεια Κορέα (revision 9781851) +Αργεντινή (revision 9722648) +Ιαπωνία (revision 9738909) +Μωάμεθ (revision 9582333) +Πανελλήνιο Σοσιαλιστικό Κίνημα - Κίνημα Αλλαγής (revision 9815357) +Νι (revision 8925493) +Επιστήμη (revision 9447489) +Άηχο ουρανικό κλειστό (revision 9359209) +Ιωτακισμός (revision 9177317) +Μέγαρα (revision 9815572) +Μάχη του Μαντζικέρτ (revision 9451708) +Τόκιο (revision 9785709) +Βραβείο Φραντς Κάφκα (revision 7803694) +Χειμαρριώτικη διάλεκτος της ελληνικής γλώσσας (revision 9814483) +20 Ιανουαρίου (revision 9568511) +Έθνος κράτος (revision 9565285) +Εμανουέλ Ριβά (revision 9560857) +Ιδιωτικότητα (revision 9543906) +Μαγκνταλένα Άντερσον (revision 9724325) +G7 (revision 9708501) +Ιταλία (revision 9814442) +Αντίς Αμπέμπα (revision 9703571) +1964 (revision 9811809) +Μύκονος (revision 9769477) +Πυκνότητα πληθυσμού (revision 8112343) +Μαραθώνιος στους Ολυμπιακούς Αγώνες (revision 9644285) +Ομηρική διάλεκτος (revision 8274384) +Βραβεία Νόμπελ Λογοτεχνίας (revision 9704908) +International Standard Name Identifier (revision 6861942) +Ουλφ Κρίστερσον (revision 9804758) +Καππαδοκική διάλεκτος (revision 9776884) +Θανάσης Βέγγος (revision 9807820) +Library of Congress Control Number (revision 9767699) +Αγγλική γλώσσα (revision 9779698) +Ab urbe condita (revision 9523077) +Πινγίν (revision 9679522) +Ρότζερ Μουρ (revision 9814722) +Index Librorum Prohibitorum (revision 9760596) +Νοσταλγία (revision 9734746) +Τουρκία (revision 9804483) +Ωδή (revision 8960684) +Αρχαϊκά ελληνικά αλφάβητα (revision 9512358) +21 Οκτωβρίου (revision 9123772) +Μυστικό Συμβούλιο (Ηνωμένο Βασίλειο) (revision 9717054) +Κολούτσιο Σαλουτάτι (revision 7531799) +Αφγανιστάν (revision 9704097) +Ολυμπιακοί Αγώνες (revision 9767748) +Μάο Τσετούνγκ (revision 9685825) +Γλώσσα (revision 9625715) +Ελληνιστική περίοδος (revision 9683985) +Σονέτο (revision 9736846) +Ουίλιαμ Χέιγκ (revision 9655093) +Ιράν (revision 9804479) +Γιάννης Μιχαλόπουλος (revision 9334986) +Διεθνής Αερολιμένας Αθηνών «Ελευθέριος Βενιζέλος» (revision 9805318) +Γαλλία (revision 9809487) +Πανεπιστήμιο της Οξφόρδης (revision 9496775) +Μεταφυσική (revision 9716423) +Κλασικισμός (revision 8324313) +Φικχ (revision 8308138) +Ποίηση (revision 9750581) +Παμφυλιακή διάλεκτος (revision 9496240) +Ιωάννης Φουντούλης (revision 9326532) +Χέγκελ (revision 9658098) +13 Σεπτεμβρίου (revision 9331818) +Λοκρική διάλεκτος (revision 7574882) +Λειτουργία (μουσική) (revision 8677674) +Οργανισμός για την Ασφάλεια και τη Συνεργασία στην Ευρώπη (revision 9794797) +Bibliothèque nationale de France (revision 9636186) +Αστικοποίηση (revision 9718574) +Σουηδικές γενικές εκλογές 2014 (revision 9042837) +Δέλτα (revision 7065781) +Integrated Authority File (revision 8518544) +Σκυρόδεμα (revision 9415646) +Ρωσικός φορμαλισμός (revision 8011923) +Γάμμα (revision 9541292) +Τζιχαντισμός (revision 9239251) +Ντόμινικ Ράαμπ (revision 9735261) +Νεοελληνική λογοτεχνία (revision 9660986) +Σχέδιο Μάρσαλ (revision 9590277) +Μεξικό (revision 9672139) +1929 (revision 9808487) +Φι (revision 8977317) +PubMed Identifier (revision 6403885) +Ισλαμισμός (revision 9710191) +Τζορτζ Μπέρκλεϋ (revision 9115917) +Γιέστρικλαντ (revision 8835873) +Βυζαντινή αυτοκρατορία (revision 9724182) +Αρχαία Αθήνα (revision 9791090) +Αλέξανδρος Πάλλης (revision 9452344) +Σιδηροδρομικός σταθμός Μεγάρων (revision 9788113) +Σιμπούγια (revision 9448722) +Άγιος Κήρυκος Ικαρίας (revision 9379289) +Ελληνική φιλοσοφία (revision 9012645) +Πτολεμαίοι (revision 9760181) +Φιλελεύθερο Δημοκρατικό Κόμμα (Ιαπωνία) (revision 9562975) +Χιόνι (revision 9366620) +1188 (revision 7906380) +Σλοβενική γλώσσα (revision 9736905) +Μιχάλης Παπακωνσταντίνου (revision 9228156) +Ιερές συντομογραφίες (revision 9636386) +Virtual International Authority File (revision 9547787) +Οστρακισμός (revision 9758182) +Λετονία (revision 9810145) +Κατάλογος χωρών ανά κατά κεφαλήν ΑΕΠ (ονομαστικό) (revision 9707678) +Τριττύα (revision 8997097) +Ώρα Κεντρικής Ευρώπης (revision 9158323) +Βασιλική του Αγίου Πέτρου (revision 9746265) +Αντέλ Ενέλ (revision 9781473) +Τουρκμενιστάν (revision 9786546) +Νέα ελληνική γλώσσα (revision 9741111) +Ξι (revision 8385158) +Ίαν Φλέμινγκ (revision 9814725) +Ελένη Βιτάλη (revision 9664460) +Μεγαρικό ψήφισμα (revision 9614286) +Τσου Εν Λάι (revision 8409932) +Κάρλος Μένεμ (revision 9734248) +Αλεξάνδρεια (revision 9806897) +1409 (revision 8687955) +Λιοντάρι (revision 9715144) +Κατωιταλική διάλεκτος (revision 9712613) +Χερσόνησος η Ταυρική (revision 9764259) +Περιφέρεια των λαών του νότου (revision 9641762) +Θερινοί Ολυμπιακοί Αγώνες 2012 (revision 9675153) +Δημοκρατία της Ιρλανδίας (revision 9672626) +Νόβι Σαντ (revision 9793157) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 18:58:31.004638 +- Wikipedia parsing ended at: 2022-12-14 18:05:43.615460 -62 characters appeared 801479 times. +63 characters appeared 1890130 times. -First 47 characters: -[ 0] Char α: 8.791371951105393 % -[ 1] Char ο: 8.656870610458913 % -[ 2] Char τ: 7.436002690026814 % -[ 3] Char ι: 6.335661944979219 % -[ 4] Char ν: 5.906455440504367 % -[ 5] Char ε: 5.323907426145913 % -[ 6] Char ρ: 5.098698780629311 % -[ 7] Char ς: 4.129740142910793 % -[ 8] Char κ: 4.033542987402041 % -[ 9] Char σ: 3.9103956560309125 % -[10] Char υ: 3.7128858023728633 % -[11] Char η: 3.4742020689250745 % -[12] Char λ: 3.4385180397739674 % -[13] Char π: 3.329220104332116 % -[14] Char μ: 3.3050148537890576 % -[15] Char ί: 2.7370648513560556 % -[16] Char ό: 2.185958708837038 % -[17] Char γ: 2.095251403966916 % -[18] Char ά: 1.8429678132552443 % -[19] Char έ: 1.6417148796163092 % -[20] Char δ: 1.4553094965682194 % -[21] Char β: 1.2000314418718394 % -[22] Char ω: 1.121801070271336 % -[23] Char ή: 1.0494348573075527 % -[24] Char χ: 0.9217958299593626 % -[25] Char ύ: 0.8777522555176118 % -[26] Char φ: 0.8600350102747546 % -[27] Char θ: 0.7800578680165045 % -[28] Char ώ: 0.617732966178777 % -[29] Char ζ: 0.4195992658572464 % -[30] Char e: 0.30456194111137036 % -[31] Char ξ: 0.28696946520120925 % -[32] Char i: 0.25203405204627943 % -[33] Char a: 0.23631311612656103 % -[34] Char n: 0.21647479222786872 % -[35] Char r: 0.1978841616561382 % -[36] Char o: 0.18915030836740576 % -[37] Char s: 0.17779629909205355 % -[38] Char t: 0.16269920983581604 % -[39] Char l: 0.14585534992183202 % -[40] Char d: 0.11665932607092637 % -[41] Char c: 0.10468147013209328 % -[42] Char h: 0.09257884486056403 % -[43] Char u: 0.08409453023722394 % -[44] Char m: 0.08247252891217362 % -[45] Char ΐ: 0.07161759696760614 % -[46] Char ψ: 0.06774974765402461 % +Most Frequent characters: +[ 0] Char α: 9.080327808140181 % +[ 1] Char ο: 8.038970864437896 % +[ 2] Char τ: 8.01706760910625 % +[ 3] Char ι: 6.784189447286694 % +[ 4] Char ν: 6.04582753567215 % +[ 5] Char ε: 5.970330083115976 % +[ 6] Char κ: 4.376312740393518 % +[ 7] Char ρ: 4.336050959457815 % +[ 8] Char σ: 4.289493315274611 % +[ 9] Char η: 3.964806653510605 % +[10] Char ς: 3.6513890578954884 % +[11] Char π: 3.44119187569109 % +[12] Char μ: 3.2560723336490085 % +[13] Char υ: 3.0358758392279896 % +[14] Char λ: 2.856734721950342 % +[15] Char ί: 2.368461428578987 % +[16] Char ό: 1.9997037240824707 % +[17] Char ά: 1.8429420198610678 % +[18] Char γ: 1.7440070259717586 % +[19] Char έ: 1.6751228751461538 % +[20] Char δ: 1.4865115097903319 % +[21] Char ή: 1.4056176030220144 % +[22] Char ω: 1.3927613444577887 % +[23] Char χ: 1.180553718527297 % +[24] Char ύ: 0.9674995899752927 % +[25] Char θ: 0.9332162338040242 % +[26] Char ώ: 0.7774068450318232 % +[27] Char φ: 0.7632808325353283 % +[28] Char β: 0.6926507700528536 % +[29] Char ξ: 0.37256696629332375 % +[30] Char ζ: 0.32436922328093837 % +[31] Char e: 0.2777057662700449 % +[32] Char a: 0.25241650045234987 % +[33] Char i: 0.24442763196182274 % +[34] Char n: 0.20702279737372561 % +[35] Char o: 0.19522466708639089 % +[36] Char r: 0.19321422336029795 % +[37] Char t: 0.17660160941310915 % +[38] Char s: 0.17363885023781434 % +[39] Char l: 0.12708120605461 % +[40] Char c: 0.11030987286588753 % +[41] Char ψ: 0.10856396120901737 % -The first 47 characters have an accumulated ratio of 0.9947858895866266. +The first 42 characters have an accumulated ratio of 0.9913751964150612. +The first 5 characters have an accumulated ratio of 0.3796638326464317. +All characters whose order is over 27 have an accumulated ratio of 0.03455794045912186. -1390 sequences found. +1629 sequences found. -First 512 (typical positive ratio): 0.9624941725288916 -Next 512 (512-1024): 0.00617732966178777 -Rest: 0.0016086054433421051 +First 850 (typical positive ratio): 0.9950192468921002 +Next 252 (1102-850): 0.003986534258695218 +Rest: 0.0009942188492045867 -- Processing end: 2021-03-16 18:58:31.125842 +- Processing end: 2022-12-14 18:05:43.760018 diff --git a/script/BuildLangModelLogs/LangHebrewModel.log b/script/BuildLangModelLogs/LangHebrewModel.log index 296d071..c5db8fd 100644 --- a/script/BuildLangModelLogs/LangHebrewModel.log +++ b/script/BuildLangModelLogs/LangHebrewModel.log @@ -1,191 +1,285 @@ = Logs of language model for Hebrew (he) = - Generated by BuildLangModel.py -- Started: 2021-03-17 23:11:33.477881 +- Started: 2022-12-14 18:02:26.153331 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -יהדות_בוקרשט (revision 30791735) -10 בנובמבר (revision 30632714) -13 בנובמבר (revision 30810192) -1522 (revision 24674491) -1533 (revision 24674506) -1550 (revision 24674532) -1594 (revision 25165542) -15 בנובמבר (revision 29890141) -1648 (revision 26810233) -1649 (revision 28942371) -1694 (revision 25165654) -1715 (revision 25165678) -1730 (revision 28089168) -1764 (revision 25165736) -1801 (revision 27881514) -1804 (revision 30643161) -1808 (revision 25165782) -1812 (revision 25165786) -1815 (revision 28059812) -1818 (revision 25165792) -1819 (revision 25165793) -1832 (revision 25165806) -1846 (revision 30789696) -1847 (revision 27881515) -1848 (revision 25165827) -1852 (revision 25165831) -1857 (revision 26643435) -1864 (revision 25165844) -1865 (revision 28700557) -1866 (revision 29383815) -1867 (revision 30295888) +יהדות_בוקרשט (revision 35182799) +1941 (revision 32659307) +משורר (revision 34978171) +ברוך טרקטין (revision 34395928) +אלכסנדר איפסילנטי (הנכד) (revision 34666729) +ולאכים (revision 34292795) +בנקאי (revision 34060002) +יוליו באראש (revision 34028115) +מלבי"ם (revision 34861779) 1873 (revision 30716465) -1876 (revision 25165858) -1877 (revision 27881506) +1866 (revision 32949259) +רומנית (revision 35154129) +לוסיאן-זאב הרשקוביץ (revision 34958893) +בראונשווייג (revision 32618810) +7 בינואר (revision 35038892) +שיטת ספריית הקונגרס (revision 30163525) +הספרייה הלאומית (revision 35173909) +סולטאן עות'מאני (revision 32081871) +תרבות (revision 35199868) +18 בינואר (revision 35165787) +ג' בניסן (revision 34787426) +ט"ו בכסלו (revision 35018392) +9 ביוני (revision 34842776) +דוקטורט (revision 34620353) +רומניה (revision 35063882) +שירה (revision 35157099) +הספרייה הלאומית של הולנד (revision 34603407) +27 במאי (revision 34455916) +מצבות (revision 35144605) +ארומנים (revision 35094875) +רוסית (revision 34696856) +כ"ד בכסלו (revision 33760689) +קומניה (revision 29898700) +ט"ו בסיוון (revision 34566812) +האוניברסיטה העברית בירושלים (revision 35179648) +ארומנית (revision 34006415) +כ"ט בחשוון (revision 35107904) +20 ביולי (revision 35167023) +השאל-החכר (revision 34945174) +ט"ז בשבט (revision 34850940) +י"ט באלול (revision 33761052) +1819 (revision 31056217) +נתן יונתן (revision 34941382) +תנועת ההשכלה היהודית (revision 34878783) +צ (revision 34299508) +אשראי (revision 32660739) +ה' באלול (revision 34228892) +שולחן ערוך (revision 34808175) +ניירות ערך (revision 33891839) +WorldCat (revision 34980710) +כ"ח בתמוז (revision 34968773) +ט"ז בכסלו (revision 33760684) +ב' בסיוון (revision 34787055) +מתרגם (revision 34685301) +הספרייה הלאומית של יוון (revision 34171107) +ספר ישעיהו (revision 34925239) +אוקראינית (revision 33750001) +18 באוקטובר (revision 34922728) +נפאלית (revision 32665998) +פזמונאי (revision 35069127) +י"ג באדר (revision 33760841) +יוונית (revision 34408463) +2007 (revision 34939693) +17 באוקטובר (revision 34652852) +סופר (revision 33825252) +אוניברסיטה (revision 34910290) +מוזיאון סטרומה (revision 33970393) +שנות ה-20 של המאה ה-20 (revision 35228618) +ג'ון גאוור (revision 31804220) +ויקישיתוף (revision 34805938) +המלחמות הנפוליאוניות (revision 35221756) +חלפנות כספים (revision 32924808) +ירושלים (revision 35190979) 1878 (revision 25165861) -1880 (revision 25165863) -1881 (revision 25165864) -1893 (revision 25165878) -1894 (revision 25165879) -1899 (revision 26643326) -18 במרץ (revision 30657076) -1912 (revision 27740363) -1913 (revision 25165902) -1918 (revision 25287021) -1919 (revision 25165908) -1920 (revision 25165910) -1921 (revision 30598446) -1931 (revision 25007812) -1938 (revision 25039793) -1940 (revision 24662839) -1941 (revision 27376428) -1942 (revision 30379738) -1945 (revision 29947601) -1948 (revision 30229979) -1949 (revision 30109414) -1976 (revision 24662876) -1977 (revision 26724861) -1978 (revision 25556703) -1997 (revision 30561757) -2007 (revision 30900003) -2008 (revision 30260606) -21 בינואר (revision 30483313) -22 במרץ (revision 29335566) -23 בדצמבר (revision 30888068) -23 במאי (revision 28677021) -28 באוקטובר (revision 30665513) -28 במאי (revision 30647380) -3 ביולי (revision 30712685) -3 בספטמבר (revision 30732448) -6 בפברואר (revision 30820717) -9 בדצמבר (revision 30650579) -Wayback Machine (revision 30422443) -אבן בניין (revision 28384131) -אברהם גולדפדן (revision 30588411) -אברהם לייבה זיסו (revision 29770127) -אדוארד ג'י רובינסון (revision 30271581) -אדולף שטרן (revision 28829344) -אהרון טאובס (revision 28125670) -אוניברסיטת בוקרשט (revision 30812551) -אוסטרו-הונגריה (revision 30392668) -אופניים (revision 30755077) -אזרחות (revision 30231926) -איסאק פלץ (revision 30750428) -אירופה (revision 30734576) -אלי ברקוביץ (revision 26435411) -אליאס שוורצפלד (revision 27528306) -אליעזר רוקח (revision 30860048) -אלכסנדר איפסילנטי (הנכד) (revision 30132231) -אלכסנדר שפרן (revision 30626532) -אלכסנדרו יואן קוזה (revision 30812553) -אלכסנדרו רובוט (revision 30725110) -אמנות (revision 30463855) -אמנציפציה ליהודים (revision 30769017) -אנג'ליקה רוזיאנו (revision 29943550) -אנטישמי (revision 30734529) -אפיית מצות (revision 30898230) -ארץ ישראל (revision 30777728) -אשר אהרנפלד (revision 30497378) -בוגדן פטריצ'ייקו חאשדאו (revision 29548438) -בוקובינה (revision 29870803) +ח' בסיוון (revision 34253160) +תסאליה (revision 32688328) +הצי המלכותי הבריטי (revision 34965822) +בלגיה (revision 35228590) +נסיכות מולדובה (revision 34307667) +TheMarker (revision 34197118) +מין דקדוקי (revision 34934476) +עיצור שפתי-שיני, חוכך, קולי (revision 34340638) +בנקאות (revision 34942533) +אפירוס (revision 34599005) +ישראל (revision 35213935) +27 בספטמבר (revision 34865166) +הספרייה הלאומית של צרפת (revision 34954915) +18 בנובמבר (revision 35165842) +קובץ בקרה משולב (revision 34980719) +אנציקלופדיה בריטניקה (revision 35145787) +הונגרית (revision 35067792) +30 ביוני (revision 35039061) +כ"ח בסיוון (revision 35123107) +טרגובישטה (רומניה) (revision 32714440) +בארבו שטירביי, שליט ולאכיה (revision 34183794) +משה שוורצפלד (revision 34298498) +13 בדצמבר (revision 35221687) +אנגלית (revision 35222539) +קרואטית (revision 32668378) +קונסטנטין פון נויראט (revision 34422308) +24 ביוני (revision 34627765) +ט"ז בניסן (revision 33760884) +לדינו (revision 35171060) +1792 (revision 27194308) +יוון (revision 35181767) +יערנות (revision 34963747) +כלכלה (revision 35229503) +האימפריה הרוסית (revision 34914018) +בוסנית (revision 34023564) +ג'ון מילטון (revision 35161378) +וולוצ'יסק (revision 34336310) +ממלכת רומניה (revision 35006227) +מנהל עסקים (revision 35028930) +ולוניה (revision 34063116) +יותם ראובני (revision 34897926) +הונג קונג (revision 35229624) +היסטוריון (revision 35146170) +עלפון (revision 32648914) +כ"ג באייר (revision 34415334) +6 בנובמבר (revision 35181020) +ציוני (revision 35070795) +י"ט באדר (revision 33760843) +עיצור דו-שפתי, סותם, אטום (revision 34216053) +יאשי (revision 34273547) +כל ישראל חברים (revision 35165601) +גאלאץ (revision 35110599) +1835 (revision 33984802) +ט' בניסן (revision 33760896) +גאלית סקוטית (revision 32661252) +י"ז בכסלו (revision 34317340) +באר שבע (revision 35210525) +יוונים (revision 34012584) +זואולוגיה (revision 35092786) +19 בפברואר (revision 35181055) +1 במאי (revision 34764938) +בוקרשט (revision 35173617) +האלפבית הקירילי הרומני (revision 34211902) +המערכה באוקיינוס האטלנטי (1939–1945) (revision 34788521) +ספריית הדיאט הלאומית (revision 35186323) +סנט וינסנט והגרנדינים (revision 34648727) +סוציולוגיה (revision 35120370) +אקונומטריקה (revision 34331016) +חגי ישראל ומועדיו (revision 35194032) +המאה ה-15 (revision 34496188) +צרפת (revision 35151383) +תואר שני (revision 35157090) +רומיאו ויוליה (revision 35155065) +פלופונסוס (מחוז) (revision 32871989) +רוברט בלייק (revision 34552047) +תכתיב וינה (revision 34846816) +קול פורטר (revision 33606049) +פאלי (revision 32666914) +בסרביה (revision 34966095) +חבר הכנסת (revision 35079939) +דת (revision 35160267) +אוניברסיטת בר-אילן (revision 34956943) +תנועות פתוחות (revision 31195015) +צה"ל (revision 35176984) +אהוד מנור (revision 35186961) +טרנסילבניה (revision 34937962) +רוסיה (revision 35232525) +כ"ג בשבט (revision 33760818) +נאסדאק (revision 34914110) +1916 (revision 32659290) +30 באוקטובר (revision 35156558) +1842 (revision 25165819) +שולחן ערוך הרב (revision 34711882) +ח' בכסלו (revision 35165253) +משה פיינשטיין (revision 35179391) +שר (revision 34558902) +יום טוב (revision 34527567) +עיראק (revision 35225563) +הקומדיה האלוהית (revision 34865795) +מדעי המידע (revision 34996637) +יום רביעי (revision 34936301) +שפה (revision 35136988) +בודג'אק (revision 34262461) +נדל"ן (revision 34581527) +כ"ה באדר א' (revision 34900966) +שער הניצחון בפריז (revision 33232994) +יצחק שמיר (revision 35187769) +תענית ציבור (revision 35122402) +קהלת (revision 34842035) +אינטלקטואל (revision 33438959) +ו' באדר א' (revision 33760856) +11 ביוני (revision 34595786) +הילך חוקי (revision 33456729) +ספירת העומר (revision 35204075) +נסיכות מוסקבה (revision 35016301) +יחזקאל (revision 35170404) +בודהה (מושג) (revision 34729970) +בית הספר רנה קסין (revision 34948521) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-17 23:15:54.330136 +- Wikipedia parsing ended at: 2022-12-14 18:05:54.188909 -79 characters appeared 538173 times. +79 characters appeared 1358022 times. -First 64 characters: -[ 0] Char י: 12.444697151287782 % -[ 1] Char ו: 10.995534893054835 % -[ 2] Char ר: 7.513197429079496 % -[ 3] Char ה: 7.070960453237156 % -[ 4] Char ב: 6.162702328061794 % -[ 5] Char ל: 5.905164324483019 % -[ 6] Char א: 5.503063141406202 % -[ 7] Char מ: 5.107465443268243 % -[ 8] Char ת: 4.564517357801302 % -[ 9] Char נ: 4.524381565035778 % -[10] Char ש: 3.590295313960381 % -[11] Char פ: 2.6872399767361053 % -[12] Char ד: 2.6859392797483337 % -[13] Char ק: 2.389751994247203 % -[14] Char ט: 2.2137862731872464 % -[15] Char ס: 2.155998164159109 % -[16] Char ם: 2.073310998507915 % -[17] Char ע: 1.9315350268408114 % -[18] Char ח: 1.9018048099774607 % -[19] Char ג: 1.6840309714534172 % -[20] Char ן: 1.4670003883509577 % -[21] Char כ: 1.3939755431803529 % -[22] Char צ: 1.2600037534398791 % -[23] Char ז: 0.8928355751774987 % -[24] Char ץ: 0.2668286963485719 % -[25] Char ך: 0.24397359213487116 % -[26] Char ף: 0.1663034005793676 % -[27] Char i: 0.10963017468360546 % -[28] Char e: 0.10925854697281358 % -[29] Char a: 0.10814366384043793 % -[30] Char r: 0.08268716565119395 % -[31] Char n: 0.08250135179579801 % -[32] Char o: 0.0707950789058537 % -[33] Char t: 0.062247641557640385 % -[34] Char l: 0.05685903975115808 % -[35] Char u: 0.0510988102338839 % -[36] Char s: 0.04199393131948277 % -[37] Char c: 0.038091840356168 % -[38] Char d: 0.03103091385112222 % -[39] Char h: 0.03084509999572628 % -[40] Char m: 0.022855104213700798 % -[41] Char g: 0.022297662647512977 % -[42] Char b: 0.015050922287071259 % -[43] Char N: 0.015050922287071259 % -[44] Char B: 0.014679294576279374 % -[45] Char S: 0.01430766686548749 % -[46] Char C: 0.01393603915469561 % -[47] Char v: 0.01393603915469561 % -[48] Char A: 0.013192783733111842 % -[49] Char E: 0.012449528311528077 % -[50] Char p: 0.010963017468360547 % -[51] Char I: 0.010963017468360547 % -[52] Char M: 0.01021976204677678 % -[53] Char f: 0.010033948191380837 % -[54] Char z: 0.009662320480588956 % -[55] Char R: 0.008733251203609248 % -[56] Char P: 0.008547437348213307 % -[57] Char T: 0.008361623492817365 % -[58] Char L: 0.007804181926629541 % -[59] Char F: 0.0076183680712336 % -[60] Char H: 0.007432554215837659 % -[61] Char k: 0.007246740360441716 % -[62] Char y: 0.0070609265050457755 % -[63] Char w: 0.0070609265050457755 % +Most Frequent characters: +[ 0] Char י: 11.821826155982745 % +[ 1] Char ו: 11.115872938730007 % +[ 2] Char ה: 8.562600605881201 % +[ 3] Char ר: 6.418747266244582 % +[ 4] Char ל: 6.179575883159478 % +[ 5] Char ת: 5.768168704188886 % +[ 6] Char ב: 5.370310642979274 % +[ 7] Char מ: 5.15993113513625 % +[ 8] Char א: 4.54315173097343 % +[ 9] Char ש: 4.304274893926608 % +[10] Char נ: 3.6775545609717666 % +[11] Char ם: 2.6959062518869357 % +[12] Char ע: 2.59642332745714 % +[13] Char ד: 2.5235231829823084 % +[14] Char פ: 2.23722443377206 % +[15] Char ק: 2.21947803496556 % +[16] Char ח: 2.1406869697250857 % +[17] Char ס: 2.012559443072351 % +[18] Char כ: 1.908142872501329 % +[19] Char ט: 1.556896721849867 % +[20] Char ג: 1.4355437540776217 % +[21] Char צ: 1.257343400916922 % +[22] Char ן: 1.2326751702107919 % +[23] Char ז: 0.8078661464983631 % +[24] Char ך: 0.3750307432427457 % +[25] Char ף: 0.1990394853691619 % +[26] Char e: 0.17253034192376854 % +[27] Char a: 0.154636670098128 % +[28] Char ץ: 0.14778847470806805 % +[29] Char i: 0.14764120168892697 % +[30] Char r: 0.11619841210230761 % +[31] Char o: 0.10198656575519395 % +[32] Char n: 0.10125020065948859 % +[33] Char t: 0.09057290677176069 % +[34] Char l: 0.07886470175004529 % +[35] Char s: 0.07636106042464702 % +[36] Char u: 0.06384285379765571 % +[37] Char c: 0.05242919481422245 % +[38] Char d: 0.039910988187231135 % +[39] Char m: 0.03475643251729354 % +[40] Char h: 0.033504611854594406 % +[41] Char M: 0.024962776744412093 % +[42] Char C: 0.022827317966866517 % +[43] Char g: 0.022238225890302218 % +[44] Char B: 0.022238225890302218 % +[45] Char p: 0.022090952871161144 % +[46] Char A: 0.021796406832878996 % +[47] Char b: 0.021649133813737922 % +[48] Char I: 0.020839132208462015 % +[49] Char v: 0.020765495698891474 % +[50] Char y: 0.02010276711275664 % +[51] Char k: 0.018998219469198583 % +[52] Char S: 0.018777309940486972 % +[53] Char D: 0.016936397201223544 % +[54] Char T: 0.01627366861508871 % +[55] Char R: 0.014727301914107429 % +[56] Char f: 0.013328208232267222 % +[57] Char P: 0.013254571722696687 % +[58] Char L: 0.013107298703555613 % +[59] Char E: 0.012002751059997556 % +[60] Char H: 0.011487295493003796 % +[61] Char V: 0.010235474830304663 % +[62] Char G: 0.010161838320734126 % +[63] Char N: 0.01008820181116359 % -The first 64 characters have an accumulated ratio of 0.9995094514217545. +The first 64 characters have an accumulated ratio of 0.9993151804609938. +The first 5 characters have an accumulated ratio of 0.4409862284999801. +All characters whose order is over 22 have an accumulated ratio of 0.031930999645072025. -1195 sequences found. +1605 sequences found. -First 512 (typical positive ratio): 0.9890483702848128 -Next 512 (512-1024): 0.04564517357801302 -Rest: 0.0004014423754119586 +First 698 (typical positive ratio): 0.9950195693248958 +Next 340 (1038-698): 0.003982282638035017 +Rest: 0.000998148037069213 -- Processing end: 2021-03-17 23:15:54.469267 +- Processing end: 2022-12-14 18:05:54.420188 diff --git a/script/BuildLangModelLogs/LangHindiModel.log b/script/BuildLangModelLogs/LangHindiModel.log index d91de96..f76e2a2 100644 --- a/script/BuildLangModelLogs/LangHindiModel.log +++ b/script/BuildLangModelLogs/LangHindiModel.log @@ -1,190 +1,286 @@ = Logs of language model for Hindi (hi) = - Generated by BuildLangModel.py -- Started: 2021-03-19 22:26:39.897264 +- Started: 2022-12-14 18:05:53.770737 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -मुखपृष्ठ (revision 5072364) -2020 विशाखपट्नम गैस रिसाव (revision 4964453) -अंग्रेज़ी विकिपीडिया (revision 4812878) -अंशुन बस दुर्घटना (revision 5080233) -अभिनव बिंद्रा (revision 5066925) -अम्फान महाचक्रवात (revision 4974141) -अल्बर्टा (revision 4815865) -अस्तित्ववाद (revision 5095575) -आज का आलेख १८ मार्च २०२१ (revision 5119127) -इंडोनेशिया (revision 5137321) -उत्तर प्रदेश (revision 5137498) -एयर इंडिया एक्सप्रेस उड़ान 1344 (revision 4958774) -कतिकी मेला (revision 4822652) -कनाडा (revision 5092431) -कश्मीर (revision 5101264) -कार्तिक पूर्णिमा (revision 5039499) -कालिंजर दुर्ग (revision 5070202) +मुखपृष्ठ (revision 5590190) +योशिहिडे सुगा (revision 5152776) +शिंजो आबे की हत्या (revision 5705526) +शिंजो आबे (revision 5704870) +मुखपृष्ठ/पूर्व प्रदर्शित (revision 5716128) +चार्ल्स पंचम (revision 5696826) +इंग्लैंड के चार्ल्स द्वितीय (revision 5166999) +बांदा जिला (revision 5381573) +शक्ति कपूर (revision 5473336) +श्रद्धा कपूर (revision 5715102) +काबुल बम धमाका 2021 (revision 5637967) कालीकट अंतर्राष्ट्रीय विमानक्षेत्र (revision 5053097) -किलर व्हेल (revision 4922729) -कोड़िकोड (revision 5106489) -कोरोनावायरस महामारी (revision 5118212) -कोसला (revision 4901745) -खजुराहो (revision 5123204) -गायक (revision 5128003) -गुइझोऊ (revision 4579091) -गुप्त वंश (revision 5101672) -ग्लेशियर नेशनल पार्क (revision 5135892) -घुम रेलवे स्टेशन (revision 4973755) -जापान के प्रधानमंत्री (revision 4960597) -जावा सागर (revision 3325350) -जुलाई (revision 4367726) -डल झील (revision 5109477) -तिरहुत (revision 5056746) -दरभंगा (revision 5139550) -धर्मराय स्वामी मंदिर (revision 5080095) -नेपाल (revision 5119140) -पक्षी (revision 4905207) -पाकिस्तान इंटरनेशनल एयरलाइंस उड़ान 8303 (revision 4972163) -पारिस्थितिकी तंत्र (revision 4960487) -पूर्वी भारत (revision 5008841) -पृथ्वीराज चौहान (revision 5141495) -प्रजातियां (revision 5084721) -प्रणब मुखर्जी (revision 5026060) -फल (revision 4887332) -फूल (revision 4985648) -बज्जिका (revision 5052110) -बांदा जिला (revision 5066719) -बिहार (revision 5141557) -बुन्देलखण्ड (revision 5142668) -बुलबुल (revision 4959703) -बेयरूत धमाका 2020 (revision 5128013) -बैटमैन (revision 5093040) -ब्रिटिश कोलम्बिया (revision 5134714) -भारत (revision 5112806) -भारत की स्वतंत्रता (revision 5102484) -भारत में कोरोनावायरस महामारी का आर्थिक प्रभाव (revision 5105591) -भालचंद्र नेमाडे (revision 5069123) -मधुबनी (revision 5127643) -मुखपृष्ठ/अन्य भाषाओं में (revision 4949624) -मुखपृष्ठ/आज का आलेख (revision 5072364) -मुखपृष्ठ/पिछला आलेख (revision 5072364) -मुखपृष्ठ/पूर्व प्रदर्शित (revision 4427327) -मुखपृष्ठ/बन्धु प्रकल्प (revision 4786332) -मुखपृष्ठ/वर्तमान (revision 5072364) -मुजफ्फरपुर (revision 5112424) -मैथिली (revision 5048285) -मोन्टाना (revision 4530336) -मोहम्मद ग़ोरी (revision 5053528) -योशिहिडे सुगा (revision 5131517) -राजपूत (revision 5142481) -राजीव गांधी खेल रत्न (revision 5035806) -रॉकी पर्वत शृंखला (revision 5063055) -लेबनान (revision 5087028) -विंध्य पर्वत (revision 4998895) -विधु विनोद चोपड़ा (revision 4920989) -विश्व धरोहर स्थल (revision 5050725) -वैशाली (revision 5134349) -शिंजो अबे (revision 4959991) -शिकारा (revision 4959995) -शिकारा (२०२० फ़िल्म) (revision 5110981) -शिवहर (revision 5002252) -श्रीनगर, जम्मू और कश्मीर (revision 5103394) -श्रीविजय एयर उड़ान 182 (revision 5112969) -संयुक्त राज्य अमेरिका (revision 5122291) -समस्तीपुर (revision 5137150) -सीतामढी (revision 5125137) -स्पेन के फ़िलिप पंचम (revision 4969865) -हाल की घटनाएँ (revision 2774346) -हिन्दी विकिपीडिया (revision 5131026) -हैब्सबर्ग राजवंश (revision 5036757) -२०२१ में निधन (revision 5077158) -2020 कोरोनावायरस महामारी (revision 5118212) -Chevron Phillips Chemical (revision 4964453) -Deccan Chronicle (revision 4976246) -Indian Standard Time (revision 5132803) -The Hindu (revision 5046686) -UTC+05:30 (revision 4947123) -अन्तर्राष्ट्रीय मानक क्रम संख्या (revision 4951625) -आंध्र प्रदेश (revision 5118459) +जापान (revision 5701019) +महाराष्ट्र (revision 5710908) +ग्लेशियर नेशनल पार्क (revision 5699185) +लिबरल डेमोक्रेटिक पार्टी (एलडीपी) (revision 5442181) +२ फ़रवरी (revision 4809849) +ए फ़्लाइंग जट्ट (revision 4966138) +एलिज़ाबेथ द्वितीय (revision 5688702) +यूनाइटेड किंगडम (revision 5677146) +साहित्य संगीत कला 2010 (revision 4802600) +सिलसिला है प्यार का (revision 4931261) +कौशम्बी जिला (revision 5598225) +आशा भोसले (revision 5362585) +बीस साल बाद (1988 फ़िल्म) (revision 5534510) +ओके जानू (revision 4955578) +कोमिटो (revision 4827585) +गौतम बुद्ध नगर जिला (revision 5329211) +आजमगढ़ जिला (revision 5713931) +वीर सावरकर अन्तर्राष्ट्रीय विमानक्षेत्र (revision 5714529) +आगरा मंडल (revision 4135962) +जूनीचीरो कोईजूमी (revision 5578694) +दोमोदेदोव हवाई अड्डा (revision 2474312) +मैरी १, इंग्लैंड की रानी (revision 5640329) +शिवा का इन्साफ (1985 फ़िल्म) (revision 4551691) +जॉर्ज द्वितीय, इंगलैंड (revision 4667449) +भारत (revision 5709618) +हिण्डौन एयर फ़ोर्स स्टेशन (revision 4807592) +उच्च सदन (revision 3981865) +खजुराहो (revision 5666589) +सिंहासन (1986 फ़िल्म) (revision 4887666) +द्वितीय विश्वयुद्ध (revision 5665367) +बुन्देलखण्ड (revision 5714881) +जवाहरलाल नेहरू विश्वविद्यालय (revision 5681612) +संयुक्त राज्य अमेरिका (revision 5682248) +एडवर्ड ७ (revision 5171217) +अभिनेत्री (revision 5628024) +महामहिम (revision 5700470) +हाइकु (revision 5293767) +मैरी प्रथम, इंग्लैंड की रानी (revision 5640329) +इराक (revision 5629115) +नागोर्नो-काराबाख़ (revision 4767627) +मुरादाबाद मंडल (revision 5567434) +हैब्सबर्ग राजवंश (revision 5396215) +इंटरनेशनल स्टैण्डर्ड नाम पहचानकर्ता (revision 5600788) +कैथोलिक कलीसिया (revision 5305904) +अकिता प्रीफ़ेक्चर (revision 4813195) +वर्चुअल अंतरराष्ट्रीय प्राधिकरण फ़ाइल (revision 5456330) +बस्ती मंडल (revision 4135986) +हेनरी ५ (revision 4834757) +मिनिस्ट्रो पिस्तारिनी हवाई अड्डा (revision 2496563) +एक विलन (revision 4820472) +जापानी संसद (revision 5661277) +जॉर्ज़ १, ग्रेट ब्रिटेन का महाराजा (revision 4832306) +हृदयाघात (revision 5628643) +फ्रांसिस प्रथम (फ्रांस का राजा) (revision 4531330) +जर्मन भाषा (revision 5641881) +जेम्स द्वितीय (revision 4832194) +अंग्रेजी गृहयुद्ध (revision 5377351) +खतरों के खिलाड़ी (1988 फ़िल्म) (revision 4575162) +हेनरी ७, इंग्लैंड का राजा (revision 5240329) +फुकुओका (revision 3614978) +रूढ़िवाद (revision 5639114) +नफ़रत की आँधी (1989 फ़िल्म) (revision 4601273) +गोरी तेरे प्यार में (revision 5509849) +The New York Times (revision 4764426) +सत्ता (revision 5172391) +तेजू विमानक्षेत्र (revision 3954126) +जापान के प्रधानमंत्री (revision 5579915) +संयुक्त प्रांत (revision 4531945) +शिंतो (revision 4661419) +महाराजगंज जिला (revision 5712666) +कुवैत (revision 5058032) +होक्काइदो (revision 5310896) +बेंगलुरु (revision 5694548) +आग और शोला (1986 फ़िल्म) (revision 5059482) +कालिंजर दुर्ग (revision 5671122) +वैदिक काल (revision 5714445) +अभिनेता (revision 5664417) +बिग बॉस 5 (revision 4489586) +तौहीन (1989 फ़िल्म) (revision 4556795) +जम्मू विमानक्षेत्र (revision 5441442) +जैन धर्म (revision 5707269) +रूस (revision 5661473) +तहसील (revision 5705360) +फैजाबाद जिला (revision 5673228) +भारत की स्वतंत्रता (revision 5658303) +मैरी ट्यूडर, फ्रांस की रानी (revision 4843021) +इतालवी भाषा (revision 5688238) +फेसबुक (revision 5691471) +जूहू (revision 2449125) +इसाबेला, पवित्र रोमन साम्राज्ञी (revision 5296814) +जोमो केन्याटा हवाई अड्डा (revision 4944242) +डच भाषा (revision 4551746) +लोकनायक जयप्रकाश विमानक्षेत्र (revision 5686951) +वफ़ादार (1985 फ़िल्म) (revision 4560799) +एकीकृत प्राधिकरण फ़ाइल (revision 5649772) +इज़राइल भगदड़ 2021 (revision 5517074) +गग्गल विमानक्षेत्र (revision 4730318) +जापानी आम चुनाव, 2017 (revision 4247626) +मानव विकास सूचकांक के अनुसार देशों की सूची (revision 5661118) +विलियम ४, युनाईटेड किंगडम का महाराजा (revision 5268152) +सच्चाई की ताकत (1989 फ़िल्म) (revision 4564022) +भारत के प्रधानमंत्री (revision 5710314) +बहरीन (revision 5441928) +येलहंका विमानक्षेत्र (revision 4579587) +नारुहितो (revision 4200571) +श्याम बेनेगल (revision 5477440) +शमशाद बेगम (revision 5698307) +कोकोस (कीलिंग) द्वीपसमूह (revision 4827218) +भौगोलिक निर्देशांक प्रणाली (revision 4781862) +डोरवल हवाई अड्डा (revision 3492688) +डैबोलिम विमानक्षेत्र (revision 5358770) +विशाल डडलानी (revision 4966043) +जेम्स ६ (revision 5299965) +बिग बॉस 6 (revision 5646933) +सांता क्रूज़ (revision 4541208) +चतुरभुज मन्दिर, ओरछा (revision 5715198) +तुलनात्मक राजनीति (revision 5633428) +गुड्डी मारुति (revision 5666173) +हिन्दी (revision 5678876) +क्षेत्रफल के अनुसार देशों की सूची (revision 5656111) +असेनबोना हवाई अड्डा (revision 2493424) +भारत के राज्य एवं केन्द्र शासित प्रदेश (revision 5715498) +मैदान (भूगोल) (revision 4910306) +वेस्टमिंस्टर ऐबी (revision 5017772) +राजनीति विज्ञान (revision 5693061) +अर्कोणम विमानक्षेत्र (revision 5379798) +सुपार्श्वनाथ (revision 4578861) +मेसिडोनियन भाषा (revision 4604616) +अक्ष शक्तियाँ (revision 5180714) +पुणे (revision 5690750) +बग़दाद (revision 5385579) +आगरा (revision 5692993) +पत्थर (revision 5614386) +शिव (revision 5714098) +विधवा (revision 5203038) +रूसी (revision 5574460) +फ़िरोज़ाबाद जिला (revision 5471871) +क्षेत्रफल के आधार पर भारत के राज्य और संघ क्षेत्र (revision 5646343) +उदयपुर हवाई अड्डा (revision 5442059) +बेल्जियम (revision 5684029) +शंकर महादेवन (revision 5669138) +महानगर (revision 5644444) +भारत–इराक़ सम्बन्ध (revision 5607175) +डाक सूचक संख्या (revision 5478479) +के के मेनन (revision 5382483) +इंटरनेट मूवी डेटाबेस (revision 5664809) +राजवंश (revision 5667109) +कीव-बोरिस्पिल (revision 5496443) +तमिल (revision 5381910) +लेडी जेन ग्रे (revision 4794538) +दैनिक भास्कर (revision 5564945) +स्विट्ज़रलैंड (revision 4806409) +१९८९ (revision 4566809) +अफ़्गानिस्तान (revision 5700573) +करिश्मा कपूर (revision 5647269) +विश्वविद्यालय अनुदान आयोग (भारत) (revision 5699560) +ब्रुनेई (revision 5451329) +भोपाल विमानक्षेत्र (revision 5023511) +लोकसभा के सभापति (revision 5692302) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-19 22:31:01.818311 +- Wikipedia parsing ended at: 2022-12-14 18:09:07.192046 -80 characters appeared 80 times. +86 characters appeared 754011 times. Most Frequent characters: -[ 0] Char व: 1.25 % -[ 1] Char ि: 1.25 % -[ 2] Char श: 1.25 % -[ 3] Char ा: 1.25 % -[ 4] Char ख: 1.25 % -[ 5] Char प: 1.25 % -[ 6] Char त: 1.25 % -[ 7] Char ्: 1.25 % -[ 8] Char न: 1.25 % -[ 9] Char म: 1.25 % -[10] Char ग: 1.25 % -[11] Char ै: 1.25 % -[12] Char स: 1.25 % -[13] Char र: 1.25 % -[14] Char ज: 1.25 % -[15] Char े: 1.25 % -[16] Char भ: 1.25 % -[17] Char ी: 1.25 % -[18] Char क: 1.25 % -[19] Char ह: 1.25 % -[20] Char ई: 1.25 % -[21] Char ो: 1.25 % -[22] Char आ: 1.25 % -[23] Char ध: 1.25 % -[24] Char द: 1.25 % -[25] Char ं: 1.25 % -[26] Char ट: 1.25 % -[27] Char ु: 1.25 % -[28] Char ए: 1.25 % -[29] Char ल: 1.25 % -[30] Char ॉ: 1.25 % -[31] Char उ: 1.25 % -[32] Char य: 1.25 % -[33] Char ष: 1.25 % -[34] Char घ: 1.25 % -[35] Char थ: 1.25 % -[36] Char ।: 1.25 % -[37] Char इ: 1.25 % -[38] Char ौ: 1.25 % -[39] Char ृ: 1.25 % -[40] Char औ: 1.25 % -[41] Char ँ: 1.25 % -[42] Char फ: 1.25 % -[43] Char ू: 1.25 % -[44] Char ठ: 1.25 % -[45] Char ड: 1.25 % -[46] Char ब: 1.25 % -[47] Char च: 1.25 % -[48] Char अ: 1.25 % -[49] Char ण: 1.25 % -[50] Char छ: 1.25 % -[51] Char ़: 1.25 % -[52] Char ऊ: 1.25 % -[53] Char ऐ: 1.25 % -[54] Char ढ: 1.25 % -[55] Char ञ: 1.25 % -[56] Char ओ: 1.25 % -[57] Char ः: 1.25 % -[58] Char ऑ: 1.25 % -[59] Char १: 1.25 % -[60] Char ५: 1.25 % -[61] Char २: 1.25 % -[62] Char ०: 1.25 % -[63] Char ७: 1.25 % +[ 0] Char ा: 8.678918477316644 % +[ 1] Char र: 6.941145420955397 % +[ 2] Char क: 6.453088880666197 % +[ 3] Char ्: 6.20932585864132 % +[ 4] Char े: 5.404960935583168 % +[ 5] Char ि: 4.359087599517779 % +[ 6] Char न: 4.126730246640964 % +[ 7] Char स: 4.0563068708546695 % +[ 8] Char त: 3.6816439017467917 % +[ 9] Char ं: 3.5459694885087885 % +[10] Char ी: 3.479524834518329 % +[11] Char म: 3.342126308502131 % +[12] Char ह: 2.8874910312979516 % +[13] Char य: 2.7628244150284282 % +[14] Char प: 2.517602528345077 % +[15] Char ल: 2.4003628594277804 % +[16] Char व: 2.308719634063694 % +[17] Char ो: 2.187501243350561 % +[18] Char द: 1.9950637324919664 % +[19] Char ज: 1.7482503570902812 % +[20] Char ग: 1.359131365457533 % +[21] Char ै: 1.352367538404612 % +[22] Char ब: 1.3327391775451551 % +[23] Char ु: 1.327168967030985 % +[24] Char ।: 1.0637775841466504 % +[25] Char श: 1.0494541856816413 % +[26] Char अ: 0.8652393665344404 % +[27] Char थ: 0.8141791034878801 % +[28] Char ट: 0.814046479428019 % +[29] Char भ: 0.7987947125439815 % +[30] Char ध: 0.7616599757828466 % +[31] Char ू: 0.7194855247469865 % +[32] Char ष: 0.6787699383695994 % +[33] Char ए: 0.6718734872568172 % +[34] Char च: 0.6639160436651454 % +[35] Char ड: 0.6478685324219408 % +[36] Char इ: 0.5391168033357603 % +[37] Char औ: 0.5350054574800633 % +[38] Char आ: 0.5262522695292243 % +[39] Char उ: 0.47214165310585654 % +[40] Char ण: 0.46763243507057584 % +[41] Char ख: 0.40357501415761843 % +[42] Char ़: 0.3995962923617825 % +[43] Char फ: 0.34336369098063557 % +[44] Char ई: 0.3083509391772799 % +[45] Char ृ: 0.17851198457316936 % +[46] Char ँ: 0.16339284174899305 % +[47] Char ौ: 0.15861837559399 % +[48] Char ओ: 0.13302193204077925 % +[49] Char ॉ: 0.128114841825915 % +[50] Char छ: 0.12652335310758064 % +[51] Char ठ: 0.11631130049826859 % +[52] Char घ: 0.11604605237854619 % +[53] Char १: 0.11127158622354315 % +[54] Char ०: 0.09429570656131012 % +[55] Char २: 0.05822196227906489 % +[56] Char ढ: 0.0547737367226738 % +[57] Char ञ: 0.05464111266281261 % +[58] Char ९: 0.05411061642336783 % +[59] Char ५: 0.044826932233084135 % +[60] Char झ: 0.043765939754194565 % +[61] Char ऐ: 0.03660424052169001 % +[62] Char ॰: 0.036206368342106415 % +[63] Char ऑ: 0.03607374428224522 % +[64] Char ६: 0.03607374428224522 % +[65] Char ८: 0.035012751803355655 % +[66] Char ४: 0.0336865112047437 % +[67] Char ७: 0.03328863902516011 % +[68] Char ३: 0.03156452624696457 % +[69] Char ः: 0.030503533768075 % +[70] Char ऊ: 0.02785105257085109 % +[71] Char ऋ: 0.013262405986119566 % +[72] Char ऍ: 0.004111345855697065 % +[73] Char ॅ: 0.002387233077501522 % +[74] Char ळ: 0.0013262405986119567 % +[75] Char ॥: 0.0006631202993059783 % +[76] Char ॠ: 0.0005304962394447826 % +[77] Char ॄ: 0.000397872179583587 % +[78] Char ॆ: 0.000397872179583587 % +[79] Char ङ: 0.000397872179583587 % +[80] Char ऽ: 0.0002652481197223913 % +[81] Char ॓: 0.0002652481197223913 % +[82] Char ऎ: 0.00013262405986119564 % +[83] Char ॑: 0.00013262405986119564 % +[84] Char ॊ: 0.00013262405986119564 % +[85] Char ऱ: 0.00013262405986119564 % -The first 64 characters have an accumulated ratio of 0.7999999999999992. +The first 86 characters have an accumulated ratio of 0.9999999999999999. +The first 0 characters have an accumulated ratio of 0. +All characters whose order is over 41 have an accumulated ratio of 0.030491597602687492. -2113 sequences found. +2165 sequences found. -First 1356 (typical positive ratio): 0.9950083796268726 -Next 397 (1753-1356): 0.00399414702204226 -Rest: 0.000997473351085132 +First 1351 (typical positive ratio): 0.9950085486567504 +Next 387 (1738-1351): 0.003992816717832359 +Rest: 0.0009986346254172718 -- Processing end: 2021-03-19 22:31:02.178353 +- Processing end: 2022-12-14 18:09:07.547746 diff --git a/script/BuildLangModelLogs/LangHungarianModel.log b/script/BuildLangModelLogs/LangHungarianModel.log index 1e4ed44..6471763 100644 --- a/script/BuildLangModelLogs/LangHungarianModel.log +++ b/script/BuildLangModelLogs/LangHungarianModel.log @@ -1,157 +1,242 @@ = Logs of language model for Hungarian (hu) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:18:56.191449 +- Started: 2022-12-14 18:27:14.157910 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == Kezdőlap (revision 21016160) -1621 (revision 19051984) -1771 (revision 21393041) -1821 (revision 23636828) -1831 (revision 22164941) -1848–49-es forradalom és szabadságharc (revision 23685544) -1858 (revision 22166952) -1871 (revision 23533908) -1921 (revision 23662365) -1924 (revision 23246889) -1941 (revision 23564803) -1946 (revision 23682260) -1971 (revision 23593882) -2003 (revision 23647007) -2021 (revision 23686129) -A Nyugat lánya (revision 21595643) -A magyar zászló és címer napja (revision 23134982) -A párizsi Notre-Dame (revision 23521460) -Abja-Paluoja (revision 23589245) -Antoine-Jean Gros (revision 23684575) -Arina Szjarhejevna Szabalenka (revision 23614779) -Aun Szan Szu Kji (revision 23588064) -Barbora Krejčíková (revision 23683559) -Bebe Daniels (revision 23684693) -Berlini Nemzetközi Filmfesztivál (revision 23601008) -Bohémélet (revision 23533579) -Borel–Lebesgue-tétel (revision 20175177) -Brüsszel (revision 23681873) -Claude Debussy (revision 23666304) -Covid19-koronavírus-járvány Magyarországon (revision 23684719) -Covid19-pandémia (revision 23672833) -Császár Angela (revision 23405485) -December 22. (revision 23636644) -EastEnders (revision 23674461) -Eigel Ernő (revision 23678820) -Elise Mertens (revision 23668277) -Első világháború (revision 23681284) -Enciklopédia (revision 23257786) -Fahd szaúdi király (revision 23684688) -Filip Polášek (revision 20343200) -Finnugor Kulturális Főváros (revision 23593480) -Georg Neumark (revision 23419386) -Giacomo Puccini (revision 23685245) -Gianni Schicchi (revision 21500522) -Gonda János (revision 23672147) -Halálozások 2021-ben (revision 23686337) -Heine-tétel (revision 15274788) -Heller Bernát (revision 21796754) -Henrietta (keresztnév) (revision 23599183) -Hmelnickiji terület (revision 21540657) -Ivan Dodig (revision 19700630) -Jankovics József (revision 23686084) -Jean Frydman (revision 23684355) -Jászai Mari-díj (revision 23683756) -Katona Gyula (matematikus) (revision 23651033) -Kew Gardens (revision 23635430) -Klasszikus gitár (revision 23640016) -Kombinatorika (revision 23457078) -Kurucz György (motorversenyző) (revision 23682502) -Landerer Lajos (revision 20960777) -Lucca (revision 22052809) -Lítium (revision 23671148) -Magyar Tudományos Akadémia (revision 23644040) -Magyar Wikipédia (revision 23672081) -Magyar nyelv (revision 21426463) -Magyarország (revision 23674944) -Magyarország címere (revision 23623029) -Magyarország nemzetiségei (revision 23600289) -Magyarország zászlaja (revision 23056847) -Mars (bolygó) (revision 23667637) -Mercury–Atlas–6 (revision 23639047) -Mianmar (revision 23673840) -Michael Jordan (revision 23621635) -Michal Polák (revision 23684810) -Mustárgáz (revision 23682684) -Március 16. (revision 23685754) -NASA (revision 23080317) -Nagy Dénes (filmrendező) (revision 23656475) -Newbery Medal (revision 23594588) -Nobel-békedíj (revision 23517207) -Novak Đoković (revision 23685551) -November 29. (revision 23652299) -Nyílt tartalom (revision 22335123) -Olaszország (revision 23657820) -Országos Rabbiképző – Zsidó Egyetem (revision 23624676) -Perseverance (revision 23666738) -Pillangókisasszony (revision 23430673) -Pánczél Lajos (revision 23532037) -Rajeev Ram (revision 23615665) -Richard Strauss (revision 23674657) -Révész László László (revision 23685649) -Spinosaurus (revision 23680682) -Szomszédok (revision 23682854) -Széchenyi-díj (revision 23683451) -Természetes fény (film) (revision 22147530) -Tiltott Város (revision 23663664) -Tosca (revision 23580069) -Ukrajna (revision 23683387) +World Chess Hall of Fame (revision 25593350) +Soproni népszavazás (revision 24780559) +1717 (revision 18910115) +December 10. (revision 24780074) +Hermann Nuber (revision 25593806) +Amerikai Egyesült Államok (revision 25588859) +Holdkomp (revision 25503795) +1962 (revision 25469682) +Fizikai Nobel-díj (revision 25524617) +1842 (revision 22164991) +Fekete dió (revision 25365402) +Gata Kamsky (revision 25593352) +Northeast Corridor (revision 24785538) +Április 12. (revision 25245433) +Victor Franz Hess (revision 21392942) +December 5. (revision 25579582) +1720 (revision 20323891) +Karl-Heinz Schnellinger (revision 19736536) +Március 26. (revision 25571341) +Február 23. (revision 24726044) +Georg Stollenwerk (revision 24541718) +Edgar Mitchell (revision 24527824) +Hans Cieslarczyk (revision 22742075) +Integrált katalógustár (revision 22941517) +Augusztus 24. (revision 25382451) +Október 10. (revision 25450877) +Aage Niels Bohr (revision 21379467) +Yukon (folyó) (revision 25343512) +1923 (revision 25236748) +2010-es évek (revision 24750677) +Springfield (Illinois) (revision 25583700) +Június 26. (revision 25049350) +Január 22. (revision 25289685) +1720-as évek (revision 14636336) +Fertőboz (revision 25319685) +November 12. (revision 25473030) +1962 a vasúti közlekedésben (revision 25468780) +Február 25. (revision 25598651) +Ortodox kereszténység (revision 25283530) +A fajok tudományos neve (revision 25593231) +Május 1. (revision 25388874) +Nem fenyegetett faj (revision 25534389) +Bernhard Klodt (revision 22812912) +1774 (revision 19760097) +Dió (faanyag) (revision 25574623) +Köpeczi Béla (revision 25504643) +Július 15. (revision 25580122) +Jézus Társasága (revision 25587951) +Nagy-Magyarország (revision 23834785) +1847 (revision 24629327) +Uwe Seeler (revision 25522695) +Napelem (revision 25566245) +Montana (állam) (revision 25485620) +1915 (revision 25197857) +Vaskeresztes (revision 25571798) +David J. Wineland (revision 24952192) +Flea (revision 25177576) +Kongresszusi Könyvtár (revision 23872247) +Február 26. (revision 25585428) +1901 a tudományban (revision 25104317) +Eurosids I (revision 25369224) +Eugene Cernan (revision 24059330) +Április 4. (revision 25601269) +1963 (revision 25322431) +1928 (revision 25573809) +Encyclopedia of Life (revision 20346925) +Sziklás-hegységi vapiti (revision 24810042) +Március 29. (revision 25538925) +Augusztus 1. (revision 25463746) +Carl Schlechter (revision 25593337) +Csapatkapitány (labdarúgás) (revision 23309932) +KGB (revision 25540813) +Friendship 7 (revision 25087269) +Július 17. (revision 25438188) +Csehszlovákia (revision 25579956) +Kickers Offenbach (revision 22500277) +Svéd Nemzeti Könyvtár (revision 25172138) +Pont (geometria) (revision 22470000) +Vörös lista (revision 25490853) +Herczeg Ferenc (író) (revision 25186273) +Május 13. (revision 25565946) +Világkereskedelmi Központ (revision 25468488) +1773 (revision 22506342) +Berber naptár (revision 23527855) +December 9. (revision 25337984) +Eördögh István (revision 24787391) +Alfred Schmidt (labdarúgó) (revision 18132126) +Dzsucse-naptár (revision 24065456) +Viktor Lvovics Korcsnoj (revision 25385907) +1957 (revision 24908642) +2011 (revision 25584698) +Japán (revision 25495017) +Toulouse (revision 25383155) +21. század (revision 25514145) +Charles Glover Barkla (revision 25048899) +Heinz Wewers (revision 20183927) +1783 (revision 23373247) +Egylaki (revision 25554962) +Magyar Revíziós Liga (revision 24654977) +Hátvéd (revision 24332639) +George Paget Thomson (revision 25049110) +Carl von Linné (revision 25481141) +Apollo–14 (revision 25179663) +1921 (revision 25577885) +Alekszandr Alekszandrovics Aljechin (revision 25489078) +Maia Csiburdanidze (revision 23849588) +Január 10. (revision 25379147) +1931 a tudományban (revision 25371616) +Augusztus 22. (revision 25382444) +Hindu naptár (revision 21328275) +Tömegközéppont (revision 24649352) +Névnap (revision 25257763) +1911 a tudományban (revision 20986691) +Aaron Nimzowitsch (revision 24666050) +Antigua és Barbuda (revision 25521675) +Kőszeg (revision 25575915) +1906 a tudományban (revision 22891535) +Évtized (revision 23405214) +1935 (revision 25551622) +Élő Árpád (revision 25390757) +Paul Keres (revision 25496518) +Furnér (revision 25537914) +December 11. (revision 25564942) +Kercaszomor (revision 24777431) +Május 10. (revision 25599956) +Vas (revision 25523573) +November 16. (revision 24780301) +U Thant (revision 25470688) +Teleki Pál (politikus) (revision 25590065) +Labdarúgó-világbajnokság (revision 25599349) +November 25. (revision 25533710) +Rönök (revision 25571486) +Barsi Márton (revision 25073301) +Nagytilaj (revision 25278066) +John Frusciante (revision 25021792) +Bulgária történelme (revision 23899925) +Mersevát (revision 23571517) +1886 (revision 25571255) +Magyar Katolikus Egyház (revision 25582598) +Horthy Miklós (kormányzó) (revision 25581360) +1781 (revision 19880763) +1882 (revision 25577810) +Luigi Cadorna (revision 24319357) +Budapesti Corvinus Egyetem (revision 25516942) +Nagybecskerek (revision 25358435) +December 16. (revision 25599587) +Nemzetközi Virtuális Katalógustár (revision 23032870) +Sepp Herberger (revision 24594437) +Union List of Artist Names (revision 22813546) +1909 a tudományban (revision 20844880) +Tengerszint feletti magasság (revision 25249902) +2014 (revision 25384048) +Ion Luca Caragiale (revision 25281609) +Titkosszolgálat (revision 24668132) +Kihalt növényfajok listája (revision 24650666) +1996 (revision 25532986) +Szeptember 4. (revision 25234305) +Lőrincz Márton (revision 25397376) +Július 1. (revision 25498542) +Rudi Hoffmann (revision 24588722) +Június 9. (revision 25589565) +Líbia (revision 25560538) +Orosz–török háború (1877–78) (revision 24153662) +Ónin-háború (revision 23998199) +Szeptember 16. (revision 25282517) +1860 (revision 25571258) +Paulo Vinícius (revision 25557514) +Sziget (revision 23768646) +Zsidó naptár (revision 24606083) +Január 30. (revision 25097760) +Languedoc-Roussillon (revision 25507545) +1980 (revision 25477733) +Palóczy László (revision 25083936) +1994 (revision 25534746) +Japán naptár (revision 24854752) +Német labdarúgó-bajnokság (első osztály) (revision 25357691) +Október 8. (revision 25500223) +Március 9. (revision 25535781) +Menyhért (revision 24242885) +Április 10. (revision 25232541) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:23:30.841696 +- Wikipedia parsing ended at: 2022-12-14 18:30:39.652337 -56 characters appeared 1168905 times. +61 characters appeared 1465986 times. -First 32 characters: -[ 0] Char e: 9.498462236024313 % -[ 1] Char a: 9.06651952040585 % -[ 2] Char t: 7.768381519456244 % -[ 3] Char s: 6.3276314157266835 % -[ 4] Char l: 5.860613137936787 % -[ 5] Char n: 5.5261120450336 % -[ 6] Char r: 5.029493414777077 % -[ 7] Char i: 4.7589838352988485 % -[ 8] Char k: 4.502162280082642 % -[ 9] Char o: 4.291794457205675 % -[10] Char z: 4.136777582438265 % -[11] Char á: 3.7318687147372973 % -[12] Char é: 3.275287555447192 % -[13] Char m: 3.2307159264439798 % -[14] Char g: 3.089215975635317 % -[15] Char b: 2.120103857884088 % -[16] Char d: 2.0372913110988486 % -[17] Char y: 2.0071776577223983 % -[18] Char v: 1.8980156642327648 % -[19] Char u: 1.421757970066002 % -[20] Char h: 1.3363789187316335 % -[21] Char p: 1.229868979942767 % -[22] Char j: 1.147227533460803 % -[23] Char c: 1.0305371266270569 % -[24] Char ö: 1.0298527254139558 % -[25] Char f: 0.9665456132020994 % -[26] Char ó: 0.9550818928826551 % -[27] Char ő: 0.8821931636873827 % -[28] Char í: 0.6613026721589864 % -[29] Char ü: 0.46162861823672585 % -[30] Char ú: 0.293950321026944 % -[31] Char ű: 0.23611841851989682 % +Most Frequent characters: +[ 0] Char e: 9.402409027098486 % +[ 1] Char a: 9.114207093382884 % +[ 2] Char t: 7.237790811099151 % +[ 3] Char s: 6.428983632858704 % +[ 4] Char l: 5.9654048537980575 % +[ 5] Char n: 5.458169450458599 % +[ 6] Char r: 5.10993965835963 % +[ 7] Char i: 4.706252310731481 % +[ 8] Char k: 4.440833677811384 % +[ 9] Char o: 4.25856727144734 % +[10] Char z: 4.175346831415853 % +[11] Char á: 3.6273197697658777 % +[12] Char g: 3.3966900093179606 % +[13] Char m: 3.129838893413716 % +[14] Char é: 3.011147446155693 % +[15] Char b: 2.1867193820404833 % +[16] Char d: 2.0097736267604196 % +[17] Char y: 1.9813286075037553 % +[18] Char v: 1.9591592279871703 % +[19] Char u: 1.4463985331374243 % +[20] Char h: 1.4457846118585034 % +[21] Char p: 1.3751154513071748 % +[22] Char j: 1.2652235423803502 % +[23] Char c: 1.0636527224680181 % +[24] Char ó: 1.0247710414697002 % +[25] Char f: 1.0219742889768388 % +[26] Char ö: 0.9963260222130361 % +[27] Char ő: 0.8993946736189841 % +[28] Char í: 0.6218340420713431 % +[29] Char ü: 0.4633741386343389 % +[30] Char ú: 0.3121448635935132 % +[31] Char ű: 0.23056154697248132 % -The first 32 characters have an accumulated ratio of 0.998090520615448. +The first 32 characters have an accumulated ratio of 0.9976643706010834. +The first 5 characters have an accumulated ratio of 0.3814879541823728. +All characters whose order is over 25 have an accumulated ratio of 0.03523635287103696. -1122 sequences found. +1249 sequences found. -First 512 (typical positive ratio): 0.9736098834669349 -Next 512 (512-1024): 0.0023611841851989683 -Rest: 0.00010464608288375879 +First 728 (typical positive ratio): 0.9950146122040027 +Next 170 (898-728): 0.003990738185399856 +Rest: 0.0009946496105974756 -- Processing end: 2021-03-16 19:23:30.943714 +- Processing end: 2022-12-14 18:30:39.738789 diff --git a/script/BuildLangModelLogs/LangIrishModel.log b/script/BuildLangModelLogs/LangIrishModel.log index 37e867d..75a3db2 100644 --- a/script/BuildLangModelLogs/LangIrishModel.log +++ b/script/BuildLangModelLogs/LangIrishModel.log @@ -1,158 +1,221 @@ = Logs of language model for Irish (ga) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:06:31.364099 +- Started: 2022-12-14 18:05:56.518022 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == Tracy Caldwell Dyson (revision 972597) -14 Lúnasa (revision 945830) -1969 (revision 950246) -Arcadia (revision 940778) -California (revision 977165) -Ceimic (revision 996644) -Ceimic fhisiciúil (revision 927461) -Ceimiceoir (revision 927503) -Fisiceoir (revision 880864) -IMDb (revision 941231) -Max Q (revision 910451) -Medal "For Merit in Space Exploration" (revision 972605) -NASA (revision 982342) -Ollscoil California, Davis (revision 972597) -Rúisis (revision 990076) -SAM (revision 976971) -Spáinnis (revision 976986) -Spásaire (revision 948727) -Stáisiún Idirnáisiúnta Spáis (revision 810459) -Stáit Aontaithe Mheiriceá (revision 976971) -Tointeálaí spáis (revision 884452) -10 Lúnasa (revision 649045) -11 Lúnasa (revision 855483) -12 Lúnasa (revision 970783) -13 Lúnasa (revision 843084) -1598 (revision 703178) -15 Lúnasa (revision 776986) -16 Lúnasa (revision 956751) -1740 (revision 868712) -1771 (revision 776762) -17 Lúnasa (revision 777131) -1823 (revision 884394) -1832 (revision 870502) -1898 (revision 881354) -18 Lúnasa (revision 777242) -1911 (revision 884923) -1956 (revision 922906) -1962 (revision 948322) -1966 (revision 983105) -1983 (revision 950195) -19 Lúnasa (revision 648524) -1 Lúnasa (revision 970005) -2001 (revision 953347) -2004 (revision 915512) -20 Lúnasa (revision 863369) -21 Lúnasa (revision 987631) -22 Lúnasa (revision 949242) -23 Lúnasa (revision 778453) -24 Lúnasa (revision 855482) -25 Lúnasa (revision 922966) -26 Lúnasa (revision 649051) -27 Lúnasa (revision 855881) -28 Lúnasa (revision 855201) -29 Lúnasa (revision 937884) -2 Lúnasa (revision 949578) -30 Lúnasa (revision 648308) -31 Lúnasa (revision 874664) -3 Lúnasa (revision 954861) -4 Lúnasa (revision 936315) -5 Lúnasa (revision 946408) -6 Lúnasa (revision 936316) -7 Lúnasa (revision 936317) -8 Lúnasa (revision 648745) -9 Lúnasa (revision 868992) -AK Parti (revision 980611) -An Phacastáin (revision 975474) -An Tuirc (revision 975987) -Aoidh Uí Néill (revision 945830) -Aoine (revision 871416) -Bertolt Brecht (revision 996168) -Czesław Miłosz (revision 968559) -Céadaoin (revision 841385) -Dan Boyle (revision 981683) -Domhnach (revision 717663) -Déardaoin (revision 841384) -Féilire (revision 648837) -Halle Berry (revision 916135) -Henry Bagenal (revision 936900) -Iúil (revision 931127) -Luan (revision 717791) -Lúnasa (revision 970011) -Meán Fómhair (revision 931128) -Mila Kunis (revision 916248) -Pápa Pius VII (revision 972523) -Satharn (revision 717929) +Tointeálaí spáis (revision 1049998) +Spásaire (revision 1066024) +Ceimiceoir (revision 1069325) +SAM (revision 1117044) +Fisiceoir (revision 1070391) +Stáit Aontaithe Mheiriceá (revision 1117044) +Rúisis (revision 1106700) +Ceimic (revision 1118628) +IMDb (revision 1120126) +14 Lúnasa (revision 1096367) +Príomhchathair (revision 1112302) +Dlí Raoult (revision 1000192) +Meath radaighníomhach (revision 1119072) +Comhghuaillithe (revision 1116753) +1929 (revision 1100654) +Vermont (revision 1058965) +Fyodor Dostoyevsky (revision 1118623) +Albert Einstein (revision 1112165) +Pápa Pius VII (revision 1012016) +10ú haois (revision 739954) +2005 (revision 1095195) +Madonna (revision 1114144) +1802 (revision 1120813) +1986 (revision 1116382) +An tAontas Sóivéadach (revision 1012309) +Benjamin Franklin (revision 998375) +Antoine Laurent Lavoisier (revision 1101121) +An Sciath (revision 1107011) +An Laidin (revision 1114200) +Géarchéim airgeadais 2007-2008 (revision 1107877) +Teirmidinimic (revision 814554) +Guglielmo Marconi (revision 1063391) +Ernst Mach (revision 1027300) +Fréamh an Eolais (revision 1024048) +1790 (revision 1095417) +Ionsaí ar Pearl Harbor (revision 1109393) +Cúmánaigh (revision 1111797) +Cogadh Réabhlóideach Mheiriceá (revision 1106269) +Thomas Edison (revision 977995) +Meicsiceo (revision 1105054) +Clement Coughlan (revision 1027709) +Robert Millikan (revision 995498) +Intleacht (revision 1118184) +Eamhnú (revision 685516) +Inneall (revision 656989) +Marie Curie (revision 972274) +Daniel Gabriel Fahrenheit (revision 992356) +Fisic (revision 1025283) +Gníomhaireacht Spáis na hEorpa (revision 1118858) +1956 (revision 1120816) +Bratach Stáit Aontaithe Mheiriceá (revision 885909) +Ór (revision 1034373) Walter Scott (revision 973708) -Áth Buí (revision 923034) -10 Bealtaine (revision 974318) -11 Feabhra (revision 885848) -11 Meitheamh (revision 937886) -11 Márta (revision 956107) -11 Nollaig (revision 949777) -13 Eanáir (revision 952269) -14 Eanáir (revision 952327) -15 Meitheamh (revision 770401) -16 Nollaig (revision 922996) -17 Meán Fómhair (revision 974321) -17 Márta (revision 959908) -1882 (revision 894229) -1886 (revision 876620) +Radaighníomhaíocht (revision 1119072) +Spásárthach (revision 758622) +Conradh na Náisiún (revision 1108801) +Niels Bohr (revision 1101167) +Oklahoma (revision 980194) +Liosta Institiúidí Pleanála Teanga ar fud an Domhain (revision 652223) +21 Lúnasa (revision 1096218) +Giúdachas (revision 1057667) +Stáisiún spáis (revision 823620) +Missouri (revision 1109999) +1958 (revision 1095248) +Titanic (scannán 1997) (revision 1073341) +An tAigéan Ciúin (revision 1110855) +3 Lúnasa (revision 1096165) +Henri Becquerel (revision 1056324) +Tony Scannell (revision 971451) +13 Lúnasa (revision 1106170) +Antoine Lavoisier (revision 1101121) +Discovery (revision 1070352) +28 Lúnasa (revision 1096170) +29 Lúnasa (revision 1094422) +1981 (revision 1100770) +Bunreacht na Stát Aontaithe (revision 1089293) +Tír (revision 1048967) +11 Lúnasa (revision 1095733) +An Téalainn (revision 997128) +An Ríocht Aontaithe (revision 1119694) +9 Lúnasa (revision 1096168) +Conradh Versailles (revision 1085221) +Pennsylvania (revision 1058964) +Harold Macmillan (revision 1030157) +19ú haois (revision 1083522) +Creideamh (revision 1049197) +1996 (revision 1100768) +Nicearagua (revision 1106117) +An Bheilg (revision 1112792) +Sean-Ghréigis (revision 1054688) +Gaeilge (revision 1113730) +Síocháin (revision 1039465) +1945 (revision 1120818) +Réabhlóid na Feabhra (revision 1068209) +Aigéad hidreaclórach (revision 1076385) +Poblacht na hÉireann (revision 1118646) +25 Lúnasa (revision 1089322) +Gnó (revision 1025310) +27 Lúnasa (revision 1096378) +8 Nollaig (revision 1120310) +Pápa Pius XI (revision 1059455) +Stair (revision 996477) +Contae Chorcaí (revision 1108805) +Veirgil (revision 973705) +Gabriel Fahrenheit (revision 992356) +Lárionad Taighde na dTeangacha Dúchasacha (An Fhionlainn) (revision 832954) +931 (revision 700763) +Dia (revision 1113800) +Lucht dóiteáin (revision 659094) +Aigéan (revision 1106787) +Breitheamh (revision 1067256) +An Bheilís (revision 975457) +Teocht (revision 1094881) +Benoît Fourneyron (revision 1034033) +S. Scott Bullock (revision 1072412) +1830 (revision 1095380) +Raidió (revision 1110288) +Georgia (stát S.A.M.) (revision 999135) +Aoine (revision 1051861) +Vicipéid (revision 1117521) +1780í (revision 1047267) +7 Lúnasa (revision 1096108) +1920í (revision 1047088) +An Fhionlainn (revision 1100573) +1942 (revision 1095266) +Poblacht (revision 828806) +Coilíneachas (revision 897710) +Caitliceachas (revision 1115927) +An Ghinéiv (revision 1059159) +Impireacht na Rúise (revision 1116927) +Ráta úis (revision 1110345) +Ciara Conway (revision 1022483) +1930í (revision 740221) +Dlí (revision 1025297) +Breatnais (revision 1120118) +1971 (revision 1120763) +John Lydon (revision 979043) +An India (revision 1119349) +1995 (revision 1120752) +An Seiseamhán (revision 1107012) +Fionlainnis (revision 1113969) +Leathré (revision 1094786) +Adam Mickiewicz (revision 1059506) +Morelos (revision 1008223) +An Chríostaíocht (revision 1111524) +Chadwick Boseman (revision 1026926) +Querétaro (revision 982615) +1913 (revision 1095297) +Cumadóir (revision 797877) +Idaho (revision 986069) +Panthéon (revision 1101065) +Hélène Carrère d'Encausse (revision 1097053) +An Domhan (revision 1070377) +Iriseoir (revision 1109397) +Pearl Harbor (revision 1116761) +8 Lúnasa (revision 1100776) +5 Feabhra (revision 1096085) +Aisteoir (revision 1058523) +Enid Blyton (revision 1020472) +Feithicil armúrtha troda (revision 685853) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:09:36.532359 +- Wikipedia parsing ended at: 2022-12-14 18:08:23.899888 -42 characters appeared 213560 times. +50 characters appeared 510350 times. -First 31 characters: -[ 0] Char a: 15.363832178310547 % -[ 1] Char i: 10.505712680277206 % -[ 2] Char n: 8.10825997377786 % -[ 3] Char h: 7.447087469563589 % -[ 4] Char r: 6.299868889305113 % -[ 5] Char e: 6.046076044203034 % -[ 6] Char s: 5.528657051882375 % -[ 7] Char t: 4.9690953362052825 % -[ 8] Char c: 4.70593744146844 % -[ 9] Char l: 4.132328151339202 % -[10] Char o: 3.9469001685708935 % -[11] Char d: 3.2154897920958985 % -[12] Char g: 2.7795467315976774 % -[13] Char m: 2.6760629331335455 % -[14] Char á: 2.228413560591871 % -[15] Char u: 2.17550103015546 % -[16] Char b: 2.0130174189923205 % -[17] Char í: 1.7522007866641691 % -[18] Char é: 1.2207342198913653 % -[19] Char f: 1.1186551788724481 % -[20] Char ú: 1.0039333208466004 % -[21] Char ó: 0.8967035025285635 % -[22] Char p: 0.8475369919460574 % -[23] Char y: 0.2289754635699569 % -[24] Char v: 0.22101517138040833 % -[25] Char k: 0.17606293313354562 % -[26] Char w: 0.16295186364487732 % -[27] Char j: 0.09271399138415433 % -[28] Char z: 0.06836486233377037 % -[29] Char x: 0.03511893613036149 % -[30] Char q: 0.01311106948866829 % +Most Frequent characters: +[ 0] Char a: 15.64201038502988 % +[ 1] Char i: 10.403056725776427 % +[ 2] Char n: 8.308023905163124 % +[ 3] Char h: 7.609875575585383 % +[ 4] Char r: 6.313118448123836 % +[ 5] Char e: 6.009797198001372 % +[ 6] Char s: 5.175859704124621 % +[ 7] Char t: 4.867052023121388 % +[ 8] Char c: 4.800039188792005 % +[ 9] Char o: 4.058391300088174 % +[10] Char l: 4.055256196727735 % +[11] Char d: 3.1166846281963356 % +[12] Char g: 2.863329087880866 % +[13] Char m: 2.718722445380621 % +[14] Char u: 2.1199177035367884 % +[15] Char b: 1.9925541295189575 % +[16] Char á: 1.8597041246203587 % +[17] Char í: 1.8483393749387673 % +[18] Char é: 1.3028313902223965 % +[19] Char f: 1.1746840403644558 % +[20] Char ó: 0.9724698736161459 % +[21] Char ú: 0.954834917213677 % +[22] Char p: 0.8300186146762025 % +[23] Char k: 0.22847065739198588 % +[24] Char v: 0.2212207308709709 % +[25] Char y: 0.21612618791025767 % +[26] Char w: 0.11227588909571862 % +[27] Char j: 0.0944449887332223 % +[28] Char z: 0.05310081316743412 % +[29] Char x: 0.028215930243950228 % +[30] Char q: 0.017830900362496325 % -The first 31 characters have an accumulated ratio of 0.9997986514328528. +The first 31 characters have an accumulated ratio of 0.9996825707847552. +The first 3 characters have an accumulated ratio of 0.3435309101596943. +All characters whose order is over 19 have an accumulated ratio of 0.03729009503282062. -707 sequences found. +853 sequences found. -First 512 (typical positive ratio): 0.9976732191628278 -Next 512 (512-1024): 0.010039333208466004 -Rest: -3.5561831257524545e-17 +First 461 (typical positive ratio): 0.995039617055503 +Next 163 (624-461): 0.003960483178947816 +Rest: 0.0009998997655491504 -- Processing end: 2021-03-16 19:09:36.580170 +- Processing end: 2022-12-14 18:08:23.948646 diff --git a/script/BuildLangModelLogs/LangItalianModel.log b/script/BuildLangModelLogs/LangItalianModel.log index fa16c78..925fa23 100644 --- a/script/BuildLangModelLogs/LangItalianModel.log +++ b/script/BuildLangModelLogs/LangItalianModel.log @@ -1,162 +1,262 @@ = Logs of language model for Italian (it) = - Generated by BuildLangModel.py -- Started: 2021-03-16 01:25:53.681909 +- Started: 2022-12-14 18:06:10.141198 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Pieve Ligure (revision 118508492) -010 (prefisso) (revision 94383168) -AMT (Genova) (revision 118888771) -Abbazia di San Colombano (revision 119100076) -Abbazia di San Fruttuoso (revision 119098176) -Acacia dealbata (revision 118537500) -Affresco (revision 119234348) -Agenzia nazionale per le nuove tecnologie, l'energia e lo sviluppo economico sostenibile (revision 119261985) -Agricoltura (revision 119211593) -Altitudine (revision 118983270) -Antica Roma (revision 118468482) -Anton Maria Maragliano (revision 116868790) -Appennino Ligure (revision 117194376) -Arcidiocesi di Genova (revision 119158953) -Area (revision 118021697) -Area naturale marina protetta Portofino (revision 117836953) -Arenzano (revision 118507675) -Austria (revision 119220244) -Avegno (revision 118656626) -Bargagli (revision 118656627) -Batteria di Punta Chiappa (revision 118356835) -Battesimo (revision 118993799) -Bogliasco (revision 118656629) -Bogliasco Pieve (revision 118656629) -Borzonasca (revision 118854360) -Busalla (revision 118656635) -Calcio (sport) (revision 118995232) -Calcio a 5 (revision 118431165) -Camogli (revision 118850151) -Campo Ligure (revision 119083085) -Campomorone (revision 119226877) -Cantiere navale (revision 115540115) -Carabinieri (revision 119285803) -Carasco (revision 118801735) -Caravella (revision 118751709) -Casarza Ligure (revision 118656643) -Casella (Italia) (revision 118797269) -Castello della Dragonara (revision 108868054) -Castiglione Chiavarese (revision 118656646) -Centrismo (revision 117397211) -Centro-destra (revision 117992364) -Centrolabrus melanocercus (revision 116914326) -Ceranesi (revision 118656648) -Cesare Lanza (revision 115376996) -Chiavari (revision 119146951) -Chiesa di San Michele Arcangelo (Pieve Ligure) (revision 119097578) -Chiesa di Santa Croce (Pieve Ligure) (revision 119097599) -Chilometro quadrato (revision 116585233) -Cicagna (revision 118656655) -Circondario di Genova (revision 113691033) -Città dell'olio (revision 118165836) -Città metropolitana di Genova (revision 119014943) -Città metropolitane d'Italia (revision 119240923) -Classificazione climatica dei comuni italiani (revision 118213893) -Classificazione sismica dell'Italia (revision 118461862) -Claudio Burlando (revision 119123207) -Codice catastale (revision 116588085) -Codice postale (revision 105346722) -Cogoleto (revision 118508042) -Cogorno (revision 118962627) -Compagnia di Gesù (revision 119271066) -Comune (Italia) (revision 118913656) -Comune medievale (revision 113420512) -Comuni d'Italia (revision 119120484) -Comuni della Liguria (revision 113527316) -Comunità montana Fontanabuona (revision 105560751) -Concilio di Trento (revision 118571991) -Congresso di Vienna (revision 118881415) -Coordinate geografiche (revision 118353691) -Corallo (revision 117035534) -Coreglia Ligure (revision 118656657) -Corona (copricapo) (revision 117780990) -Cristo degli abissi (revision 117435230) -Cristoforo Colombo (revision 119014639) -Croce (revision 117653124) -Crocefieschi (revision 118656658) -Crêuza (revision 119275449) -Davagna (revision 118656659) -Decreto del presidente della Repubblica (revision 119120849) -Democrazia Cristiana (revision 119162011) -Densità di popolazione (revision 119143170) -Dipartimento di Genova (revision 118450361) -Ebano (revision 116535223) -Erba sintetica (revision 114157150) -Etnico (onomastica) (revision 117289144) -Fascia (Italia) (revision 118955929) -Favale di Malvaro (revision 118656662) -Federico Barbarossa (revision 118793984) -Fermata ferroviaria (revision 119085486) -Ferrovia Genova-Pisa (revision 119025272) -Flora (revision 110652725) +Pieve Ligure (revision 130491730) +Saraceni (revision 130967096) +Servizio bibliotecario nazionale (revision 128634394) +Nuoto (revision 130784528) +Mele (Italia) (revision 130491516) +Bargagli (revision 130488122) +Propata (revision 130950866) +Hippocampus hippocampus (revision 128855725) +UTC+1 (revision 130216176) +Vobbia (revision 130492897) Floricoltura (revision 113487805) -Fontanigorda (revision 118803588) -Francesco Bossi (vescovo) (revision 117422608) -Frazione (geografia) (revision 119001222) -Fuso orario (revision 119022172) -Galleria (ingegneria) (revision 115407813) -Gas (revision 117414169) -Genova (revision 119208791) -Germania nazista (revision 119177156) -Giacomo il Maggiore (revision 118986303) +Napoleone Bonaparte (revision 130788600) +Associazione Sportiva Dilettantistica Bogliasco 1951 (revision 130333359) +Parco naturale regionale dell'Antola (revision 130931849) +1870 (revision 130372248) +1902 (revision 125027382) +Tennistavolo (revision 130771668) +WorldCat (revision 121980054) +Unione internazionale per la conservazione della natura (revision 130579833) +UTC+12 (revision 128054194) +Camerun (revision 130909445) +1982 (revision 130508604) +Rossiglione (Italia) (revision 130492121) +Regno (biologia) (revision 123949532) +Gemeinsame Normdatei (revision 130357700) +Pisa (revision 130953626) +Gorreto (revision 128865878) +Davagna (revision 130502042) +Città metropolitana di Genova (revision 130921453) +Anagrafe delle biblioteche italiane (revision 116281562) +Porpora (araldica) (revision 119872811) +Talea (revision 130808292) +Konrad Lorenz (revision 130114944) +Abbasidi (revision 129852637) +Bob (revision 130600755) +Montebruno (revision 130831738) +Abbazia di San Colombano (revision 130962956) +Pieve di Santa Maria Assunta (Bargagli) (revision 120699826) +Regno di Sardegna (1720-1861) (revision 130821005) +Henri Gatien Bertrand (revision 129713027) +Marocco (revision 130976656) +Centro Sportivo del Plebiscito Padova (revision 130265177) +Circondario di Genova (revision 113691033) +Neirone (revision 130524386) +Enciclopedia Britannica (revision 130474217) +Libia (revision 130922132) +Biathlon (revision 127467608) +Nomenclatura binomiale (revision 129036915) +Assedio di Tolone (1793) (revision 129912216) +Foce (Genova) (revision 128853193) +Anni 2010 (revision 130786165) +Cavalleria (revision 129435616) +Egitto (revision 130811280) +Puglia (revision 130967898) +Ischia (isola) (revision 130923064) +Monte Saraceno (Gargano) (revision 117548357) +Sestri Levante (revision 130753554) +Spagna (revision 130944960) +Squadra calcistica (revision 126640189) +Classificazione sismica dell'Italia (revision 130010934) +Olexandr Sadovyy (revision 126430322) +Età della pietra (revision 129628152) +Sceneggiato televisivo (revision 130814125) +Serra (revision 128743740) +Merovingi (revision 130719285) +Jonathan Del Galdo (revision 130511805) +Corrado II il Salico (revision 130323015) +Fascia (Italia) (revision 130488511) +Italia (revision 130967157) +Miglioramento genetico (revision 116201872) +846 (revision 117898042) +Storia della Corsica (revision 130939295) +Hippocampus (revision 130234158) +Germania (revision 130977070) +Hippocampus guttulatus (revision 128855778) +Portogallo (revision 130882014) +Partito Comunista Italiano (revision 130902795) +Cimitero monumentale di Staglieno (revision 129723258) +Liguria (revision 130931853) +Leivi (revision 130491448) +UNIMARC (revision 122719522) +Osservatorio astronomico regionale Parco Antola (revision 130931849) +Guinea Equatoriale (revision 130703184) +Bellagio (revision 130801761) +1633 (revision 117897530) +Animalia (revision 130961611) +Ordine di San Giuseppe (revision 126692776) +Serie A1 (pallanuoto femminile) (revision 129489313) +Ardesia (revision 130370291) +Angela Vinay (revision 130450983) +Toscana (revision 130973064) +Nuovo soggettario (revision 130799248) +Economia agraria (revision 130687019) +Ministero dell'interno (revision 130247524) +Rizoma (revision 129405738) +Nigel Nicolson (revision 130457869) +Bruno Migliorini (revision 130545485) +Nuoto di fondo (revision 128156928) +Uscio (revision 128866468) +Canapa (tessile) (revision 130674641) +Catalogo di biblioteca (revision 130913808) +Baiardo (cavallo) (revision 130036699) +Internet Archive (revision 130465900) +Ciad (revision 130966735) +Roma (revision 130964033) +Ucraina (revision 130954991) +Resistenza italiana (revision 129720700) +Monografia (revision 130533875) +UTC-4:30 (revision 110261259) +Golf (revision 130512259) +Strada statale 35 dei Giovi (revision 130498654) +Serie A 1954 (pallanuoto maschile) (revision 121285475) +Margotta (revision 124488424) +Childeberto II (revision 129457314) +UTC+5:45 (revision 110261348) +Serie A 1971 (pallanuoto maschile) (revision 123084377) +Passo del Faiallo (revision 130868911) +Agricoltura (revision 130664371) +Serie A1 2014-2015 (pallanuoto maschile) (revision 127921767) +Oceano Atlantico (revision 130174568) +Library of Congress Control Number (revision 124432708) +Anton Maria Maragliano (revision 127306518) +Edoardo Firpo (revision 120407241) +Golfo di Guinea (revision 105877770) +Pesci (revision 130922409) +Biblioteca Nazionale Centrale di Roma (revision 130883784) +Orero (revision 130491707) +Salata (Vobbia) (revision 121679261) +Famiglia (tassonomia) (revision 125790574) +Galafrone (revision 130036463) +Sindaco (Italia) (revision 130620519) +Portofino (revision 130448740) +Lingua ligure (revision 130648254) +Recco (revision 130550155) +Decreto del presidente della Repubblica (revision 130255311) +Istituto universitario europeo (revision 130730207) +Antoine Le Picard de Phélippeaux (revision 128694490) +Tempo coordinato universale (revision 129902409) +Traetto (insediamento musulmano) (revision 126515178) +San Lorenzo (revision 130456525) +2016 (revision 130154995) +Divisione Nazionale A 1941 (revision 129237507) +Micropropagazione (revision 116795180) +Pianura Padana (revision 130219238) +Tribogna (revision 130492831) +Castello Spinola-Mignacco (revision 124445993) +Ovoviviparità (revision 125010551) +Divisione Nazionale A 1935 (revision 121269974) +Città metropolitane d'Italia (revision 130658538) +Fagus (revision 130188270) +Sarzana (revision 130745855) +Serie A1 1989-1990 (pallanuoto maschile) (revision 123100527) +Gontrano (revision 128720521) +Energia rinnovabile in Spagna (revision 120037315) +Domanda (economia) (revision 127666035) +1991 (revision 128828387) +Libération (revision 123887926) +Semina (revision 127767841) +César Milstein (revision 126114451) +Ghazi (revision 129809539) +Marco Bucci (politico) (revision 130880400) +Stato civile (revision 115761461) +Provincia di Pavia (revision 130528307) +Avannotto (revision 128143398) +Codice postale (revision 129886472) +Rigenerazione (biologia) (revision 105794060) +1739 (revision 115672313) +Direttore sportivo (revision 125375205) +Mezzanego (revision 130491532) +Conflitti arabo-israeliani (revision 130803026) +Maga Melissa (revision 130036793) +Tiglieto (revision 130492776) +Congo (fiume) (revision 129895848) +Pera (revision 130882259) +Freestyle (sci) (revision 130548865) +Società Sportiva Lazio Nuoto (revision 129489808) +Luigi Antonio di Borbone-Condé (revision 130794537) +Cicagna (revision 130488438) +Svezia (revision 130663431) +Qanun (revision 126895709) +Malawi (revision 130654261) +Paolo Giacometti (revision 109685679) +Bagno di Romagna (revision 130856795) +Serie A1 2013-2014 (pallanuoto maschile) (revision 127921207) +Mstislav Rostropovich (revision 130914613) +1985 (revision 130730052) +Soverzene (revision 130273591) +Ostia (città antica) (revision 127960085) +Enrico I di Sassonia (revision 128717835) +Villa La Fonte (revision 127624982) +Serie A1 2006-2007 (pallanuoto femminile) (revision 125748236) +Reggio Emilia (revision 130551003) +Italo Insolera (revision 130479054) +Jacques François Dugommier (revision 129097799) +Neve (revision 125208328) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 01:31:12.602302 +- Wikipedia parsing ended at: 2022-12-14 18:09:46.154107 -54 characters appeared 1487235 times. +61 characters appeared 3042550 times. -First 34 characters: -[ 0] Char i: 11.700840822062418 % -[ 1] Char e: 11.23655642854021 % -[ 2] Char a: 11.108197426768466 % -[ 3] Char o: 9.061513479712351 % -[ 4] Char n: 7.150383093458666 % -[ 5] Char l: 7.047440384337378 % -[ 6] Char t: 6.5587482812064 % -[ 7] Char r: 6.521363469794619 % -[ 8] Char s: 4.669067094305877 % -[ 9] Char c: 4.495120139049982 % -[10] Char d: 3.939861555167811 % -[11] Char u: 2.7531627483215497 % -[12] Char p: 2.6924460492121285 % -[13] Char m: 2.5125820734450173 % -[14] Char g: 1.9460273594959776 % -[15] Char v: 1.64123356429885 % -[16] Char f: 1.1068862688142762 % -[17] Char b: 1.0097933413347588 % -[18] Char z: 0.9880079476343685 % -[19] Char h: 0.7280624783574889 % -[20] Char q: 0.27574660359660713 % -[21] Char à: 0.2058854182425777 % -[22] Char è: 0.14859790147488458 % -[23] Char ò: 0.10186688721015845 % -[24] Char ù: 0.07302141221797497 % -[25] Char x: 0.06501998675394272 % -[26] Char k: 0.05291699025372587 % -[27] Char y: 0.04471384818135668 % -[28] Char w: 0.04115018810073727 % -[29] Char ì: 0.041015710361845974 % -[30] Char é: 0.024474948478216286 % -[31] Char j: 0.019028600053118707 % -[32] Char ö: 0.006791125814010562 % -[33] Char ó: 0.004505004252858493 % +Most Frequent characters: +[ 0] Char i: 11.854891456179848 % +[ 1] Char e: 11.189495653317119 % +[ 2] Char a: 11.161887232748846 % +[ 3] Char o: 8.833347027986393 % +[ 4] Char n: 7.316330052094461 % +[ 5] Char l: 7.06009761548701 % +[ 6] Char t: 6.632101362344087 % +[ 7] Char r: 6.4523508241442205 % +[ 8] Char s: 4.700826609258681 % +[ 9] Char c: 4.345269592940133 % +[10] Char d: 3.8846033754580858 % +[11] Char u: 2.771753956385269 % +[12] Char p: 2.735567205140425 % +[13] Char m: 2.5239683817850156 % +[14] Char g: 1.940773364447585 % +[15] Char v: 1.5149792115166554 % +[16] Char f: 1.110450115856765 % +[17] Char z: 1.006392664048249 % +[18] Char b: 0.9877241129973213 % +[19] Char h: 0.7594616357989187 % +[20] Char q: 0.26622405547977845 % +[21] Char à: 0.20476245254802714 % +[22] Char è: 0.16505891439746265 % +[23] Char ò: 0.09265254474043155 % +[24] Char k: 0.08660498594928596 % +[25] Char ù: 0.0803273569867381 % +[26] Char x: 0.07053294111846971 % +[27] Char y: 0.06622734219651279 % +[28] Char w: 0.05686019950370577 % +[29] Char ì: 0.03414898686956665 % +[30] Char é: 0.03243989416772116 % +[31] Char j: 0.031289543310709766 % +[32] Char ö: 0.004995809436163744 % +[33] Char ä: 0.003746857077122808 % +[34] Char ü: 0.0032867167343182528 % +[35] Char æ: 0.0030895137302591573 % +[36] Char á: 0.0019720300405909517 % +[37] Char ó: 0.001873428538561404 % -The first 34 characters have an accumulated ratio of 0.9997202863031062. +The first 38 characters have an accumulated ratio of 0.9998836502276052. +The first 3 characters have an accumulated ratio of 0.3420627434224581. +All characters whose order is over 16 have an accumulated ratio of 0.03959671985669916. -921 sequences found. +1168 sequences found. -First 512 (typical positive ratio): 0.9992462827093448 -Next 512 (512-1024): 0.0007302141221797497 -Rest: -2.0166160408230382e-17 +First 312 (typical positive ratio): 0.9950294718278113 +Next 206 (518-312): 0.003975533066303161 +Rest: 0.000994995105885521 -- Processing end: 2021-03-16 01:31:12.679004 +- Processing end: 2022-12-14 18:09:46.236617 diff --git a/script/BuildLangModelLogs/LangKoreanModel.log b/script/BuildLangModelLogs/LangKoreanModel.log index ec5fc0a..9f6b173 100644 --- a/script/BuildLangModelLogs/LangKoreanModel.log +++ b/script/BuildLangModelLogs/LangKoreanModel.log @@ -1,701 +1,1454 @@ = Logs of language model for Korean (ko) = - Generated by BuildLangModel.py -- Started: 2021-03-18 21:59:02.887978 +- Started: 2022-12-14 17:24:10.786242 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -칼리스토_(위성) (revision 28961393) -1610년 (revision 28556414) -1월 7일 (revision 28876047) -2003년 12월 (revision 23976672) -2009년 목성 충돌사건 (revision 27323048) -2010년 목성 충돌사건 (revision 27323048) -2차 충돌구 (revision 28946595) -D형 소행성 (revision 28394092) -GND (식별자) (revision 28475536) -HD 209458 b (revision 28560349) -H 콘드라이트 (revision 28394899) -LCCN (식별자) (revision 19573947) -S/2003 J 10 (revision 25753708) -S/2003 J 12 (revision 26400377) -S/2003 J 16 (revision 28763186) -S/2003 J 2 (revision 25753708) -S/2003 J 23 (revision 25753708) -S/2003 J 4 (revision 25753708) -S/2003 J 9 (revision 26400377) -S/2015 (136472) 1 (revision 25714103) -SMARA (revision 24063296) -VIAF (식별자) (revision 27137907) -WorldCat Identities (식별자) (revision 27521768) -가니메데 (위성) (revision 28631891) -가속도 (revision 28183524) -갈릴레오 (우주선) (revision 27368796) -갈릴레오 갈릴레이 (revision 27826096) -갈릴레이 위성 (revision 28669230) -강착 (revision 26422060) -개주기 함수 (revision 28695336) -겉보기등급 (revision 28040769) -고물 카테나 (revision 23226141) -고전적 카이퍼대 천체 (revision 26313323) -공전 주기 (revision 22619799) -관성 모멘트 (revision 28070982) -구조적 융기 (revision 27745126) -궤도 경사 (revision 25327996) -궤도 공명 (revision 27567384) -궤도 이심률 (revision 26995575) -규산염 (revision 22359319) -규소 (revision 28601546) -규칙 위성 (revision 20613029) -그램 (revision 26373186) -그리스 신화 (revision 28186023) -그리스어 (revision 28615929) -근적외선 (revision 27650322) -근지구 소행성 (revision 28435841) -금성 (revision 28157143) -금성의 대기 (revision 28434028) -기반암 (revision 25928764) -기압 (revision 27417925) -긴반지름 (revision 22347233) -네레이드 (위성) (revision 26499827) -녹는점 (revision 23970478) -뇨르드 (충돌구) (revision 26331003) -뉴 허라이즌스 (revision 28462690) -뉴욕 타임스 (revision 27647761) -능선 (revision 27745126) -다모클레스군 (revision 25456157) -단층애 (revision 19923354) -달 (revision 28918055) -달의 대기 (revision 28533295) -대기권 (revision 28369550) -대기압 (revision 27645730) -대류 (revision 27653974) -대적점 (revision 28897867) -도 (각도) (revision 26551438) -도 (충돌구) (revision 25843212) -디스노미아 (위성) (revision 28921686) -디아 (위성) (revision 26367850) -디오네 (위성) (revision 28934552) -디지털 객체 식별자 (revision 27657635) -라플라스-P (revision 28591454) -레다 (위성) (revision 26682939) -레아 (위성) (revision 28593029) -로픈 (충돌구) (revision 27645834) -리시테아 (위성) (revision 26682938) -림 (충돌구) (revision 27645834) -마그네슘 (revision 27661919) -마이크로미터 (revision 28705367) -마케마케 (왜행성) (revision 26405503) -맨틀 (revision 28942503) -메가클리테 (revision 28907421) -메티스 (위성) (revision 23978022) -명왕성 (revision 28921751) -명왕성의 위성 (revision 28903818) -명왕성족 (revision 27553778) -명왕성형 천체 (revision 27268668) -목성 (revision 28958627) -목성 LI (revision 28891035) -목성 LII (revision 28903781) -목성 LIV (revision 28903781) -목성 LIX (revision 28903781) -목성 LV (revision 28903781) -목성 LVI (revision 28903781) -목성 LXI (revision 28903781) -목성 LXIII (revision 28903781) -목성 LXIV (revision 28903781) -목성 LXIX (revision 28903781) -목성 LXVI (revision 28903781) +칼리스토_(위성) (revision 33615840) +부피 (revision 32168599) +반지름순 태양계 천체 목록 (revision 32916452) +지구 대기권 (revision 33315613) +암권 (revision 33357381) +레다 (위성) (revision 31904343) +자외선 (revision 32594684) +메가클리테 (revision 31905283) +달 (revision 33773397) +해침 (revision 33034604) +해저확장설 (revision 32560079) +12월 20일 (revision 32674366) +융기 (revision 33491364) +하우메아의 위성 (revision 31642105) +시노페 (위성) (revision 32057016) +리터 (revision 31917527) +보이저 계획 (revision 33595553) +다모클레스군 (revision 31496390) +칼리스토 (위성) (revision 33615840) +중력 (revision 33560405) +외핵 (revision 31662483) +유피테르 (revision 33579442) +일산화질소 (revision 33754317) +라이먼 계열 (revision 33331667) +난류 (역학) (revision 31964207) +염화은 (revision 31925302) +순록 (revision 32950046) +반구 (revision 32165510) +산소 (revision 32710308) +엔셀라두스 (revision 33377968) +대류권계면 (revision 33274815) +에우포리에 (위성) (revision 31684040) +직육면체 (revision 32156582) +갤런 (revision 31560310) +중간권 (revision 31686988) +미마스 (위성) (revision 32916462) +적위 (revision 33067713) +독일 (revision 33691863) +아르테미스 (revision 33626000) +회전 타원체 (revision 33056951) +2002 MS4 (revision 31905472) +목성 LI (revision 31954758) +다음 (revision 33667305) +레만 불연속면 (revision 33742600) +극자외선 (revision 32046680) +검버섯 (revision 32888305) +연약권 (revision 32503902) +불규칙 위성 (revision 32039660) +맨틀 대류설 (revision 33491985) +원환체 (revision 32871299) +액체 (revision 33525578) +제우스 (revision 33739525) +10월 12일 (revision 33549994) +명왕성 (revision 33600662) +킬로미터 (revision 33091515) +아드라스테아 (위성) (revision 32916466) +지름 (revision 31680490) +오르트 구름 (revision 32837079) +구 (기하학) (revision 33067121) +일광화상 (revision 32070832) +지구 (revision 33773393) +2 팔라스 (revision 32457320) +보이저 2호 (revision 33693751) +지각 변동 (revision 33012720) +파시티 (revision 31528130) +구조선 (지질학) (revision 33777940) +목성 (revision 33548707) +세레스 (왜행성) (revision 33091631) +토성 (revision 33752942) +헬륨 (revision 33375151) +오존 (revision 31837936) +스마트폰 (revision 33787364) +소행성체 목록 (revision 33069588) +비타민 D (revision 33175819) +원기둥 (revision 31829483) +배럴 (revision 31831768) +카르포 (위성) (revision 31902259) +태양 (revision 33773396) +판 구조론 (revision 32759041) +초대륙 (revision 33160735) +SI 단위 (revision 33342933) +연조직염 (revision 32692657) +소행성대 (revision 33566259) +끓는점 (revision 31953974) +물음표 (revision 33489921) +천문학 (revision 33773377) +부가체 (revision 31597362) +반감기 (revision 32493640) +기하학 (revision 33534210) +행성 (revision 32920036) +CAS 등록번호 (revision 31941117) +아글라에아 (revision 31488425) +갤럭시 S7 (revision 32866373) +중간권 (맨틀) (revision 31916719) +검독수리 (revision 33376798) +원통셸 방법 (revision 33591050) +확산 (revision 33773728) +뤼드베리 상수 (revision 33344149) +메노이티오스 (revision 31432123) +열곡 (revision 31514444) +경제상호원조회의 (revision 33541819) +무선주파수 (revision 31597211) +천문 단위 (revision 32546139) +산소 중독 (revision 32046175) +시어도어 라이먼 (revision 31746101) +암석권 (revision 33357381) +카리테스 (revision 32276518) +대구 (어류) (revision 32173825) +겉넓이 (revision 33071538) +1974년 FIFA 월드컵 (revision 33673818) +상한 (revision 31906432) +영국 연방 (revision 33561227) +카시니-하위헌스 (revision 32255896) +천구 (revision 32583693) +멜포메네 (revision 31463039) +공전 (revision 33695308) +이아페투스 (위성) (revision 33591460) +비브코드 (revision 31833107) +침강 (revision 32572552) +소한 (revision 32385637) +트랙백 (revision 32041242) +6월 18일 (revision 33586591) +3 유노 (revision 32101944) +할로젠 원소 (revision 33652438) +궤도 공명 (revision 32018858) +소행성 위성 (revision 32861529) +국제 표준 도서 번호 (revision 32026251) +다름슈타튬 (revision 32226215) +국제 표준 일련 번호 (revision 32034676) +길이 (revision 33490317) +토성의 위성 (revision 33692928) +판도라 (위성) (revision 33623652) +경칩 (revision 32324464) +각지름 (revision 33034375) +에드워드 찰스 피커링 (revision 31742048) +해양 지각 (revision 33295589) +진료과 (revision 32814784) +1998년 (revision 33765643) +사전 (revision 33773642) +왜행성 (revision 32118633) +대혜성 (revision 31460596) +음펨바 효과 (revision 33379197) +성층권 (revision 31544595) +테크네튬 (revision 31970622) +숫자패드 (revision 33740748) +뤼드베리 공식 (revision 33082121) +질량순 태양계 천체 목록 (revision 33126420) +마이크로소프트 윈도우 (revision 33725131) +태양계 (revision 33751968) +사르페돈 2세 (revision 33486645) +에이커 (revision 33013987) +날씨 (revision 33400972) +내부 에너지 (revision 33647626) +아소포스 (revision 33686707) +육지 (revision 33362416) +파인트 (revision 31510407) +카카오 (기업) (revision 33785336) +코르딜레프스키 구름 (revision 32044761) +원거리 소행성체 (revision 31453490) +(202421) 2005 UQ513 (revision 31905372) +오사카시 (revision 33786700) +사투르누스 (revision 31918004) +남아프리카 공화국 (revision 33773387) +파시파에 (위성) (revision 32092430) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-18 22:02:04.863862 +- Wikipedia parsing ended at: 2022-12-14 17:27:04.450901 -1048 characters appeared 222213 times. +1261 characters appeared 374868 times. Most Frequent characters: -[ 0] Char 이: 3.8089580717599785 % -[ 1] Char 다: 3.088028153168356 % -[ 2] Char 의: 2.860318703226184 % -[ 3] Char 성: 2.408049934072264 % -[ 4] Char 는: 2.3108459001048542 % -[ 5] Char 에: 2.287444928964552 % -[ 6] Char 로: 1.8288758983497817 % -[ 7] Char 은: 1.5381638337991026 % -[ 8] Char 하: 1.5287134416078267 % -[ 9] Char 가: 1.5017123210613241 % -[10] Char 지: 1.470211013757071 % -[11] Char 을: 1.3694068303834608 % -[12] Char 기: 1.2281009661900968 % -[13] Char 도: 1.188499322721893 % -[14] Char 고: 1.1835491172883674 % -[15] Char 한: 1.1565479967418648 % -[16] Char 위: 1.079144784508557 % -[17] Char 서: 1.0746445977508066 % -[18] Char 스: 0.9832908065684727 % -[19] Char 으: 0.9607898727797203 % -[20] Char 리: 0.9256884160692669 % -[21] Char 어: 0.8482852038359592 % -[22] Char 대: 0.8446850544297588 % -[23] Char 들: 0.8118336910981806 % -[24] Char 있: 0.7902327946609784 % -[25] Char 사: 0.7560313753020751 % -[26] Char 를: 0.7555813566263 % -[27] Char 과: 0.7078793769941453 % -[28] Char 아: 0.6979789661270943 % -[29] Char 되: 0.6610774347135406 % -[30] Char 전: 0.6480268931160643 % -[31] Char 일: 0.6475768744402892 % -[32] Char 년: 0.6187756791906864 % -[33] Char 시: 0.6079752309720853 % -[34] Char 인: 0.5908745212926336 % -[35] Char 그: 0.5872743718864333 % -[36] Char 해: 0.5872743718864333 % -[37] Char 목: 0.5661234941250062 % -[38] Char 수: 0.5656734754492312 % -[39] Char 라: 0.5616233073672557 % -[40] Char 적: 0.5449726163635791 % -[41] Char 나: 0.5382223362269534 % -[42] Char 구: 0.533272130793428 % -[43] Char 주: 0.5251717946294772 % -[44] Char 자: 0.523821738602152 % -[45] Char 정: 0.5080710849500254 % -[46] Char 부: 0.49367048732522395 % -[47] Char 었: 0.4792698897004226 % -[48] Char 소: 0.44236835828686893 % -[49] Char 보: 0.43606809682601827 % -[50] Char 우: 0.43291796609559297 % -[51] Char 레: 0.432467947419818 % -[52] Char 발: 0.4243676112558671 % -[53] Char 행: 0.4243676112558671 % -[54] Char 여: 0.4054668268733152 % -[55] Char 며: 0.4050168081975402 % -[56] Char 와: 0.4036667521702151 % -[57] Char 면: 0.39556641600626424 % -[58] Char 명: 0.3802657810299127 % -[59] Char 된: 0.37801568765103755 % -[60] Char 화: 0.3775656689752625 % -[61] Char 만: 0.36721523943243645 % -[62] Char 중: 0.3667652207566614 % -[63] Char 상: 0.35146458578030987 % -[64] Char 공: 0.3478644363741095 % -[65] Char 것: 0.34516432431945926 % -[66] Char 오: 0.342464212264809 % -[67] Char 원: 0.3366139694797334 % -[68] Char 궤: 0.33391385742508317 % -[69] Char 마: 0.32941367066733274 % -[70] Char 문: 0.3285136333157826 % -[71] Char 계: 0.32581352126113233 % -[72] Char 유: 0.32176335317915694 % -[73] Char 제: 0.3190632411245067 % -[74] Char 영: 0.31771318509718155 % -[75] Char 장: 0.31546309171830633 % -[76] Char 양: 0.3064627182028054 % -[77] Char 국: 0.3060126995270304 % -[78] Char 게: 0.3051126621754803 % -[79] Char 관: 0.3042126248239302 % -[80] Char 니: 0.3033125874723801 % -[81] Char 였: 0.2992624193904047 % -[82] Char 데: 0.2965623073357544 % -[83] Char 경: 0.29431221395687923 % -[84] Char 체: 0.29296215792955405 % -[85] Char 분: 0.292512139253779 % -[86] Char 동: 0.29161210190222897 % -[87] Char 반: 0.28081165368362787 % -[88] Char 름: 0.27721150427742747 % -[89] Char 신: 0.27721150427742747 % -[90] Char 진: 0.27046122414080187 % -[91] Char 비: 0.2695611867892517 % -[92] Char 세: 0.2664110560588265 % -[93] Char 갈: 0.2659610373830514 % -[94] Char 태: 0.2632609253284011 % -[95] Char 음: 0.2610108319495259 % -[96] Char 했: 0.2556106078402254 % -[97] Char 월: 0.2556106078402254 % -[98] Char 메: 0.25201045843402503 % -[99] Char 작: 0.25111042108247494 % -[100] Char 형: 0.24931034637937474 % -[101] Char 견: 0.24841030902782463 % -[102] Char 릴: 0.24796029035204964 % -[103] Char 모: 0.24661023432472448 % -[104] Char 학: 0.24526017829739932 % -[105] Char 선: 0.24481015962162428 % -[106] Char 용: 0.2439101222700742 % -[107] Char 러: 0.24211004756697405 % -[108] Char 각: 0.241660028891199 % -[109] Char 천: 0.2385098981607737 % -[110] Char 크: 0.2358097861061234 % -[111] Char 미: 0.23535976743034837 % -[112] Char 연: 0.23310967405147315 % -[113] Char 점: 0.2308595806725979 % -[114] Char 트: 0.22815946861794767 % -[115] Char 토: 0.22590937523907242 % -[116] Char 물: 0.22590937523907242 % -[117] Char 역: 0.22590937523907242 % -[118] Char 때: 0.2236592818601972 % -[119] Char 거: 0.22230922583287208 % -[120] Char 표: 0.22095916980554692 % -[121] Char 간: 0.22005913245399683 % -[122] Char 터: 0.21915909510244677 % -[123] Char 돌: 0.2178090390751216 % -[124] Char 질: 0.21735902039934657 % -[125] Char 르: 0.21510892702047132 % -[126] Char 파: 0.21420888966892127 % -[127] Char 재: 0.21375887099314622 % -[128] Char 운: 0.2124088149658211 % -[129] Char 개: 0.21015872158694585 % -[130] Char 군: 0.2043084788018703 % -[131] Char 바: 0.20025831071989486 % -[132] Char 까: 0.19890825469256976 % -[133] Char 산: 0.19215797455594408 % -[134] Char 측: 0.191707955880169 % -[135] Char 속: 0.191707955880169 % -[136] Char 않: 0.191257937204394 % -[137] Char 달: 0.1845076570677683 % -[138] Char 조: 0.18180754501311805 % -[139] Char 충: 0.181357526337343 % -[140] Char 추: 0.17820739560691767 % -[141] Char 두: 0.17820739560691767 % -[142] Char 드: 0.1764073209038175 % -[143] Char 또: 0.17460724620071733 % -[144] Char 생: 0.17370720884916724 % -[145] Char 통: 0.17055707811874193 % -[146] Char 왕: 0.16875700341564176 % -[147] Char 방: 0.16695692871254159 % -[148] Char 함: 0.16470683533366637 % -[149] Char 안: 0.1633567793063412 % -[150] Char 합: 0.15930661122436582 % -[151] Char 불: 0.15930661122436582 % -[152] Char 히: 0.1584065738728157 % -[153] Char 내: 0.15660649916971553 % -[154] Char 후: 0.15570646181816544 % -[155] Char 타: 0.1530063497635152 % -[156] Char 약: 0.1521063124119651 % -[157] Char 른: 0.151206275060415 % -[158] Char 할: 0.151206275060415 % -[159] Char 호: 0.15075625638463996 % -[160] Char 네: 0.15075625638463996 % -[161] Char 포: 0.1489561816815398 % -[162] Char 심: 0.1480561443299897 % -[163] Char 탐: 0.14760612565421466 % -[164] Char 같: 0.14580605095111449 % -[165] Char 등: 0.14580605095111449 % -[166] Char 번: 0.14535603227533944 % -[167] Char 량: 0.14445599492378933 % -[168] Char 더: 0.14400597624801428 % -[169] Char 현: 0.14310593889646422 % -[170] Char 치: 0.13950578949026385 % -[171] Char 외: 0.1390557708144888 % -[172] Char 력: 0.13770571478716367 % -[173] Char 칼: 0.1359056400840635 % -[174] Char 따: 0.13275550935363817 % -[175] Char 금: 0.13185547200208808 % -[176] Char 실: 0.12645524789278756 % -[177] Char 카: 0.12645524789278756 % -[178] Char 차: 0.12510519186546243 % -[179] Char 단: 0.12240507981081214 % -[180] Char 프: 0.12240507981081214 % -[181] Char 테: 0.1219550611350371 % -[182] Char 규: 0.12150504245926207 % -[183] Char 결: 0.12150504245926207 % -[184] Char 독: 0.1179048930530617 % -[185] Char 존: 0.11610481834996153 % -[186] Char 변: 0.11475476232263639 % -[187] Char 매: 0.11475476232263639 % -[188] Char 루: 0.11430474364686136 % -[189] Char 무: 0.11115461291643604 % -[190] Char 석: 0.11070459424066098 % -[191] Char 져: 0.11070459424066098 % -[192] Char 근: 0.10935453821333586 % -[193] Char 초: 0.10755446351023566 % -[194] Char 향: 0.10665442615868559 % -[195] Char 설: 0.10485435145558542 % -[196] Char 째: 0.10395431410403531 % -[197] Char 본: 0.10395431410403531 % -[198] Char 직: 0.09765405264318469 % -[199] Char 았: 0.09720403396740965 % -[200] Char 말: 0.09720403396740965 % -[201] Char 식: 0.0967540152916346 % -[202] Char 많: 0.09630399661585956 % -[203] Char 저: 0.09630399661585956 % -[204] Char 요: 0.0958539779400845 % -[205] Char 교: 0.0958539779400845 % -[206] Char 련: 0.09540395926430947 % -[207] Char 회: 0.09540395926430947 % -[208] Char 알: 0.09135379118233407 % -[209] Char 려: 0.09045375383078398 % -[210] Char 배: 0.09045375383078398 % -[211] Char 열: 0.0877536417761337 % -[212] Char 칙: 0.08640358574880858 % -[213] Char 티: 0.08640358574880858 % -[214] Char 온: 0.08595356707303353 % -[215] Char 처: 0.08550354839725849 % -[216] Char 당: 0.08505352972148343 % -[217] Char 색: 0.08505352972148343 % -[218] Char 큰: 0.0846035110457084 % -[219] Char 임: 0.08415349236993334 % -[220] Char 예: 0.08325345501838326 % -[221] Char 권: 0.08280343634260821 % -[222] Char 순: 0.08280343634260821 % -[223] Char 없: 0.08190339899105813 % -[224] Char 던: 0.08010332428795795 % -[225] Char 케: 0.07965330561218291 % -[226] Char 항: 0.07920328693640785 % -[227] Char 최: 0.07875326826063282 % -[228] Char 강: 0.07830324958485776 % -[229] Char 률: 0.07830324958485776 % -[230] Char 망: 0.07830324958485776 % -[231] Char 론: 0.0765031748817576 % -[232] Char 쪽: 0.0756031375302075 % -[233] Char 붙: 0.07470310017865742 % -[234] Char 평: 0.07470310017865742 % -[235] Char 확: 0.07425308150288236 % -[236] Char 얼: 0.07335304415133227 % -[237] Char 래: 0.07290302547555724 % -[238] Char 밝: 0.07110295077245705 % -[239] Char 류: 0.07020291342090697 % -[240] Char 준: 0.06975289474513192 % -[241] Char 노: 0.06885285739358184 % -[242] Char 능: 0.06840283871780678 % -[243] Char 록: 0.06795282004203175 % -[244] Char 퍼: 0.06660276401470662 % -[245] Char 졌: 0.06615274533893156 % -[246] Char 받: 0.06615274533893156 % -[247] Char 획: 0.06615274533893156 % -[248] Char 키: 0.06390265196005634 % -[249] Char 밀: 0.06390265196005634 % -[250] Char 겨: 0.06390265196005634 % -[251] Char 민: 0.06300261460850626 % -[252] Char 플: 0.06255259593273121 % -[253] Char 피: 0.06120253990540607 % -[254] Char 탄: 0.06075252122963103 % -[255] Char 증: 0.05985248387808094 % -[256] Char 암: 0.059402465202305896 % -[257] Char 남: 0.05895244652653085 % -[258] Char 별: 0.05895244652653085 % -[259] Char 층: 0.0585024278507558 % -[260] Char 슘: 0.0585024278507558 % -[261] Char 베: 0.0585024278507558 % -[262] Char 쟁: 0.0585024278507558 % -[263] Char 출: 0.058052409174980765 % -[264] Char 축: 0.058052409174980765 % -[265] Char 특: 0.058052409174980765 % -[266] Char 압: 0.058052409174980765 % -[267] Char 극: 0.058052409174980765 % -[268] Char 높: 0.057602390499205715 % -[269] Char 디: 0.057602390499205715 % -[270] Char 코: 0.057602390499205715 % -[271] Char 철: 0.05715237182343068 % -[272] Char 종: 0.05715237182343068 % -[273] Char 란: 0.05670235314765563 % -[274] Char 탈: 0.05625233447188058 % -[275] Char 격: 0.05535229712033049 % -[276] Char 입: 0.05490227844455545 % -[277] Char 접: 0.0544522597687804 % -[278] Char 법: 0.0544522597687804 % -[279] Char 령: 0.05400224109300536 % -[280] Char 및: 0.053552222417230316 % -[281] Char 야: 0.05265218506568022 % -[282] Char 광: 0.052202166389905186 % -[283] Char 난: 0.051752147714130135 % -[284] Char 감: 0.051302129038355085 % -[285] Char 판: 0.05085211036258005 % -[286] Char 승: 0.05085211036258005 % -[287] Char 새: 0.05085211036258005 % -[288] Char 랑: 0.04995207301102996 % -[289] Char 급: 0.048602016983704824 % -[290] Char 건: 0.04815199830792978 % -[291] Char 페: 0.04815199830792978 % -[292] Char 투: 0.04725196095637969 % -[293] Char 립: 0.04680194228060464 % -[294] Char 폭: 0.0463519236048296 % -[295] Char 황: 0.045901904929054556 % -[296] Char 머: 0.045901904929054556 % -[297] Char 버: 0.04545188625327951 % -[298] Char 핵: 0.04500186757750446 % -[299] Char 든: 0.04500186757750446 % -[300] Char 틀: 0.044101830225954375 % -[301] Char 몇: 0.04320179287440429 % -[302] Char 날: 0.0423017555228542 % -[303] Char 족: 0.04185173684707916 % -[304] Char 럽: 0.04185173684707916 % -[305] Char 언: 0.04185173684707916 % -[306] Char 칭: 0.04185173684707916 % -[307] Char 풍: 0.04140171817130411 % -[308] Char 착: 0.04140171817130411 % -[309] Char 럼: 0.04140171817130411 % -[310] Char 균: 0.04140171817130411 % -[311] Char 복: 0.04140171817130411 % -[312] Char 집: 0.04140171817130411 % -[313] Char 너: 0.04095169949552906 % -[314] Char 움: 0.04095169949552906 % -[315] Char 낮: 0.04050168081975402 % -[316] Char 린: 0.04050168081975402 % -[317] Char 람: 0.04050168081975402 % -[318] Char 술: 0.040051662143978976 % -[319] Char 허: 0.040051662143978976 % -[320] Char 슷: 0.039601643468203926 % -[321] Char 응: 0.0382515874408788 % -[322] Char 찰: 0.03780156876510375 % -[323] Char 클: 0.03780156876510375 % -[324] Char 될: 0.03735155008932871 % -[325] Char 백: 0.03645151273777862 % -[326] Char 뉴: 0.03465143803467843 % -[327] Char 완: 0.03420141935890339 % -[328] Char 료: 0.033751400683128346 % -[329] Char 쓰: 0.033751400683128346 % -[330] Char 긴: 0.03330138200735331 % -[331] Char 편: 0.03330138200735331 % -[332] Char 떨: 0.03285136333157826 % -[333] Char 맨: 0.03285136333157826 % -[334] Char 첫: 0.03285136333157826 % -[335] Char 침: 0.03285136333157826 % -[336] Char 폴: 0.032401344655803216 % -[337] Char 왜: 0.032401344655803216 % -[338] Char 활: 0.03195132598002817 % -[339] Char 험: 0.03195132598002817 % -[340] Char 율: 0.03150130730425313 % -[341] Char 멘: 0.03150130730425313 % -[342] Char 습: 0.03150130730425313 % -[343] Char 늘: 0.03105128862847808 % -[344] Char 얻: 0.030601269952703035 % -[345] Char 환: 0.03015125127692799 % -[346] Char 울: 0.03015125127692799 % -[347] Char 깥: 0.03015125127692799 % -[348] Char 곳: 0.03015125127692799 % -[349] Char 북: 0.03015125127692799 % -[350] Char 왔: 0.029701232601152948 % -[351] Char 났: 0.029701232601152948 % -[352] Char 맹: 0.029701232601152948 % -[353] Char 염: 0.0292512139253779 % -[354] Char 먼: 0.0292512139253779 % -[355] Char 느: 0.0292512139253779 % -[356] Char 혜: 0.0292512139253779 % -[357] Char 킬: 0.0292512139253779 % -[358] Char 절: 0.0292512139253779 % -[359] Char 볼: 0.0292512139253779 % -[360] Char 줄: 0.0292512139253779 % -[361] Char 헤: 0.0292512139253779 % -[362] Char 필: 0.028351176573827814 % -[363] Char 센: 0.02790115789805277 % -[364] Char 값: 0.026551101870727636 % -[365] Char 품: 0.026551101870727636 % -[366] Char 참: 0.026101083194952593 % -[367] Char 륙: 0.026101083194952593 % -[368] Char 잡: 0.025651064519177542 % -[369] Char 링: 0.025651064519177542 % -[370] Char 께: 0.0252010458434025 % -[371] Char 킨: 0.0252010458434025 % -[372] Char 흔: 0.024301008491852412 % -[373] Char 몬: 0.02385098981607737 % -[374] Char 못: 0.02385098981607737 % -[375] Char 촬: 0.02385098981607737 % -[376] Char 막: 0.02385098981607737 % -[377] Char 쳐: 0.02385098981607737 % -[378] Char 찾: 0.02340097114030232 % -[379] Char 웨: 0.02340097114030232 % -[380] Char 슬: 0.022950952464527278 % -[381] Char 둘: 0.022950952464527278 % -[382] Char 징: 0.022950952464527278 % -[383] Char 례: 0.022950952464527278 % -[384] Char 올: 0.022950952464527278 % -[385] Char 살: 0.022950952464527278 % -[386] Char 즌: 0.02250093378875223 % -[387] Char 브: 0.02250093378875223 % -[388] Char 션: 0.02250093378875223 % -[389] Char 즈: 0.021600896437202144 % -[390] Char 런: 0.021600896437202144 % -[391] Char 쿠: 0.021600896437202144 % -[392] Char 헌: 0.0211508777614271 % -[393] Char 곱: 0.0211508777614271 % -[394] Char 웅: 0.0211508777614271 % -[395] Char 헬: 0.0211508777614271 % -[396] Char 밖: 0.020700859085652053 % -[397] Char 멀: 0.020700859085652053 % -[398] Char 혀: 0.020700859085652053 % -[399] Char 빠: 0.020700859085652053 % -[400] Char 범: 0.020700859085652053 % -[401] Char 므: 0.020700859085652053 % -[402] Char 힘: 0.020700859085652053 % -[403] Char 넘: 0.02025084040987701 % -[404] Char 워: 0.02025084040987701 % -[405] Char 커: 0.02025084040987701 % -[406] Char 팀: 0.02025084040987701 % -[407] Char 뮬: 0.02025084040987701 % -[408] Char 냈: 0.019800821734101963 % -[409] Char 총: 0.019800821734101963 % -[410] Char 손: 0.019800821734101963 % -[411] Char 갖: 0.019800821734101963 % -[412] Char 빛: 0.01935080305832692 % -[413] Char 액: 0.01935080305832692 % -[414] Char 창: 0.01935080305832692 % -[415] Char 논: 0.01935080305832692 % -[416] Char 낸: 0.018900784382551876 % -[417] Char 즉: 0.018900784382551876 % -[418] Char 억: 0.018900784382551876 % -[419] Char 청: 0.018900784382551876 % -[420] Char 혹: 0.018450765706776832 % -[421] Char 블: 0.018450765706776832 % -[422] Char 책: 0.018450765706776832 % -[423] Char 찬: 0.018450765706776832 % -[424] Char 곡: 0.018000747031001785 % -[425] Char 누: 0.018000747031001785 % -[426] Char 패: 0.018000747031001785 % -[427] Char 잘: 0.018000747031001785 % -[428] Char 림: 0.017550728355226742 % -[429] Char 검: 0.017550728355226742 % -[430] Char 채: 0.017550728355226742 % -[431] Char 녹: 0.017100709679451695 % -[432] Char 괴: 0.017100709679451695 % -[433] Char 십: 0.017100709679451695 % -[434] Char 글: 0.017100709679451695 % -[435] Char 빨: 0.017100709679451695 % -[436] Char 융: 0.016650691003676655 % -[437] Char 렸: 0.016650691003676655 % -[438] Char 길: 0.016650691003676655 % -[439] Char 삼: 0.016650691003676655 % -[440] Char 협: 0.016650691003676655 % -[441] Char 잃: 0.016650691003676655 % -[442] Char 병: 0.016650691003676655 % -[443] Char 옅: 0.016200672327901608 % -[444] Char 념: 0.015750653652126564 % -[445] Char 뜻: 0.015750653652126564 % -[446] Char 켜: 0.015300634976351517 % -[447] Char 걸: 0.015300634976351517 % -[448] Char 효: 0.015300634976351517 % -[449] Char 육: 0.015300634976351517 % -[450] Char 벨: 0.015300634976351517 % -[451] Char 업: 0.015300634976351517 % -[452] Char 숫: 0.014850616300576474 % -[453] Char 틴: 0.014850616300576474 % -[454] Char 잔: 0.014850616300576474 % -[455] Char 뒤: 0.014850616300576474 % -[456] Char 벽: 0.014400597624801429 % -[457] Char 벌: 0.014400597624801429 % -[458] Char 짧: 0.014400597624801429 % -[459] Char 륨: 0.014400597624801429 % -[460] Char 친: 0.013950578949026385 % -[461] Char 섭: 0.01350056027325134 % -[462] Char 톤: 0.01350056027325134 % -[463] Char 끌: 0.01350056027325134 % -[464] Char 애: 0.013050541597476296 % -[465] Char 눈: 0.013050541597476296 % -[466] Char 담: 0.013050541597476296 % -[467] Char 캐: 0.013050541597476296 % -[468] Char 끝: 0.013050541597476296 % -[469] Char 턴: 0.01260052292170125 % -[470] Char 희: 0.01260052292170125 % -[471] Char 략: 0.01260052292170125 % -[472] Char 떤: 0.01260052292170125 % -[473] Char 깊: 0.012150504245926206 % -[474] Char 켰: 0.012150504245926206 % -[475] Char 렇: 0.012150504245926206 % -[476] Char 흡: 0.012150504245926206 % -[477] Char 겼: 0.012150504245926206 % -[478] Char 슈: 0.012150504245926206 % -[479] Char 빈: 0.012150504245926206 % -[480] Char 곽: 0.012150504245926206 % -[481] Char 앙: 0.012150504245926206 % -[482] Char 악: 0.012150504245926206 % -[483] Char 택: 0.012150504245926206 % -[484] Char 취: 0.012150504245926206 % -[485] Char 늄: 0.012150504245926206 % -[486] Char 찌: 0.01170048557015116 % -[487] Char 박: 0.01170048557015116 % -[488] Char 맞: 0.01170048557015116 % -[489] Char 앞: 0.01170048557015116 % -[490] Char 톨: 0.01170048557015116 % -[491] Char 렵: 0.01170048557015116 % -[492] Char 덮: 0.011250466894376115 % -[493] Char 펙: 0.011250466894376115 % -[494] Char 묘: 0.011250466894376115 % -[495] Char 쌍: 0.011250466894376115 % -[496] Char 덕: 0.010800448218601072 % -[497] Char 켈: 0.010800448218601072 % -[498] Char 엔: 0.010800448218601072 % -[499] Char 델: 0.010800448218601072 % -[500] Char 핀: 0.010800448218601072 % -[501] Char 힌: 0.010800448218601072 % -[502] Char 섬: 0.010800448218601072 % -[503] Char 씨: 0.010800448218601072 % -[504] Char 퇴: 0.010350429542826027 % -[505] Char 렌: 0.010350429542826027 % -[506] Char 웹: 0.010350429542826027 % -[507] Char 텐: 0.010350429542826027 % -[508] Char 섯: 0.010350429542826027 % -[509] Char 흑: 0.010350429542826027 % -[510] Char 큼: 0.009900410867050981 % -[511] Char 혼: 0.009900410867050981 % -[512] Char 써: 0.009900410867050981 % -[513] Char 슨: 0.009900410867050981 % -[514] Char 송: 0.009900410867050981 % -[515] Char 좌: 0.009900410867050981 % -[516] Char 덜: 0.009900410867050981 % -[517] Char 뀌: 0.009900410867050981 % -[518] Char 엘: 0.009900410867050981 % -[519] Char 텔: 0.009900410867050981 % -[520] Char 쪼: 0.009450392191275938 % -[521] Char 락: 0.009450392191275938 % -[522] Char 겉: 0.009450392191275938 % -[523] Char 렀: 0.009450392191275938 % -[524] Char 욕: 0.009450392191275938 % -[525] Char 힐: 0.009450392191275938 % -[526] Char 떠: 0.009000373515500893 % -[527] Char 널: 0.009000373515500893 % -[528] Char 콘: 0.009000373515500893 % -[529] Char 램: 0.009000373515500893 % -[530] Char 엄: 0.009000373515500893 % -[531] Char 룬: 0.009000373515500893 % -[532] Char 딸: 0.009000373515500893 % -[533] Char 벼: 0.009000373515500893 % -[534] Char 윙: 0.009000373515500893 % -[535] Char 휘: 0.008550354839725847 % -[536] Char 밤: 0.008550354839725847 % -[537] Char 뿐: 0.008550354839725847 % -[538] Char 곧: 0.008550354839725847 % -[539] Char 훨: 0.008550354839725847 % -[540] Char 씬: 0.008550354839725847 % -[541] Char 큐: 0.008550354839725847 % -[542] Char 숭: 0.008550354839725847 % -[543] Char 띄: 0.008100336163950804 % -[544] Char 닌: 0.008100336163950804 % -[545] Char 깝: 0.008100336163950804 % -[546] Char 흐: 0.008100336163950804 % -[547] Char 웠: 0.008100336163950804 % -[548] Char 롯: 0.008100336163950804 % -[549] Char 뜨: 0.008100336163950804 % -[550] Char 죽: 0.008100336163950804 % -[551] Char 즘: 0.008100336163950804 % -[552] Char 닉: 0.008100336163950804 % -[553] Char 붕: 0.007650317488175759 % -[554] Char 욱: 0.007650317488175759 % -[555] Char 끼: 0.007650317488175759 % -[556] Char 익: 0.007650317488175759 % -[557] Char 옛: 0.007650317488175759 % -[558] Char 붉: 0.007650317488175759 % -[559] Char 칠: 0.007650317488175759 % -[560] Char 웰: 0.007650317488175759 % -[561] Char 컸: 0.007650317488175759 % -[562] Char 씩: 0.007650317488175759 % -[563] Char 낙: 0.007650317488175759 % -[564] Char 녀: 0.007650317488175759 % -[565] Char 얇: 0.007650317488175759 % -[566] Char 싸: 0.007200298812400714 % -[567] Char 꺼: 0.007200298812400714 % -[568] Char 찍: 0.007200298812400714 % -[569] Char 랜: 0.007200298812400714 % -[570] Char 골: 0.007200298812400714 % -[571] Char 옹: 0.007200298812400714 % -[572] Char 빌: 0.007200298812400714 % -[573] Char 칸: 0.007200298812400714 % +[ 0] Char 이: 3.7802639862564957 % +[ 1] Char 다: 3.0295997524461944 % +[ 2] Char 의: 2.7737763692819875 % +[ 3] Char 는: 2.4787391828590333 % +[ 4] Char 에: 2.3573631251533875 % +[ 5] Char 로: 1.798232978008259 % +[ 6] Char 하: 1.5922938207582404 % +[ 7] Char 지: 1.559749031659144 % +[ 8] Char 성: 1.4546453685030465 % +[ 9] Char 은: 1.4471760726442375 % +[10] Char 가: 1.4266355090325127 % +[11] Char 을: 1.2873864933790027 % +[12] Char 한: 1.2713808593958407 % +[13] Char 기: 1.2145608587556154 % +[14] Char 서: 1.1649433934078128 % +[15] Char 고: 1.074511561402947 % +[16] Char 도: 1.0558383217559248 % +[17] Char 으: 1.0128898705677731 % +[18] Char 리: 0.940864517643544 % +[19] Char 대: 0.9024509960839548 % +[20] Char 어: 0.8744411366134212 % +[21] Char 있: 0.872840573215105 % +[22] Char 스: 0.8400290235496228 % +[23] Char 를: 0.810952121813545 % +[24] Char 사: 0.7365259237918415 % +[25] Char 수: 0.734125078694367 % +[26] Char 일: 0.725588740570014 % +[27] Char 인: 0.6959783177011641 % +[28] Char 들: 0.6783721203196859 % +[29] Char 과: 0.671969866726421 % +[30] Char 아: 0.6511625425483104 % +[31] Char 구: 0.6436932466895013 % +[32] Char 나: 0.6314222606357438 % +[33] Char 해: 0.6242197253433208 % +[34] Char 시: 0.6186177534492141 % +[35] Char 자: 0.6164836689181259 % +[36] Char 라: 0.609547894192089 % +[37] Char 부: 0.6002112743685778 % +[38] Char 년: 0.5927419785097688 % +[39] Char 되: 0.5892740911467503 % +[40] Char 정: 0.5719346543316581 % +[41] Char 전: 0.5655324007383933 % +[42] Char 위: 0.5569960626140401 % +[43] Char 적: 0.5132473297267305 % +[44] Char 주: 0.49670817461079636 % +[45] Char 상: 0.4887053576192153 % +[46] Char 소: 0.47296648420243925 % +[47] Char 면: 0.44975831492685425 % +[48] Char 양: 0.43882113170502685 % +[49] Char 그: 0.4366870471739386 % +[50] Char 우: 0.4364202866075525 % +[51] Char 계: 0.43081831471344584 % +[52] Char 체: 0.4182805680933022 % +[53] Char 보: 0.4057428214731586 % +[54] Char 국: 0.3958726805168753 % +[55] Char 행: 0.3942721171185591 % +[56] Char 태: 0.39107099032192666 % +[57] Char 여: 0.38386845502950373 % +[58] Char 와: 0.3822678916311875 % +[59] Char 화: 0.3790667648345551 % +[60] Char 만: 0.37453183520599254 % +[61] Char 분: 0.37266451124129024 % +[62] Char 원: 0.3699969055774299 % +[63] Char 었: 0.3694633844446578 % +[64] Char 된: 0.36599549708163936 % +[65] Char 중: 0.3654619759488673 % +[66] Char 것: 0.3643949336833232 % +[67] Char 제: 0.3545247927270399 % +[68] Char 공: 0.3478557785673891 % +[69] Char 문: 0.3462552151690728 % +[70] Char 게: 0.34278732780605436 % +[71] Char 며: 0.33931944044303597 % +[72] Char 유: 0.33931944044303597 % +[73] Char 동: 0.3326504262833851 % +[74] Char 장: 0.3179785951321532 % +[75] Char 발: 0.3147774683355208 % +[76] Char 선: 0.31024253870695817 % +[77] Char 트: 0.3049073273792375 % +[78] Char 물: 0.3038402851136933 % +[79] Char 월: 0.3025064822817632 % +[80] Char 천: 0.3003723977506749 % +[81] Char 오: 0.3001056371842889 % +[82] Char 거: 0.2926363413254799 % +[83] Char 학: 0.29236958075909386 % +[84] Char 마: 0.2915692990599357 % +[85] Char 진: 0.2913025384935497 % +[86] Char 용: 0.2905022567943916 % +[87] Char 르: 0.28276620036919664 % +[88] Char 비: 0.2808988764044944 % +[89] Char 미: 0.28036535527172235 % +[90] Char 단: 0.277964510174248 % +[91] Char 질: 0.27049521431543905 % +[92] Char 형: 0.26756084808519265 % +[93] Char 산: 0.25929127052722556 % +[94] Char 반: 0.25822422826168145 % +[95] Char 토: 0.2539560591995049 % +[96] Char 때: 0.25208873523480263 % +[97] Char 카: 0.25128845353564455 % +[98] Char 경: 0.2486208478717842 % +[99] Char 명: 0.24675352390708194 % +[100] Char 모: 0.24541972107515178 % +[101] Char 데: 0.24115155201297522 % +[102] Char 영: 0.2408847914465892 % +[103] Char 음: 0.24061803088020317 % +[104] Char 터: 0.2360831012516406 % +[105] Char 세: 0.23554958011886853 % +[106] Char 간: 0.2339490167205523 % +[107] Char 각: 0.23314873502139422 % +[108] Char 크: 0.23288197445500816 % +[109] Char 러: 0.23021436879114782 % +[110] Char 생: 0.22781352369367353 % +[111] Char 연: 0.22621296029535728 % +[112] Char 운: 0.22487915746342713 % +[113] Char 관: 0.2246123968970411 % +[114] Char 방: 0.22434563633065507 % +[115] Char 표: 0.22407887576426905 % +[116] Char 조: 0.21927718556932038 % +[117] Char 였: 0.21394197424159972 % +[118] Char 개: 0.2091402840466511 % +[119] Char 재: 0.20407183328531642 % +[120] Char 니: 0.200870706488684 % +[121] Char 내: 0.200870706488684 % +[122] Char 했: 0.19980366422313986 % +[123] Char 할: 0.19900338252398178 % +[124] Char 파: 0.19873662195759573 % +[125] Char 드: 0.1984698613912097 % +[126] Char 름: 0.19740281912566554 % +[127] Char 역: 0.1966025374265075 % +[128] Char 신: 0.1936681711962611 % +[129] Char 타: 0.19286788949710298 % +[130] Char 바: 0.19046704439962867 % +[131] Char 목: 0.18806619930215435 % +[132] Char 프: 0.18593211477106608 % +[133] Char 작: 0.1845983119391359 % +[134] Char 치: 0.18379803023997782 % +[135] Char 식: 0.17952986117780123 % +[136] Char 독: 0.17899634004502918 % +[137] Char 두: 0.17872957947864315 % +[138] Char 등: 0.17552845268201073 % +[139] Char 력: 0.1744614104164666 % +[140] Char 점: 0.17419464985008057 % +[141] Char 레: 0.17339436815092246 % +[142] Char 통: 0.17339436815092246 % +[143] Char 약: 0.16725887512404367 % +[144] Char 외: 0.1659250722921135 % +[145] Char 속: 0.16565831172572745 % +[146] Char 안: 0.16485803002656935 % +[147] Char 같: 0.16352422719463916 % +[148] Char 포: 0.16325746662825313 % +[149] Char 현: 0.16325746662825313 % +[150] Char 측: 0.16272394549548108 % +[151] Char 층: 0.16245718492909506 % +[152] Char 른: 0.15925605813246263 % +[153] Char 달: 0.15845577643330452 % +[154] Char 호: 0.15792225530053247 % +[155] Char 량: 0.15765549473414642 % +[156] Char 차: 0.1573887341677604 % +[157] Char 권: 0.15258704397281175 % +[158] Char 따: 0.14885239604340728 % +[159] Char 또: 0.14751859321147712 % +[160] Char 무: 0.14725183264509106 % +[161] Char 까: 0.14645155094593298 % +[162] Char 민: 0.14485098754761677 % +[163] Char 않: 0.1443174664148447 % +[164] Char 루: 0.13791521282157987 % +[165] Char 변: 0.1360478888568776 % +[166] Char 온: 0.13444732545856142 % +[167] Char 후: 0.13418056489217536 % +[168] Char 히: 0.1336470437594033 % +[169] Char 궤: 0.13338028319301728 % +[170] Char 존: 0.13097943809554297 % +[171] Char 피: 0.13071267752915694 % +[172] Char 견: 0.12831183243168262 % +[173] Char 회: 0.12831183243168262 % +[174] Char 돌: 0.12644450846698038 % +[175] Char 심: 0.12537746620143625 % +[176] Char 출: 0.12457718450227813 % +[177] Char 함: 0.12404366336950608 % +[178] Char 합: 0.12217633940480382 % +[179] Char 테: 0.12110929713925968 % +[180] Char 류: 0.12110929713925968 % +[181] Char 열: 0.12084253657287365 % +[182] Char 직: 0.11897521260817141 % +[183] Char 왕: 0.11790817034262728 % +[184] Char 불: 0.1173746492098552 % +[185] Char 당: 0.11710788864346916 % +[186] Char 충: 0.11524056467876692 % +[187] Char 결: 0.11364000128045071 % +[188] Char 더: 0.11364000128045071 % +[189] Char 요: 0.11337324071406468 % +[190] Char 알: 0.11150591674936244 % +[191] Char 번: 0.1112391561829764 % +[192] Char 설: 0.1112391561829764 % +[193] Char 초: 0.10857155051911607 % +[194] Char 실: 0.10750450825357193 % +[195] Char 별: 0.10670422655441382 % +[196] Char 법: 0.10617070542164174 % +[197] Char 종: 0.10483690258971158 % +[198] Char 래: 0.10376986032416743 % +[199] Char 근: 0.10350309975778141 % +[200] Char 추: 0.10163577579307916 % +[201] Char 암: 0.10136901522669313 % +[202] Char 남: 0.10030197296114898 % +[203] Char 판: 0.10003521239476297 % +[204] Char 려: 0.09950169126199089 % +[205] Char 교: 0.09950169126199089 % +[206] Char 석: 0.09923493069560485 % +[207] Char 향: 0.09843464899644676 % +[208] Char 저: 0.09763436729728865 % +[209] Char 많: 0.09710084616451657 % +[210] Char 평: 0.09683408559813055 % +[211] Char 처: 0.09603380389897244 % +[212] Char 메: 0.09550028276620036 % +[213] Char 확: 0.09523352219981433 % +[214] Char 광: 0.0949667616334283 % +[215] Char 최: 0.0949667616334283 % +[216] Char 입: 0.0941664799342702 % +[217] Char 져: 0.09256591653595399 % +[218] Char 론: 0.09256591653595399 % +[219] Char 매: 0.09229915596956795 % +[220] Char 키: 0.09096535313763779 % +[221] Char 배: 0.09069859257125175 % +[222] Char 예: 0.08963155030570762 % +[223] Char 항: 0.08909802917293554 % +[224] Char 탐: 0.08883126860654951 % +[225] Char 야: 0.0880309869073914 % +[226] Char 말: 0.08616366294268916 % +[227] Char 노: 0.08589690237630312 % +[228] Char 네: 0.08563014180991708 % +[229] Char 본: 0.08536338124353106 % +[230] Char 금: 0.08482986011075899 % +[231] Char 없: 0.08456309954437295 % +[232] Char 준: 0.08402957841160089 % +[233] Char 특: 0.08296253614605675 % +[234] Char 족: 0.08242901501328467 % +[235] Char 베: 0.08162873331412657 % +[236] Char 디: 0.0794946487830383 % +[237] Char 탄: 0.07896112765026624 % +[238] Char 칼: 0.07816084595110813 % +[239] Char 쪽: 0.07816084595110813 % +[240] Char 길: 0.07682704311917794 % +[241] Char 너: 0.07656028255279192 % +[242] Char 색: 0.07576000085363382 % +[243] Char 임: 0.07522647972086174 % +[244] Char 활: 0.07389267688893157 % +[245] Char 강: 0.0733591557561595 % +[246] Char 밀: 0.07309239518977347 % +[247] Char 능: 0.07202535292422932 % +[248] Char 플: 0.07202535292422932 % +[249] Char 증: 0.07149183179145727 % +[250] Char 축: 0.07042478952591312 % +[251] Char 난: 0.06962450782675501 % +[252] Char 큰: 0.06935774726036899 % +[253] Char 록: 0.06935774726036899 % +[254] Char 액: 0.06855746556121088 % +[255] Char 티: 0.0680239444284388 % +[256] Char 던: 0.06695690216289467 % +[257] Char 백: 0.06669014159650864 % +[258] Char 란: 0.06588985989735054 % +[259] Char 얼: 0.06508957819819243 % +[260] Char 받: 0.06508957819819243 % +[261] Char 페: 0.06375577536626226 % +[262] Char 핵: 0.06295549366710415 % +[263] Char 극: 0.06268873310071812 % +[264] Char 규: 0.062421972534332085 % +[265] Char 감: 0.062421972534332085 % +[266] Char 접: 0.061354930268787945 % +[267] Char 버: 0.061354930268787945 % +[268] Char 업: 0.061354930268787945 % +[269] Char 졌: 0.060287888003243806 % +[270] Char 밝: 0.06002112743685778 % +[271] Char 날: 0.06002112743685778 % +[272] Char 혜: 0.06002112743685778 % +[273] Char 았: 0.059487606304085706 % +[274] Char 갈: 0.05762028233938346 % +[275] Char 든: 0.056286479507453295 % +[276] Char 코: 0.05575295837468122 % +[277] Char 윈: 0.05548619780829519 % +[278] Char 압: 0.054952676675523116 % +[279] Char 검: 0.05441915554275105 % +[280] Char 건: 0.053885634409978976 % +[281] Char 럼: 0.05281859214443484 % +[282] Char 및: 0.05228507101166278 % +[283] Char 움: 0.05228507101166278 % +[284] Char 높: 0.052018310445276744 % +[285] Char 새: 0.052018310445276744 % +[286] Char 복: 0.052018310445276744 % +[287] Char 투: 0.051751549878890704 % +[288] Char 편: 0.050684507613346565 % +[289] Char 료: 0.05041774704696053 % +[290] Char 환: 0.05015098648057449 % +[291] Char 겨: 0.04988422591418846 % +[292] Char 집: 0.04988422591418846 % +[293] Char 린: 0.04748338081671415 % +[294] Char 순: 0.04748338081671415 % +[295] Char 급: 0.046149577984783974 % +[296] Char 응: 0.04588281741839794 % +[297] Char 망: 0.04588281741839794 % +[298] Char 즉: 0.04561605685201191 % +[299] Char 곳: 0.04561605685201191 % +[300] Char 군: 0.04561605685201191 % +[301] Char 째: 0.045349296285625874 % +[302] Char 륙: 0.045349296285625874 % +[303] Char 령: 0.04508253571923984 % +[304] Char 빛: 0.04454901458646777 % +[305] Char 승: 0.04454901458646777 % +[306] Char 북: 0.04454901458646777 % +[307] Char 절: 0.0440154934536957 % +[308] Char 균: 0.04374873288730966 % +[309] Char 퍼: 0.043481972320923636 % +[310] Char 격: 0.042414930055379496 % +[311] Char 언: 0.042148169488993456 % +[312] Char 케: 0.04134788778983536 % +[313] Char 될: 0.04054760609067725 % +[314] Char 풍: 0.04028084552429122 % +[315] Char 먼: 0.04028084552429122 % +[316] Char 몇: 0.04028084552429122 % +[317] Char 막: 0.04001408495790518 % +[318] Char 폰: 0.04001408495790518 % +[319] Char 머: 0.03948056382513312 % +[320] Char 억: 0.03921380325874708 % +[321] Char 랑: 0.038947042692361045 % +[322] Char 련: 0.038947042692361045 % +[323] Char 브: 0.038947042692361045 % +[324] Char 율: 0.03841352155958897 % +[325] Char 못: 0.03788000042681691 % +[326] Char 커: 0.03761323986043087 % +[327] Char 틀: 0.03761323986043087 % +[328] Char 헬: 0.03654619759488673 % +[329] Char 황: 0.0362794370285007 % +[330] Char 낮: 0.03574591589572863 % +[331] Char 께: 0.03574591589572863 % +[332] Char 붙: 0.03574591589572863 % +[333] Char 긴: 0.03521239476295656 % +[334] Char 립: 0.03521239476295656 % +[335] Char 침: 0.03521239476295656 % +[336] Char 릴: 0.03494563419657053 % +[337] Char 괴: 0.034678873630184494 % +[338] Char 슷: 0.034412113063798454 % +[339] Char 허: 0.034412113063798454 % +[340] Char 험: 0.034412113063798454 % +[341] Char 완: 0.03414535249741242 % +[342] Char 럽: 0.03414535249741242 % +[343] Char 칙: 0.033611831364640354 % +[344] Char 워: 0.033611831364640354 % +[345] Char 철: 0.03334507079825432 % +[346] Char 술: 0.03334507079825432 % +[347] Char 블: 0.03334507079825432 % +[348] Char 즈: 0.03334507079825432 % +[349] Char 획: 0.03307831023186828 % +[350] Char 울: 0.03281154966548225 % +[351] Char 곡: 0.032544789099096215 % +[352] Char 올: 0.03227802853271018 % +[353] Char 륨: 0.03227802853271018 % +[354] Char 쳐: 0.03201126796632415 % +[355] Char 볼: 0.03201126796632415 % +[356] Char 므: 0.031744507399938116 % +[357] Char 센: 0.031210986267166042 % +[358] Char 쓰: 0.031210986267166042 % +[359] Char 폭: 0.03094422570078001 % +[360] Char 왔: 0.030677465134393973 % +[361] Char 람: 0.030677465134393973 % +[362] Char 칭: 0.030677465134393973 % +[363] Char 느: 0.030410704568007936 % +[364] Char 살: 0.030410704568007936 % +[365] Char 염: 0.029610422868849837 % +[366] Char 늘: 0.029610422868849837 % +[367] Char 값: 0.0293436623024638 % +[368] Char 맨: 0.0293436623024638 % +[369] Char 징: 0.02881014116969173 % +[370] Char 찰: 0.02881014116969173 % +[371] Char 팔: 0.02881014116969173 % +[372] Char 혀: 0.028543380603305694 % +[373] Char 참: 0.028543380603305694 % +[374] Char 누: 0.02827662003691966 % +[375] Char 총: 0.02800985947053363 % +[376] Char 엔: 0.027743098904147594 % +[377] Char 범: 0.027476338337761558 % +[378] Char 탈: 0.027476338337761558 % +[379] Char 떨: 0.027209577771375525 % +[380] Char 눈: 0.026676056638603455 % +[381] Char 쟁: 0.02640929607221742 % +[382] Char 깥: 0.02614253550583139 % +[383] Char 잡: 0.02614253550583139 % +[384] Char 삼: 0.02614253550583139 % +[385] Char 림: 0.025875774939445352 % +[386] Char 흑: 0.025875774939445352 % +[387] Char 효: 0.02560901437305932 % +[388] Char 뜻: 0.02560901437305932 % +[389] Char 필: 0.02560901437305932 % +[390] Char 취: 0.02560901437305932 % +[391] Char 헌: 0.025342253806673282 % +[392] Char 걸: 0.025342253806673282 % +[393] Char 왜: 0.025342253806673282 % +[394] Char 뒤: 0.025342253806673282 % +[395] Char 률: 0.025075493240287246 % +[396] Char 링: 0.025075493240287246 % +[397] Char 클: 0.025075493240287246 % +[398] Char 킬: 0.025075493240287246 % +[399] Char 채: 0.025075493240287246 % +[400] Char 깊: 0.024808732673901213 % +[401] Char 습: 0.024808732673901213 % +[402] Char 써: 0.024808732673901213 % +[403] Char 줄: 0.024541972107515176 % +[404] Char 품: 0.024541972107515176 % +[405] Char 둘: 0.024275211541129143 % +[406] Char 청: 0.02400845097474311 % +[407] Char 션: 0.023741690408357077 % +[408] Char 육: 0.02347492984197104 % +[409] Char 병: 0.023208169275585007 % +[410] Char 송: 0.02294140870919897 % +[411] Char 엄: 0.022407887576426904 % +[412] Char 셀: 0.022407887576426904 % +[413] Char 헤: 0.022141127010040867 % +[414] Char 착: 0.02187436644365483 % +[415] Char 얻: 0.02187436644365483 % +[416] Char 런: 0.02187436644365483 % +[417] Char 글: 0.02187436644365483 % +[418] Char 융: 0.021607605877268798 % +[419] Char 흐: 0.021607605877268798 % +[420] Char 섭: 0.021340845310882765 % +[421] Char 흡: 0.021074084744496728 % +[422] Char 났: 0.021074084744496728 % +[423] Char 츠: 0.021074084744496728 % +[424] Char 닌: 0.020807324178110695 % +[425] Char 악: 0.020540563611724662 % +[426] Char 웨: 0.020540563611724662 % +[427] Char 밖: 0.020273803045338625 % +[428] Char 끝: 0.020273803045338625 % +[429] Char 멀: 0.02000704247895259 % +[430] Char 잘: 0.02000704247895259 % +[431] Char 흔: 0.01974028191256656 % +[432] Char 뉴: 0.01974028191256656 % +[433] Char 떤: 0.01974028191256656 % +[434] Char 책: 0.01974028191256656 % +[435] Char 갤: 0.01974028191256656 % +[436] Char 힘: 0.019473521346180522 % +[437] Char 애: 0.019206760779794486 % +[438] Char 희: 0.019206760779794486 % +[439] Char 넘: 0.018406479080636383 % +[440] Char 렇: 0.018406479080636383 % +[441] Char 창: 0.018406479080636383 % +[442] Char 쿠: 0.018406479080636383 % +[443] Char 골: 0.018406479080636383 % +[444] Char 례: 0.01813971851425035 % +[445] Char 패: 0.01813971851425035 % +[446] Char 빠: 0.017872957947864317 % +[447] Char 혼: 0.017872957947864317 % +[448] Char 슈: 0.017872957947864317 % +[449] Char 갖: 0.01760619738147828 % +[450] Char 넓: 0.017339436815092247 % +[451] Char 녹: 0.017339436815092247 % +[452] Char 펙: 0.01707267624870621 % +[453] Char 킨: 0.01707267624870621 % +[454] Char 털: 0.01707267624870621 % +[455] Char 럭: 0.01707267624870621 % +[456] Char 혹: 0.016805915682320177 % +[457] Char 빈: 0.016805915682320177 % +[458] Char 맞: 0.016805915682320177 % +[459] Char 앙: 0.016805915682320177 % +[460] Char 널: 0.01653915511593414 % +[461] Char 맥: 0.01653915511593414 % +[462] Char 랜: 0.01653915511593414 % +[463] Char 낸: 0.016272394549548107 % +[464] Char 렸: 0.016272394549548107 % +[465] Char 친: 0.016005633983162074 % +[466] Char 렌: 0.016005633983162074 % +[467] Char 략: 0.016005633983162074 % +[468] Char 훨: 0.016005633983162074 % +[469] Char 씬: 0.016005633983162074 % +[470] Char 푸: 0.016005633983162074 % +[471] Char 김: 0.016005633983162074 % +[472] Char 찾: 0.015738873416776038 % +[473] Char 첫: 0.015738873416776038 % +[474] Char 득: 0.015738873416776038 % +[475] Char 농: 0.015472112850390005 % +[476] Char 논: 0.015472112850390005 % +[477] Char 먹: 0.015472112850390005 % +[478] Char 폴: 0.015472112850390005 % +[479] Char 떠: 0.015205352284003968 % +[480] Char 박: 0.015205352284003968 % +[481] Char 뜨: 0.015205352284003968 % +[482] Char 맹: 0.015205352284003968 % +[483] Char 퇴: 0.014938591717617937 % +[484] Char 멸: 0.014938591717617937 % +[485] Char 손: 0.014938591717617937 % +[486] Char 컴: 0.0146718311512319 % +[487] Char 촬: 0.014405070584845865 % +[488] Char 붕: 0.014405070584845865 % +[489] Char 념: 0.014405070584845865 % +[490] Char 벌: 0.01413831001845983 % +[491] Char 곱: 0.013871549452073797 % +[492] Char 냈: 0.013604788885687762 % +[493] Char 끼: 0.013604788885687762 % +[494] Char 협: 0.013338028319301727 % +[495] Char 짧: 0.013338028319301727 % +[496] Char 캐: 0.013338028319301727 % +[497] Char 택: 0.013338028319301727 % +[498] Char 켜: 0.013071267752915694 % +[499] Char 찬: 0.013071267752915694 % +[500] Char 둥: 0.013071267752915694 % +[501] Char 덮: 0.01280450718652966 % +[502] Char 휘: 0.01280450718652966 % +[503] Char 뿐: 0.01280450718652966 % +[504] Char 램: 0.01280450718652966 % +[505] Char 폐: 0.01280450718652966 % +[506] Char 좌: 0.01280450718652966 % +[507] Char 슬: 0.012537746620143623 % +[508] Char 켈: 0.012537746620143623 % +[509] Char 델: 0.012537746620143623 % +[510] Char 틴: 0.012004225487371555 % +[511] Char 놓: 0.012004225487371555 % +[512] Char 뇌: 0.012004225487371555 % +[513] Char 척: 0.012004225487371555 % +[514] Char 띠: 0.01173746492098552 % +[515] Char 톨: 0.01173746492098552 % +[516] Char 냥: 0.01173746492098552 % +[517] Char 앞: 0.01173746492098552 % +[518] Char 덩: 0.011470704354599485 % +[519] Char 퓨: 0.011470704354599485 % +[520] Char 잠: 0.011470704354599485 % +[521] Char 몸: 0.011470704354599485 % +[522] Char 혁: 0.011470704354599485 % +[523] Char 숫: 0.011203943788213452 % +[524] Char 랙: 0.011203943788213452 % +[525] Char 끌: 0.011203943788213452 % +[526] Char 덜: 0.011203943788213452 % +[527] Char 벨: 0.011203943788213452 % +[528] Char 봉: 0.010937183221827415 % +[529] Char 톤: 0.010937183221827415 % +[530] Char 늄: 0.010937183221827415 % +[531] Char 셜: 0.010937183221827415 % +[532] Char 좋: 0.010670422655441382 % +[533] Char 큼: 0.010670422655441382 % +[534] Char 슨: 0.010670422655441382 % +[535] Char 쉽: 0.010670422655441382 % +[536] Char 홀: 0.010670422655441382 % +[537] Char 돈: 0.010403662089055347 % +[538] Char 벽: 0.010136901522669313 % +[539] Char 씩: 0.010136901522669313 % +[540] Char 십: 0.00987014095628328 % +[541] Char 팀: 0.00987014095628328 % +[542] Char 쌍: 0.00987014095628328 % +[543] Char 턴: 0.00987014095628328 % +[544] Char 휴: 0.00987014095628328 % +[545] Char 텐: 0.00987014095628328 % +[546] Char 넷: 0.00987014095628328 % +[547] Char 콘: 0.009603380389897243 % +[548] Char 몰: 0.009603380389897243 % +[549] Char 냉: 0.009603380389897243 % +[550] Char 젝: 0.009603380389897243 % +[551] Char 햇: 0.009603380389897243 % +[552] Char 릭: 0.009603380389897243 % +[553] Char 롯: 0.009603380389897243 % +[554] Char 섞: 0.00933661982351121 % +[555] Char 픈: 0.00933661982351121 % +[556] Char 욱: 0.00933661982351121 % +[557] Char 컵: 0.00933661982351121 % +[558] Char 튬: 0.00933661982351121 % +[559] Char 싸: 0.009069859257125175 % +[560] Char 멘: 0.009069859257125175 % +[561] Char 씨: 0.009069859257125175 % +[562] Char 빌: 0.009069859257125175 % +[563] Char 귀: 0.009069859257125175 % +[564] Char 죽: 0.009069859257125175 % +[565] Char 풀: 0.009069859257125175 % +[566] Char 켰: 0.00880309869073914 % +[567] Char 끊: 0.00880309869073914 % +[568] Char 담: 0.00880309869073914 % +[569] Char 겉: 0.00880309869073914 % +[570] Char 굴: 0.00880309869073914 % +[571] Char 갔: 0.00880309869073914 % +[572] Char 락: 0.00880309869073914 % +[573] Char 팽: 0.00880309869073914 % +[574] Char 빨: 0.00880309869073914 % +[575] Char 끓: 0.00880309869073914 % +[576] Char 뤼: 0.00880309869073914 % +[577] Char 템: 0.008536338124353105 % +[578] Char 쇄: 0.008536338124353105 % +[579] Char 뮤: 0.008536338124353105 % +[580] Char 덕: 0.00826957755796707 % +[581] Char 잔: 0.00826957755796707 % +[582] Char 곽: 0.00826957755796707 % +[583] Char 웹: 0.00826957755796707 % +[584] Char 깨: 0.00826957755796707 % +[585] Char 낼: 0.00826957755796707 % +[586] Char 멜: 0.00826957755796707 % +[587] Char 컷: 0.008002816991581037 % +[588] Char 녀: 0.008002816991581037 % +[589] Char 콜: 0.008002816991581037 % +[590] Char 힌: 0.007736056425195002 % +[591] Char 탑: 0.007736056425195002 % +[592] Char 믿: 0.007736056425195002 % +[593] Char 땅: 0.007736056425195002 % +[594] Char 익: 0.007736056425195002 % +[595] Char 뭉: 0.007736056425195002 % +[596] Char 욕: 0.007736056425195002 % +[597] Char 엘: 0.007736056425195002 % +[598] Char 퀴: 0.007469295858808968 % +[599] Char 픽: 0.007469295858808968 % +[600] Char 밑: 0.007202535292422933 % +[601] Char 섯: 0.007202535292422933 % +[602] Char 짐: 0.007202535292422933 % +[603] Char 렀: 0.007202535292422933 % +[604] Char 윤: 0.007202535292422933 % +[605] Char 촉: 0.006935774726036899 % +[606] Char 꺼: 0.006935774726036899 % +[607] Char 겼: 0.006935774726036899 % +[608] Char 됨: 0.006935774726036899 % +[609] Char 핀: 0.006935774726036899 % +[610] Char 꾸: 0.006935774726036899 % +[611] Char 뿔: 0.006935774726036899 % +[612] Char 섬: 0.006935774726036899 % +[613] Char 딸: 0.006935774726036899 % +[614] Char 덴: 0.006935774726036899 % +[615] Char 끔: 0.006669014159650864 % +[616] Char 찍: 0.006669014159650864 % +[617] Char 뉜: 0.006669014159650864 % +[618] Char 옛: 0.006669014159650864 % +[619] Char 텔: 0.006669014159650864 % +[620] Char 칸: 0.006669014159650864 % +[621] Char 헝: 0.006669014159650864 % +[622] Char 띄: 0.00640225359326483 % +[623] Char 짜: 0.00640225359326483 % +[624] Char 냐: 0.00640225359326483 % +[625] Char 밤: 0.00640225359326483 % +[626] Char 얇: 0.00640225359326483 % +[627] Char 룹: 0.00640225359326483 % +[628] Char 혔: 0.00640225359326483 % +[629] Char 롭: 0.00640225359326483 % +[630] Char 됐: 0.00640225359326483 % +[631] Char 캘: 0.00640225359326483 % +[632] Char 칠: 0.00640225359326483 % +[633] Char 윌: 0.00640225359326483 % +[634] Char 엣: 0.00640225359326483 % +[635] Char 몬: 0.006135493026878794 % +[636] Char 갑: 0.006135493026878794 % +[637] Char 렬: 0.006135493026878794 % +[638] Char 붉: 0.006135493026878794 % +[639] Char 늬: 0.006135493026878794 % +[640] Char 룬: 0.006135493026878794 % +[641] Char 잃: 0.006135493026878794 % +[642] Char 탕: 0.006135493026878794 % +[643] Char 혈: 0.006135493026878794 % +[644] Char 뉘: 0.00586873246049276 % +[645] Char 훈: 0.00586873246049276 % +[646] Char 솔: 0.00586873246049276 % +[647] Char 끈: 0.00586873246049276 % +[648] Char 젠: 0.00586873246049276 % +[649] Char 슘: 0.005601971894106726 % +[650] Char 깝: 0.005601971894106726 % +[651] Char 겁: 0.005601971894106726 % +[652] Char 묘: 0.005601971894106726 % +[653] Char 켓: 0.005601971894106726 % +[654] Char 샤: 0.005601971894106726 % +[655] Char 쇼: 0.005601971894106726 % +[656] Char 닥: 0.005335211327720691 % +[657] Char 홈: 0.005335211327720691 % +[658] Char 곤: 0.005335211327720691 % +[659] Char 얀: 0.005335211327720691 % +[660] Char 뀌: 0.005335211327720691 % +[661] Char 럴: 0.005335211327720691 % +[662] Char 숭: 0.005335211327720691 % +[663] Char 짓: 0.005335211327720691 % +[664] Char 벗: 0.005335211327720691 % +[665] Char 벼: 0.005335211327720691 % +[666] Char 렵: 0.005335211327720691 % +[667] Char 쉬: 0.005068450761334656 % +[668] Char 떻: 0.005068450761334656 % +[669] Char 뢰: 0.005068450761334656 % +[670] Char 깃: 0.005068450761334656 % +[671] Char 놀: 0.005068450761334656 % +[672] Char 헨: 0.005068450761334656 % +[673] Char 벤: 0.005068450761334656 % +[674] Char 컫: 0.005068450761334656 % +[675] Char 겪: 0.005068450761334656 % +[676] Char 뷰: 0.005068450761334656 % +[677] Char 뻗: 0.004801690194948621 % +[678] Char 맺: 0.004801690194948621 % +[679] Char 빙: 0.004801690194948621 % +[680] Char 숙: 0.004801690194948621 % +[681] Char 낳: 0.004801690194948621 % +[682] Char 낙: 0.004801690194948621 % +[683] Char 싱: 0.004801690194948621 % +[684] Char 묶: 0.004801690194948621 % +[685] Char 핑: 0.004801690194948621 % +[686] Char 곧: 0.0045349296285625874 % +[687] Char 뮬: 0.0045349296285625874 % +[688] Char 롱: 0.0045349296285625874 % +[689] Char 옥: 0.0045349296285625874 % +[690] Char 홍: 0.0045349296285625874 % +[691] Char 웠: 0.0045349296285625874 % +[692] Char 좀: 0.0045349296285625874 % +[693] Char 겐: 0.0045349296285625874 % +[694] Char 넣: 0.0045349296285625874 % +[695] Char 흩: 0.0045349296285625874 % +[696] Char 즌: 0.004268169062176553 % +[697] Char 읽: 0.004268169062176553 % +[698] Char 캄: 0.004268169062176553 % +[699] Char 뀐: 0.004268169062176553 % +[700] Char 궁: 0.004268169062176553 % +[701] Char 낱: 0.004268169062176553 % +[702] Char 뛰: 0.004268169062176553 % +[703] Char 춘: 0.004268169062176553 % +[704] Char 랫: 0.004268169062176553 % +[705] Char 랐: 0.004268169062176553 % +[706] Char 흰: 0.004268169062176553 % +[707] Char 틸: 0.004268169062176553 % +[708] Char 룸: 0.004268169062176553 % +[709] Char 앱: 0.004268169062176553 % +[710] Char 톡: 0.004268169062176553 % +[711] Char 앤: 0.004268169062176553 % +[712] Char 젊: 0.004001408495790519 % +[713] Char 웃: 0.004001408495790519 % +[714] Char 쌓: 0.004001408495790519 % +[715] Char 컬: 0.004001408495790519 % +[716] Char 곰: 0.004001408495790519 % +[717] Char 좁: 0.004001408495790519 % +[718] Char 꼴: 0.004001408495790519 % +[719] Char 엇: 0.004001408495790519 % +[720] Char 섹: 0.004001408495790519 % +[721] Char 옅: 0.003734647929404484 % +[722] Char 눌: 0.003734647929404484 % +[723] Char 펠: 0.003734647929404484 % +[724] Char 굽: 0.003734647929404484 % +[725] Char 끄: 0.003734647929404484 % +[726] Char 삭: 0.003734647929404484 % +[727] Char 옮: 0.003734647929404484 % +[728] Char 힐: 0.003734647929404484 % +[729] Char 냄: 0.003734647929404484 % +[730] Char 흥: 0.003734647929404484 % +[731] Char 펄: 0.003734647929404484 % +[732] Char 쳤: 0.003734647929404484 % +[733] Char 쓴: 0.003734647929404484 % +[734] Char 틈: 0.003734647929404484 % +[735] Char 랄: 0.003734647929404484 % +[736] Char 튀: 0.003734647929404484 % +[737] Char 객: 0.003734647929404484 % +[738] Char 웰: 0.003734647929404484 % +[739] Char 닉: 0.003734647929404484 % +[740] Char 팅: 0.003734647929404484 % +[741] Char 펨: 0.003734647929404484 % +[742] Char 쏘: 0.0034678873630184493 % +[743] Char 멕: 0.0034678873630184493 % +[744] Char 둔: 0.0034678873630184493 % +[745] Char 옹: 0.0034678873630184493 % +[746] Char 펜: 0.0034678873630184493 % +[747] Char 잇: 0.0034678873630184493 % +[748] Char 잉: 0.0034678873630184493 % +[749] Char 엑: 0.0034678873630184493 % +[750] Char 촌: 0.0034678873630184493 % +[751] Char 찌: 0.003201126796632415 % +[752] Char 얕: 0.003201126796632415 % +[753] Char 깔: 0.003201126796632415 % +[754] Char 앉: 0.003201126796632415 % +[755] Char 튼: 0.003201126796632415 % +[756] Char 맡: 0.003201126796632415 % +[757] Char 렴: 0.003201126796632415 % +[758] Char 뤄: 0.003201126796632415 % +[759] Char 펼: 0.003201126796632415 % +[760] Char 납: 0.003201126796632415 % +[761] Char 룩: 0.003201126796632415 % +[762] Char 컨: 0.003201126796632415 % +[763] Char 뚜: 0.003201126796632415 % +[764] Char 듯: 0.00293436623024638 % +[765] Char 꾼: 0.00293436623024638 % +[766] Char 슐: 0.00293436623024638 % +[767] Char 롤: 0.00293436623024638 % +[768] Char 킹: 0.00293436623024638 % +[769] Char 렉: 0.00293436623024638 % +[770] Char 큐: 0.00293436623024638 % +[771] Char 숲: 0.00293436623024638 % +[772] Char 툴: 0.00293436623024638 % +[773] Char 빼: 0.00293436623024638 % +[774] Char 흘: 0.00293436623024638 % +[775] Char 솜: 0.00293436623024638 % +[776] Char 뿜: 0.00293436623024638 % +[777] Char 옆: 0.00293436623024638 % +[778] Char 쿼: 0.00293436623024638 % +[779] Char 싼: 0.00293436623024638 % +[780] Char 렷: 0.00293436623024638 % +[781] Char 겹: 0.0026676056638603456 % +[782] Char 쪼: 0.0026676056638603456 % +[783] Char 빅: 0.0026676056638603456 % +[784] Char 쯤: 0.0026676056638603456 % +[785] Char 쫓: 0.0026676056638603456 % +[786] Char 삽: 0.0026676056638603456 % +[787] Char 펴: 0.0026676056638603456 % +[788] Char 랭: 0.0026676056638603456 % +[789] Char 춤: 0.0026676056638603456 % +[790] Char 핼: 0.0026676056638603456 % +[791] Char 횡: 0.0026676056638603456 % +[792] Char 답: 0.0026676056638603456 % +[793] Char 썼: 0.0026676056638603456 % +[794] Char 슴: 0.0026676056638603456 % +[795] Char 탁: 0.0026676056638603456 % +[796] Char 툰: 0.0026676056638603456 % +[797] Char 틱: 0.0026676056638603456 % +[798] Char 랩: 0.0026676056638603456 % +[799] Char 듐: 0.0026676056638603456 % +[800] Char 딩: 0.0026676056638603456 % +[801] Char 탠: 0.0026676056638603456 % +[802] Char 넬: 0.0026676056638603456 % +[803] Char 꺾: 0.0026676056638603456 % +[804] Char 쿄: 0.0026676056638603456 % +[805] Char 둡: 0.0024008450974743107 % +[806] Char 듬: 0.0024008450974743107 % +[807] Char 닿: 0.0024008450974743107 % +[808] Char 멈: 0.0024008450974743107 % +[809] Char 듭: 0.0024008450974743107 % +[810] Char 딱: 0.0024008450974743107 % +[811] Char 킴: 0.0024008450974743107 % +[812] Char 짝: 0.0024008450974743107 % +[813] Char 숨: 0.0024008450974743107 % +[814] Char 님: 0.0024008450974743107 % +[815] Char 핍: 0.0024008450974743107 % +[816] Char 삶: 0.0024008450974743107 % +[817] Char 벡: 0.0024008450974743107 % +[818] Char 폼: 0.0024008450974743107 % +[819] Char 늦: 0.0024008450974743107 % +[820] Char 헐: 0.0024008450974743107 % +[821] Char 닛: 0.0024008450974743107 % +[822] Char 콰: 0.0024008450974743107 % +[823] Char 꼭: 0.0024008450974743107 % +[824] Char 롬: 0.0024008450974743107 % +[825] Char 셸: 0.0024008450974743107 % +[826] Char 겸: 0.0024008450974743107 % +[827] Char 쏠: 0.0024008450974743107 % +[828] Char 겠: 0.0024008450974743107 % +[829] Char 긋: 0.0024008450974743107 % +[830] Char 뱅: 0.0024008450974743107 % +[831] Char 멍: 0.0024008450974743107 % +[832] Char 봤: 0.0024008450974743107 % +[833] Char 덧: 0.0024008450974743107 % +[834] Char 빚: 0.0021340845310882763 % +[835] Char 뻣: 0.0021340845310882763 % +[836] Char 껍: 0.0021340845310882763 % +[837] Char 똑: 0.0021340845310882763 % +[838] Char 닭: 0.0021340845310882763 % +[839] Char 뒷: 0.0021340845310882763 % +[840] Char 홉: 0.0021340845310882763 % +[841] Char 즐: 0.0021340845310882763 % +[842] Char 쓸: 0.0021340845310882763 % +[843] Char 팩: 0.0021340845310882763 % +[844] Char 쥐: 0.0021340845310882763 % +[845] Char 짙: 0.0021340845310882763 % +[846] Char 즘: 0.0021340845310882763 % +[847] Char 닷: 0.0021340845310882763 % +[848] Char 톰: 0.0021340845310882763 % +[849] Char 릿: 0.0021340845310882763 % +[850] Char 닫: 0.001867323964702242 % +[851] Char 솟: 0.001867323964702242 % +[852] Char 옴: 0.001867323964702242 % +[853] Char 빗: 0.001867323964702242 % +[854] Char 죄: 0.001867323964702242 % +[855] Char 콩: 0.001867323964702242 % +[856] Char 첩: 0.001867323964702242 % +[857] Char 꼬: 0.001867323964702242 % +[858] Char 봄: 0.001867323964702242 % +[859] Char 앗: 0.001867323964702242 % +[860] Char 옵: 0.001867323964702242 % +[861] Char 팬: 0.001867323964702242 % +[862] Char 엽: 0.001867323964702242 % +[863] Char 졸: 0.001867323964702242 % +[864] Char 딴: 0.001867323964702242 % +[865] Char 셋: 0.001867323964702242 % +[866] Char 볍: 0.0016005633983162074 % +[867] Char 꽤: 0.0016005633983162074 % +[868] Char 맑: 0.0016005633983162074 % +[869] Char 봐: 0.0016005633983162074 % +[870] Char 빽: 0.0016005633983162074 % +[871] Char 굳: 0.0016005633983162074 % +[872] Char 닦: 0.0016005633983162074 % +[873] Char 묻: 0.0016005633983162074 % +[874] Char 듈: 0.0016005633983162074 % +[875] Char 녔: 0.0016005633983162074 % +[876] Char 혐: 0.0016005633983162074 % +[877] Char 셔: 0.0016005633983162074 % +[878] Char 렘: 0.0016005633983162074 % +[879] Char 켄: 0.0016005633983162074 % +[880] Char 룰: 0.0016005633983162074 % +[881] Char 셰: 0.0016005633983162074 % +[882] Char 듣: 0.0016005633983162074 % +[883] Char 셈: 0.0016005633983162074 % +[884] Char 뮌: 0.0016005633983162074 % +[885] Char 겔: 0.0016005633983162074 % +[886] Char 낭: 0.0016005633983162074 % +[887] Char 몽: 0.0016005633983162074 % +[888] Char 돼: 0.0016005633983162074 % +[889] Char 뱀: 0.0016005633983162074 % +[890] Char 륭: 0.0016005633983162074 % +[891] Char 랍: 0.0016005633983162074 % +[892] Char 딘: 0.0016005633983162074 % +[893] Char 듀: 0.0016005633983162074 % +[894] Char 퀘: 0.0016005633983162074 % +[895] Char 칩: 0.0016005633983162074 % +[896] Char 뚫: 0.0013338028319301728 % +[897] Char 굉: 0.0013338028319301728 % +[898] Char 묵: 0.0013338028319301728 % +[899] Char 헥: 0.0013338028319301728 % +[900] Char 꿀: 0.0013338028319301728 % +[901] Char 렐: 0.0013338028319301728 % +[902] Char 콧: 0.0013338028319301728 % +[903] Char 닝: 0.0013338028319301728 % +[904] Char 닐: 0.0013338028319301728 % +[905] Char 쾌: 0.0013338028319301728 % +[906] Char 낌: 0.0013338028319301728 % +[907] Char 섰: 0.0013338028319301728 % +[908] Char 덤: 0.0013338028319301728 % +[909] Char 톱: 0.0013338028319301728 % +[910] Char 빵: 0.0013338028319301728 % +[911] Char 춰: 0.0013338028319301728 % +[912] Char 뜬: 0.0013338028319301728 % +[913] Char 꿈: 0.0013338028319301728 % +[914] Char 떼: 0.0013338028319301728 % +[915] Char 뼈: 0.0013338028319301728 % +[916] Char 꼽: 0.0013338028319301728 % +[917] Char 츰: 0.0013338028319301728 % +[918] Char 릉: 0.0013338028319301728 % +[919] Char 콤: 0.0013338028319301728 % +[920] Char 퉁: 0.0013338028319301728 % +[921] Char 굵: 0.0013338028319301728 % +[922] Char 럿: 0.0013338028319301728 % +[923] Char 밥: 0.0013338028319301728 % +[924] Char 훌: 0.0013338028319301728 % +[925] Char 픔: 0.0013338028319301728 % +[926] Char 찢: 0.0013338028319301728 % +[927] Char 팟: 0.0013338028319301728 % +[928] Char 땀: 0.0013338028319301728 % +[929] Char 밴: 0.0013338028319301728 % +[930] Char 쇠: 0.0013338028319301728 % +[931] Char 덟: 0.0013338028319301728 % +[932] Char 팸: 0.0013338028319301728 % +[933] Char 딜: 0.0013338028319301728 % +[934] Char 댓: 0.0010670422655441381 % +[935] Char 궈: 0.0010670422655441381 % +[936] Char 괄: 0.0010670422655441381 % +[937] Char 띤: 0.0010670422655441381 % +[938] Char 왼: 0.0010670422655441381 % +[939] Char 샘: 0.0010670422655441381 % +[940] Char 뿌: 0.0010670422655441381 % +[941] Char 몹: 0.0010670422655441381 % +[942] Char 쁜: 0.0010670422655441381 % +[943] Char 쁘: 0.0010670422655441381 % +[944] Char 눠: 0.0010670422655441381 % +[945] Char 캔: 0.0010670422655441381 % +[946] Char 퀸: 0.0010670422655441381 % +[947] Char 뽑: 0.0010670422655441381 % +[948] Char 뮴: 0.0010670422655441381 % +[949] Char 잎: 0.0010670422655441381 % +[950] Char 펀: 0.0010670422655441381 % +[951] Char 끗: 0.0010670422655441381 % +[952] Char 컸: 0.0010670422655441381 % +[953] Char 갱: 0.0010670422655441381 % +[954] Char 뷔: 0.0010670422655441381 % +[955] Char 젤: 0.0010670422655441381 % +[956] Char 엥: 0.0010670422655441381 % +[957] Char 쭉: 0.0010670422655441381 % +[958] Char 웅: 0.0010670422655441381 % +[959] Char 첨: 0.0010670422655441381 % +[960] Char 딪: 0.0010670422655441381 % +[961] Char 훗: 0.0010670422655441381 % +[962] Char 돕: 0.0010670422655441381 % +[963] Char 볕: 0.0010670422655441381 % +[964] Char 켁: 0.0010670422655441381 % +[965] Char 샌: 0.0010670422655441381 % +[966] Char 걀: 0.0010670422655441381 % +[967] Char 흙: 0.0010670422655441381 % +[968] Char 텍: 0.0010670422655441381 % +[969] Char 팜: 0.0010670422655441381 % +[970] Char 튜: 0.0010670422655441381 % +[971] Char 눅: 0.0010670422655441381 % +[972] Char 쬐: 0.0010670422655441381 % +[973] Char 뀔: 0.0010670422655441381 % +[974] Char 옳: 0.0010670422655441381 % +[975] Char 앨: 0.0010670422655441381 % +[976] Char 뎀: 0.0010670422655441381 % +[977] Char 싣: 0.0008002816991581037 % +[978] Char 줘: 0.0008002816991581037 % +[979] Char 줌: 0.0008002816991581037 % +[980] Char 띈: 0.0008002816991581037 % +[981] Char 쌀: 0.0008002816991581037 % +[982] Char 굶: 0.0008002816991581037 % +[983] Char 헛: 0.0008002816991581037 % +[984] Char 깐: 0.0008002816991581037 % +[985] Char 껴: 0.0008002816991581037 % +[986] Char 딕: 0.0008002816991581037 % +[987] Char 둑: 0.0008002816991581037 % +[988] Char 탱: 0.0008002816991581037 % +[989] Char 흉: 0.0008002816991581037 % +[990] Char 넥: 0.0008002816991581037 % +[991] Char 켐: 0.0008002816991581037 % +[992] Char 맛: 0.0008002816991581037 % +[993] Char 닮: 0.0008002816991581037 % +[994] Char 돗: 0.0008002816991581037 % +[995] Char 늑: 0.0008002816991581037 % +[996] Char 젖: 0.0008002816991581037 % +[997] Char 탓: 0.0008002816991581037 % +[998] Char 쏟: 0.0008002816991581037 % +[999] Char 쿨: 0.0008002816991581037 % +[1000] Char 팝: 0.0008002816991581037 % +[1001] Char 탔: 0.0008002816991581037 % +[1002] Char 샵: 0.0008002816991581037 % +[1003] Char 넛: 0.0008002816991581037 % +[1004] Char 밸: 0.0008002816991581037 % +[1005] Char 샐: 0.0008002816991581037 % +[1006] Char 냅: 0.0008002816991581037 % +[1007] Char 팡: 0.0008002816991581037 % +[1008] Char 앵: 0.0008002816991581037 % +[1009] Char 룡: 0.0008002816991581037 % +[1010] Char 잭: 0.0008002816991581037 % +[1011] Char 싶: 0.0008002816991581037 % +[1012] Char 옷: 0.0008002816991581037 % +[1013] Char 갓: 0.0008002816991581037 % +[1014] Char 튕: 0.0008002816991581037 % +[1015] Char 믹: 0.0008002816991581037 % +[1016] Char 쇤: 0.0008002816991581037 % +[1017] Char 쭈: 0.0008002816991581037 % +[1018] Char 샨: 0.0008002816991581037 % +[1019] Char 잰: 0.0008002816991581037 % +[1020] Char 퀄: 0.0008002816991581037 % +[1021] Char 턱: 0.0008002816991581037 % +[1022] Char 꽃: 0.0008002816991581037 % +[1023] Char 넌: 0.0008002816991581037 % +[1024] Char 븀: 0.0008002816991581037 % +[1025] Char 엡: 0.0008002816991581037 % +[1026] Char 눴: 0.0005335211327720691 % +[1027] Char 랬: 0.0005335211327720691 % +[1028] Char 돔: 0.0005335211327720691 % +[1029] Char 윗: 0.0005335211327720691 % +[1030] Char 믐: 0.0005335211327720691 % +[1031] Char 옐: 0.0005335211327720691 % +[1032] Char 싹: 0.0005335211327720691 % +[1033] Char 붓: 0.0005335211327720691 % +[1034] Char 뻐: 0.0005335211327720691 % +[1035] Char 잊: 0.0005335211327720691 % +[1036] Char 첼: 0.0005335211327720691 % +[1037] Char 츄: 0.0005335211327720691 % +[1038] Char 깎: 0.0005335211327720691 % +[1039] Char 엉: 0.0005335211327720691 % +[1040] Char 캡: 0.0005335211327720691 % +[1041] Char 넵: 0.0005335211327720691 % +[1042] Char 뀜: 0.0005335211327720691 % +[1043] Char 툼: 0.0005335211327720691 % +[1044] Char 샹: 0.0005335211327720691 % +[1045] Char 퀼: 0.0005335211327720691 % +[1046] Char 쑥: 0.0005335211327720691 % +[1047] Char 셨: 0.0005335211327720691 % +[1048] Char 낫: 0.0005335211327720691 % +[1049] Char 텀: 0.0005335211327720691 % +[1050] Char 펌: 0.0005335211327720691 % +[1051] Char 곁: 0.0005335211327720691 % +[1052] Char 돋: 0.0005335211327720691 % +[1053] Char 핫: 0.0005335211327720691 % +[1054] Char 썰: 0.0005335211327720691 % +[1055] Char 촛: 0.0005335211327720691 % +[1056] Char 됭: 0.0005335211327720691 % +[1057] Char 룽: 0.0005335211327720691 % +[1058] Char 몫: 0.0005335211327720691 % +[1059] Char 숀: 0.0005335211327720691 % +[1060] Char 긍: 0.0005335211327720691 % +[1061] Char 엮: 0.0005335211327720691 % +[1062] Char 걷: 0.0005335211327720691 % +[1063] Char 챈: 0.0005335211327720691 % +[1064] Char 콥: 0.0005335211327720691 % +[1065] Char 짱: 0.0005335211327720691 % +[1066] Char 맵: 0.0005335211327720691 % +[1067] Char 읍: 0.0005335211327720691 % +[1068] Char 눗: 0.0005335211327720691 % +[1069] Char 쑨: 0.0005335211327720691 % +[1070] Char 엷: 0.0005335211327720691 % +[1071] Char 뱃: 0.0005335211327720691 % +[1072] Char 굿: 0.0005335211327720691 % +[1073] Char 쩌: 0.0005335211327720691 % +[1074] Char 갗: 0.0005335211327720691 % +[1075] Char 셉: 0.0005335211327720691 % +[1076] Char 뺀: 0.0005335211327720691 % +[1077] Char 렛: 0.0005335211327720691 % +[1078] Char 뛴: 0.0005335211327720691 % +[1079] Char 쐬: 0.0005335211327720691 % +[1080] Char 큘: 0.0005335211327720691 % +[1081] Char 삐: 0.0005335211327720691 % +[1082] Char 얽: 0.0005335211327720691 % +[1083] Char 잴: 0.0005335211327720691 % +[1084] Char 늙: 0.0005335211327720691 % +[1085] Char 꿔: 0.0005335211327720691 % +[1086] Char 쐐: 0.0005335211327720691 % +[1087] Char 뭇: 0.0005335211327720691 % +[1088] Char 빔: 0.0005335211327720691 % +[1089] Char 깅: 0.0005335211327720691 % +[1090] Char 얹: 0.0005335211327720691 % +[1091] Char 씌: 0.0005335211327720691 % +[1092] Char 뇨: 0.0005335211327720691 % +[1093] Char 콕: 0.0005335211327720691 % +[1094] Char 븐: 0.0005335211327720691 % +[1095] Char 넉: 0.0005335211327720691 % +[1096] Char 뭍: 0.0005335211327720691 % +[1097] Char 핸: 0.0005335211327720691 % +[1098] Char 텡: 0.0005335211327720691 % +[1099] Char 뒀: 0.00026676056638603454 % +[1100] Char 푹: 0.00026676056638603454 % +[1101] Char 녁: 0.00026676056638603454 % +[1102] Char 뵈: 0.00026676056638603454 % +[1103] Char 딛: 0.00026676056638603454 % +[1104] Char 꽂: 0.00026676056638603454 % +[1105] Char 찧: 0.00026676056638603454 % +[1106] Char 녕: 0.00026676056638603454 % +[1107] Char ㄱ: 0.00026676056638603454 % +[1108] Char ㄴ: 0.00026676056638603454 % +[1109] Char ㄷ: 0.00026676056638603454 % +[1110] Char ㄹ: 0.00026676056638603454 % +[1111] Char ㅁ: 0.00026676056638603454 % +[1112] Char ㅂ: 0.00026676056638603454 % +[1113] Char ㅅ: 0.00026676056638603454 % +[1114] Char ㅇ: 0.00026676056638603454 % +[1115] Char ㅈ: 0.00026676056638603454 % +[1116] Char ㅊ: 0.00026676056638603454 % +[1117] Char ㅋ: 0.00026676056638603454 % +[1118] Char ㅌ: 0.00026676056638603454 % +[1119] Char ㅍ: 0.00026676056638603454 % +[1120] Char ㅎ: 0.00026676056638603454 % +[1121] Char 둠: 0.00026676056638603454 % +[1122] Char 싫: 0.00026676056638603454 % +[1123] Char 넨: 0.00026676056638603454 % +[1124] Char 뚱: 0.00026676056638603454 % +[1125] Char 뾰: 0.00026676056638603454 % +[1126] Char 랠: 0.00026676056638603454 % +[1127] Char 짊: 0.00026676056638603454 % +[1128] Char 벰: 0.00026676056638603454 % +[1129] Char 뜰: 0.00026676056638603454 % +[1130] Char 팠: 0.00026676056638603454 % +[1131] Char 눔: 0.00026676056638603454 % +[1132] Char 넴: 0.00026676056638603454 % +[1133] Char 뻬: 0.00026676056638603454 % +[1134] Char 켑: 0.00026676056638603454 % +[1135] Char 꺽: 0.00026676056638603454 % +[1136] Char 퍽: 0.00026676056638603454 % +[1137] Char 뉠: 0.00026676056638603454 % +[1138] Char 웬: 0.00026676056638603454 % +[1139] Char 쾰: 0.00026676056638603454 % +[1140] Char 엠: 0.00026676056638603454 % +[1141] Char 큉: 0.00026676056638603454 % +[1142] Char 궐: 0.00026676056638603454 % +[1143] Char 뮈: 0.00026676056638603454 % +[1144] Char 찔: 0.00026676056638603454 % +[1145] Char 촐: 0.00026676056638603454 % +[1146] Char 륀: 0.00026676056638603454 % +[1147] Char 휩: 0.00026676056638603454 % +[1148] Char 꾀: 0.00026676056638603454 % +[1149] Char 퓰: 0.00026676056638603454 % +[1150] Char 숄: 0.00026676056638603454 % +[1151] Char 샴: 0.00026676056638603454 % +[1152] Char 푼: 0.00026676056638603454 % +[1153] Char 펑: 0.00026676056638603454 % +[1154] Char 뵘: 0.00026676056638603454 % +[1155] Char 셴: 0.00026676056638603454 % +[1156] Char 웁: 0.00026676056638603454 % +[1157] Char 귄: 0.00026676056638603454 % +[1158] Char 뵐: 0.00026676056638603454 % +[1159] Char 펭: 0.00026676056638603454 % +[1160] Char 챔: 0.00026676056638603454 % +[1161] Char 잿: 0.00026676056638603454 % +[1162] Char 뽐: 0.00026676056638603454 % +[1163] Char 뜩: 0.00026676056638603454 % +[1164] Char 깬: 0.00026676056638603454 % +[1165] Char 팁: 0.00026676056638603454 % +[1166] Char 릇: 0.00026676056638603454 % +[1167] Char 뺐: 0.00026676056638603454 % +[1168] Char 샷: 0.00026676056638603454 % +[1169] Char 멋: 0.00026676056638603454 % +[1170] Char 넸: 0.00026676056638603454 % +[1171] Char 륌: 0.00026676056638603454 % +[1172] Char 릅: 0.00026676056638603454 % +[1173] Char 셌: 0.00026676056638603454 % +[1174] Char 꾐: 0.00026676056638603454 % +[1175] Char 벅: 0.00026676056638603454 % +[1176] Char 볜: 0.00026676056638603454 % +[1177] Char 빴: 0.00026676056638603454 % +[1178] Char 뤽: 0.00026676056638603454 % +[1179] Char 흼: 0.00026676056638603454 % +[1180] Char 갭: 0.00026676056638603454 % +[1181] Char 낄: 0.00026676056638603454 % +[1182] Char 썬: 0.00026676056638603454 % +[1183] Char 윽: 0.00026676056638603454 % +[1184] Char 챘: 0.00026676056638603454 % +[1185] Char 맴: 0.00026676056638603454 % +[1186] Char 쭝: 0.00026676056638603454 % +[1187] Char 훅: 0.00026676056638603454 % +[1188] Char 댄: 0.00026676056638603454 % +[1189] Char 욘: 0.00026676056638603454 % +[1190] Char 숯: 0.00026676056638603454 % +[1191] Char 렁: 0.00026676056638603454 % +[1192] Char 댁: 0.00026676056638603454 % +[1193] Char 쌌: 0.00026676056638603454 % +[1194] Char 팥: 0.00026676056638603454 % +[1195] Char 쩍: 0.00026676056638603454 % +[1196] Char 텼: 0.00026676056638603454 % +[1197] Char 젭: 0.00026676056638603454 % +[1198] Char 얘: 0.00026676056638603454 % +[1199] Char 텅: 0.00026676056638603454 % +[1200] Char 땄: 0.00026676056638603454 % +[1201] Char 껑: 0.00026676056638603454 % +[1202] Char 쩐: 0.00026676056638603454 % +[1203] Char 밭: 0.00026676056638603454 % +[1204] Char 쾨: 0.00026676056638603454 % +[1205] Char 뻔: 0.00026676056638603454 % +[1206] Char 퀀: 0.00026676056638603454 % +[1207] Char 텃: 0.00026676056638603454 % +[1208] Char 꽁: 0.00026676056638603454 % +[1209] Char 폈: 0.00026676056638603454 % +[1210] Char 똥: 0.00026676056638603454 % +[1211] Char 챙: 0.00026676056638603454 % +[1212] Char 훔: 0.00026676056638603454 % +[1213] Char 쎄: 0.00026676056638603454 % +[1214] Char 잣: 0.00026676056638603454 % +[1215] Char 썩: 0.00026676056638603454 % +[1216] Char 갰: 0.00026676056638603454 % +[1217] Char 쉐: 0.00026676056638603454 % +[1218] Char 멓: 0.00026676056638603454 % +[1219] Char 맬: 0.00026676056638603454 % +[1220] Char 홑: 0.00026676056638603454 % +[1221] Char 늠: 0.00026676056638603454 % +[1222] Char 맷: 0.00026676056638603454 % +[1223] Char 삿: 0.00026676056638603454 % +[1224] Char 윳: 0.00026676056638603454 % +[1225] Char 룀: 0.00026676056638603454 % +[1226] Char 칫: 0.00026676056638603454 % +[1227] Char ㆍ: 0.00026676056638603454 % +[1228] Char 맣: 0.00026676056638603454 % +[1229] Char 낵: 0.00026676056638603454 % +[1230] Char 댐: 0.00026676056638603454 % +[1231] Char 딥: 0.00026676056638603454 % +[1232] Char 갬: 0.00026676056638603454 % +[1233] Char 밍: 0.00026676056638603454 % +[1234] Char 흄: 0.00026676056638603454 % +[1235] Char 벳: 0.00026676056638603454 % +[1236] Char 햐: 0.00026676056638603454 % +[1237] Char 괜: 0.00026676056638603454 % +[1238] Char 찮: 0.00026676056638603454 % +[1239] Char 젱: 0.00026676056638603454 % +[1240] Char 퓸: 0.00026676056638603454 % +[1241] Char 큠: 0.00026676056638603454 % +[1242] Char 뺄: 0.00026676056638603454 % +[1243] Char 헷: 0.00026676056638603454 % +[1244] Char 잦: 0.00026676056638603454 % +[1245] Char 횟: 0.00026676056638603454 % +[1246] Char 푄: 0.00026676056638603454 % +[1247] Char 뭄: 0.00026676056638603454 % +[1248] Char 늪: 0.00026676056638603454 % +[1249] Char 쥬: 0.00026676056638603454 % +[1250] Char 뎠: 0.00026676056638603454 % +[1251] Char 엿: 0.00026676056638603454 % +[1252] Char 웍: 0.00026676056638603454 % +[1253] Char 봇: 0.00026676056638603454 % +[1254] Char 킥: 0.00026676056638603454 % +[1255] Char 캠: 0.00026676056638603454 % +[1256] Char 퓌: 0.00026676056638603454 % +[1257] Char 왈: 0.00026676056638603454 % +[1258] Char 곷: 0.00026676056638603454 % +[1259] Char 켕: 0.00026676056638603454 % +[1260] Char 훼: 0.00026676056638603454 % -The first 574 characters have an accumulated ratio of 0.9900230859580663. +The first 1261 characters have an accumulated ratio of 0.9999999999999901. +The first 0 characters have an accumulated ratio of 0. +All characters whose order is over 454 have an accumulated ratio of 0.030031904563739758. -14099 sequences found. +22333 sequences found. -First 13365 (typical positive ratio): 0.995000852514919 -Next 587 (13952-13365): 0.004003410059676082 -Rest: 0.00099573742540493 +First 21098 (typical positive ratio): 0.995002875169068 +Next 988 (22086-21098): 0.00400093949186453 +Rest: 0.0009961853390674236 -- Processing end: 2021-03-18 22:02:18.933817 +- Processing end: 2022-12-14 17:27:50.280077 diff --git a/script/BuildLangModelLogs/LangLatvianModel.log b/script/BuildLangModelLogs/LangLatvianModel.log index 3fafa6c..08e06b6 100644 --- a/script/BuildLangModelLogs/LangLatvianModel.log +++ b/script/BuildLangModelLogs/LangLatvianModel.log @@ -1,165 +1,259 @@ = Logs of language model for Latvian (lv) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:26:37.227238 +- Started: 2022-12-14 18:07:58.784895 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Zigfrīds Anna Meierovics (revision 3325285) -1. Saeima (revision 3366185) -1. Saeimas deputāti (revision 3368139) -1. Saeimas frakcijas (revision 3366184) -1. Saeimas vēlēšanas (revision 3330484) -1887. gads (revision 2773799) -1919. gada Parīzes miera konference (revision 3359347) -1920 (revision 3362733) -1921 (revision 3340387) -1922 (revision 3337740) -1923 (revision 3347028) -1924 (revision 3347028) -1925 (revision 3347028) -22. augusts (revision 3327223) -31. jūlijs (revision 3347080) -5. februāris (revision 3364814) -Agrārā reforma Latvijā (revision 3328548) -Agudas Izrael (Latvija) (revision 3285729) -Aigars Kalvītis (revision 3169702) -Alberts Kviesis (revision 3379738) -Aleksandrs Bočagovs (revision 3004343) -Aleksandrs Dauge (revision 3062538) -Aleksandrs Jaunbērzs (revision 3373734) -Aleksandrs Kerenskis (revision 2758772) -Aleksandrs Millerāns (revision 3108576) -Aleksandrs Neibergs (deputāts) (revision 3349399) -Alfrēds Birznieks (revision 3300916) -Alfrēds Jēkabs Bērziņš (revision 3351998) -Alfrēds Riekstiņš (politiķis) (revision 3034089) -Amerikas Savienotās Valstis (revision 3355214) -Andrejs Bērziņš (politiķis) (revision 3089135) -Andrejs Kurcijs (revision 3223696) -Andrejs Petrevics (revision 2460269) -Andrejs Sīmanis (revision 3210302) -Andrejs Veckalns (revision 3237365) -Andrievs Niedra (revision 3374557) -Andris Bērziņš (politiķis, 1951) (revision 3231604) -Andris Šķēle (revision 3379347) -Angļu valoda (revision 3303218) -Ansis Buševics (revision 2927384) -Ansis Rudevics (revision 2700953) -Antante (revision 3373256) -Antons Dzenis (revision 2564295) -Antons Laizāns (revision 3360427) -Antons Rubins (1885) (revision 3351508) -Antons Velkme (revision 3279136) -Ants Pīps (revision 3375003) -Apollo (portāls) (revision 3232284) -Apolonija Laurinoviča (revision 3209013) -Aprīļa pučs (revision 3010427) -Apvienotā Karaliste (revision 3382180) -Aristīds Briāns (revision 2767296) -Arons Nuroks (revision 3062127) -Arturs Alberings (revision 3325257) -Arturs Krišjānis Kariņš (revision 3381504) -Arturs Ozols (inženieris) (revision 3352707) -Artūrs Balfūrs (revision 3177309) -Artūrs Reisners (revision 3300906) -Artūrs Vīgants (revision 3296217) -Artūrs Žers (revision 3296461) -Arveds Bergs (revision 3238379) -Arveds Švābe (revision 3340584) -Arvīds Kalniņš (ķīmiķis) (revision 3382254) -Aspazija (revision 3382469) -Augusts Briedis (revision 3163311) -Augusts Kalniņš (revision 3310251) -Augusts Kirhenšteins (revision 3302758) -Austroungārija (revision 3376635) -Autoritatīvā vadība (revision 2385793) -Balfūra nota (revision 3224093) -Baltijas Antante (revision 3236261) -Baltijas pārkrievošana (revision 3311586) -Bermontiāde (revision 3156269) -Bernards Kublinskis (revision 2441386) -Berta Vesmane (revision 3299697) -Bezpartijiskais nacionālais centrs (revision 3286113) -Beļģija (revision 3308106) -Brestļitovskas miera līgums (revision 3348377) -Brizules muiža (revision 3103947) -Bruno Kalniņš (revision 3297011) -Brīvības piemineklis (revision 3343774) -Bulduru konference (revision 3122422) -Bunds (revision 3368404) -Ceire-Cion (revision 3285715) -Celmiņa 1. Ministru kabinets (revision 2925529) -Delfi (portāls) (revision 3363824) -Demokrātiskais Centrs (revision 3286115) -Demokrātu savienība (revision 3339759) -Diena (laikraksts) (revision 3343800) -Donats Bicāns (revision 3311441) -Dubulti (Jūrmala) (revision 3349180) -Durbe (revision 3380441) -Dāvids Komisārs (revision 3082713) -Džovanni Džoliti (revision 3165202) -Ebreji (revision 3340750) -Ebreju bloks (revision 3285659) -Ebreju nacionāldemokrātu partija (revision 3368172) -Eduards Grantskalns (revision 2932497) +Zigfrīds Anna Meierovics (revision 3686782) +Francis Trasuns (revision 3731532) +Vācbaltu progresīvā partija (revision 2783454) +31. jūlijs (revision 3718305) +Ģermāņu valodas (revision 3667654) +Delfi (portāls) (revision 3738053) +Pauls Kalniņš (politiķis) (revision 3646687) +Latvijas Ministru prezidents (revision 3646904) +Īzaks Berss (revision 3548498) +Kārlis Benze (revision 3674238) +Ženēva (revision 3457596) +Latvijas okupācija (1940) (revision 3715053) +Francis Kemps (revision 3715202) +Kārlis Karlsons (revision 3549381) +Ebreju nacionāldemokrātu partija (revision 3456556) +Apollo (portāls) (revision 3608086) +Ferdināns de Sosīrs (revision 3584492) +Latvija (revision 3741789) +Augškurzeme (revision 3697087) +Jēkabs Jansons (revision 3675095) +Februāris (revision 3737039) +Anrī Frederiks Amjels (revision 3584507) +1905. gada revolūcija Latvijā (revision 3708101) +Vācbaltu reformu partija (revision 2783453) +Nacionālā apvienība (pirmskara) (revision 3731258) +Latgaliešu valoda (revision 3735589) +Brazīlija (revision 3697807) +Arveds Švābe (revision 3682767) +Voldemārs Bastjānis (revision 3716209) +Vēsture (revision 3693386) +Rietumģermāņu valodas (revision 2720009) +Ernests Morics (revision 3674395) +Eduards Tomass (revision 3675137) +Pāvels Meļņikovs (būvuzņēmējs) (revision 3675116) +1928 (revision 3668599) +Bīskaps (revision 3619441) +Džastins Vilsons (revision 3575051) +Berta Vesmane (revision 3675315) +Kārlis Ulmanis (revision 3724575) +Voldemārs Salnais (revision 3560689) +Latvijas Republikas kultūras ministru uzskaitījums (revision 3682627) +Latvijas Valsts prezidents (revision 3736519) +Latvijas Pagaidu valdība (revision 3608584) +Latviešu zemnieku savienība (revision 3561832) +Vācbaltu demokrātu partija (revision 2939545) +Krišs Ķūķis (revision 3366239) +Latvijas Tautas padome (revision 3673890) +Miķelis Bružis (revision 3675043) +Sarkanā armija (revision 3713980) +Eduards Grantskalns (revision 3664250) +Andrejs Vaivods (revision 3674911) +12. Saeimas vēlēšanas (revision 3663152) +Vācbaltieši (revision 3730519) +Valodu saime (revision 3515480) +Livonijas Konfederācija (revision 3734854) +Fēlikss Cielēns (revision 3742722) +Liberālisms (revision 3401436) +Laimdota Straujuma (revision 3676634) +Alberts Erniņš (revision 3674863) +Nāciju Līga (revision 3435512) +1875. gads (revision 3705074) +Jānis Rubulis (revision 3674501) +Ernests Bauers (revision 3560912) +Centrālā vēlēšanu komisija (revision 3035506) +Kristaps Eliass (revision 3674320) +Agudas Izrael (Latvija) (revision 3684383) +Marģers Skujenieks (revision 3560686) +Windows (revision 3689779) +Arveds Bergs (revision 3561885) +Saeima (revision 3730798) +Maskava (revision 3734444) +10. oktobris (revision 3683381) +Neatkarīgā Rīta Avīze (revision 3658183) +Latvijas Republikas Ministru prezidenta biedru uzskaitījums (revision 3682725) +Vikikrātuve (revision 3678520) +Diena (laikraksts) (revision 3720910) +Latviešu valoda (revision 3737628) +Wayback Machine (revision 3651273) +Vācbaltu partiju padome (revision 3286112) +Emisārs (revision 3133029) +Aleksandrs Neibergs (deputāts) (revision 3675118) +Autoritatīvā vadība (revision 3701100) +Mordehajs Dubins (revision 3674318) +26. augusts (revision 3682982) +Padomju Savienība (revision 3739451) +Bernards Kublinskis (revision 3674292) +Indriķis Segliņš (revision 3675131) +Nīderlandiešu valoda (revision 3667656) +Latvijas okupācija (1941—1945) (revision 3715054) +Juris Naķelis (revision 3675117) +Pāvils Laizāns (revision 3675108) +Ministru kabinets (revision 3743490) +Elī Dikomēns (revision 3584749) +Jidišs (revision 3684570) +Skotu valoda (revision 3219822) +Saeimas priekšsēdētājs (revision 3716447) +Eiropa (revision 3634406) +Idumeja (revision 3722415) +Norvēģija (revision 3715078) +Ernests Felsbergs (revision 3698972) +1556. gads (revision 2908632) +Ģermāņi (revision 3646784) +Pirmais Latgales latviešu kongress (revision 3739120) +Vilhelms Šreiners (revision 3561925) +Ulmaņa apvērsums (revision 3703319) +Latvijas Republikas Satversme (revision 3736529) +Ženēvas konvencijas (revision 3462803) +21. oktobris (revision 3710603) +Starptautiskā Sarkanā Krusta komiteja (revision 3496430) +Fricis Kociņš (revision 3703749) +Latvijas Banka (revision 3640892) +MicroLink (revision 3717740) +Latvijas Nacionālā bibliotēka (revision 3705556) +18. jūlijs (revision 3707951) +31. augusts (revision 3653712) +Konstantīns Mateuss (revision 3421386) +Jānis Beļinskis (revision 3610526) +Valodniecība (revision 3606074) +Oto Nonācs (revision 3560641) +28. janvāris (revision 3657218) +26. jūlijs (revision 3670740) +Krišs Birznieks (revision 3675042) +Zviedru valoda (revision 3695436) +Afrikandu valoda (revision 3306666) +Kauņa (revision 3660641) +Viktors Barkāns (revision 3674296) +1. maijs (revision 3662020) +Vācu valoda (revision 3689286) +Kongresa bibliotēka (revision 3694816) +14. februāris (revision 3649962) +Polijas kampaņa (revision 3666922) +Jānis Kalējs (revision 3674343) +Kārlis Pauļuks (revision 3610582) +29. janvāris (revision 3656956) +Simons Vitenbergs (revision 3674913) +Pēteris Klūge (revision 3675104) +Kultūra (revision 3682184) +Latgales Kristīgo zemnieku savienība (revision 3705204) +Mehiko (revision 3453294) +Lejasreina (revision 3714713) +Jānis Čakste (revision 3738021) +Municipalitāte (revision 3585876) +Tēvzemei un Brīvībai/LNNK (revision 3707655) +Dzintars Jaundžeikars (revision 3682703) +Senprūšu valoda (revision 3736763) +Latvijas Vācu baltiešu partija (revision 3286112) +Kārlis Bojārs (revision 3308264) +Fricis Menders (revision 3741179) +Holocēna kalendārs (revision 2706026) +Izolēta valoda (revision 2899559) +Ķīļu raksts (revision 3140033) +Valsts himna (revision 3663188) +Jānis Ģībietis (revision 3686690) +Hugo Celmiņš (revision 3619576) +Vilis Gulbis (revision 3560610) +Norvēģu valoda (revision 3521177) +Eglaines pagasts (revision 3670612) +Ernests Gulbis (politiķis) (revision 3674326) +Labās Cerības rags (revision 3540807) +Līvzemes dalīšanas līgums (revision 3611159) +Visma Enterprise (revision 3489752) +Unikods (revision 3361628) +10. septembris (revision 3676364) +1. Saeima (revision 3701704) +11. aprīlis (revision 3600273) +Kārlis Kellers (revision 3729524) +Rumāņu valoda (revision 3038303) +Operācija "Fall Weiss" (revision 3579610) +Latvijas Republikas zvejniecības valsts ministru uzskaitījums (revision 2709120) +Valoda (revision 3620000) +Būkmols (revision 3514273) +22. septembris (revision 3635215) +Jānis Goliass (revision 3675054) +Artūrs Vīgants (revision 3675316) +26. novembris (revision 3733155) +Jauns.lv (revision 3710989) +Žano Trons (revision 3675138) +Sloka (Jūrmala) (revision 3727178) +Nauris Puntulis (revision 3743563) +Palmasa (revision 3475815) +26. septembris (revision 3693368) +Latgales atbrīvošana (revision 3580703) +Jānis Mežaraups (revision 3674896) +Ceire-Cion (revision 3668866) +Roberts Bīlmanis (revision 3674311) +1985. gads (revision 3697425) +Straujumas 1. Ministru kabinets (revision 3610693) +Staņislavs Ivbuls (revision 3560935) +5. maijs (revision 3737082) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:30:28.292124 +- Wikipedia parsing ended at: 2022-12-14 18:11:03.757816 -55 characters appeared 437791 times. +67 characters appeared 811282 times. -First 40 characters: -[ 0] Char a: 11.993622527644469 % -[ 1] Char i: 9.41179695334075 % -[ 2] Char s: 8.204599911830075 % -[ 3] Char e: 6.371761868106014 % -[ 4] Char t: 5.8011699646635035 % -[ 5] Char r: 5.772845947038655 % -[ 6] Char u: 4.945053690002764 % -[ 7] Char n: 4.437505567725239 % -[ 8] Char ā: 4.014015820334361 % -[ 9] Char l: 3.6974263975275874 % -[10] Char o: 3.597150238355745 % -[11] Char k: 3.5347917156816835 % -[12] Char m: 3.307971155185922 % -[13] Char d: 3.2337348186691823 % -[14] Char v: 2.977904982057648 % -[15] Char j: 2.8618678775945603 % -[16] Char p: 2.8296607285211435 % -[17] Char b: 2.040242946976982 % -[18] Char ī: 1.874638811670409 % -[19] Char g: 1.6240626234892905 % -[20] Char z: 1.5235580448204737 % -[21] Char ē: 1.5109949724868716 % -[22] Char c: 1.216105401892684 % -[23] Char š: 0.9225863482803439 % -[24] Char ņ: 0.45478321847639624 % -[25] Char f: 0.42691603984549703 % -[26] Char ļ: 0.3277819781585277 % -[27] Char ū: 0.29420431210326387 % -[28] Char h: 0.18616189003428577 % -[29] Char ž: 0.1815935000947941 % -[30] Char ķ: 0.126772820820894 % -[31] Char ģ: 0.11649394345703772 % -[32] Char č: 0.08382995538967224 % -[33] Char y: 0.029466115109721306 % -[34] Char w: 0.029466115109721306 % -[35] Char x: 0.012334652836627522 % -[36] Char é: 0.0050252289334408425 % -[37] Char ö: 0.0034262924546187568 % -[38] Char ü: 0.0027410339636950052 % -[39] Char q: 0.0025126144667204212 % +Most Frequent characters: +[ 0] Char a: 11.964273828335893 % +[ 1] Char i: 9.388474044783441 % +[ 2] Char s: 8.539447442442949 % +[ 3] Char e: 6.090237426690103 % +[ 4] Char t: 5.952430844022177 % +[ 5] Char r: 5.463081887679007 % +[ 6] Char u: 4.896324582574247 % +[ 7] Char n: 4.3900887730776725 % +[ 8] Char ā: 3.9818460165515814 % +[ 9] Char l: 3.8376298253874728 % +[10] Char o: 3.7158472639600038 % +[11] Char k: 3.6045419471897566 % +[12] Char d: 3.30513927339692 % +[13] Char m: 3.237591860783304 % +[14] Char v: 3.019911695316795 % +[15] Char p: 2.873353531817543 % +[16] Char j: 2.6969660364706725 % +[17] Char b: 1.8278477767286836 % +[18] Char ī: 1.7969090895644177 % +[19] Char g: 1.722828806752769 % +[20] Char z: 1.5090930157454499 % +[21] Char ē: 1.4939318264179409 % +[22] Char c: 1.2932617758066862 % +[23] Char š: 0.9434450659573367 % +[24] Char ņ: 0.42130849692215533 % +[25] Char f: 0.39049307145973905 % +[26] Char ū: 0.32072694821282854 % +[27] Char ļ: 0.31826171417583526 % +[28] Char h: 0.2571239100584014 % +[29] Char ž: 0.17121050386918482 % +[30] Char ģ: 0.14717447200850012 % +[31] Char ķ: 0.1363274422457296 % +[32] Char w: 0.09417194021314414 % +[33] Char č: 0.09059735085950385 % +[34] Char y: 0.048318587125068715 % +[35] Char x: 0.01787294676820144 % +[36] Char æ: 0.00628634679433292 % +[37] Char é: 0.004683944670287274 % +[38] Char ä: 0.0039443744591892834 % +[39] Char ü: 0.003697851055489953 % +[40] Char q: 0.0030815425462416275 % -The first 40 characters have an accumulated ratio of 0.9998857902515126. +The first 41 characters have an accumulated ratio of 0.9997978508089669. +The first 4 characters have an accumulated ratio of 0.3598243274225239. +All characters whose order is over 22 have an accumulated ratio of 0.0337872650940117. -982 sequences found. +1210 sequences found. -First 512 (typical positive ratio): 0.9904642991017133 -Next 512 (512-1024): 0.001815935000947941 -Rest: -5.377642775528102e-17 +First 612 (typical positive ratio): 0.9950080943923969 +Next 215 (827-612): 0.003994106293262911 +Rest: 0.0009977993143401864 -- Processing end: 2021-03-16 19:30:28.395006 +- Processing end: 2022-12-14 18:11:03.885482 diff --git a/script/BuildLangModelLogs/LangLithuanianModel.log b/script/BuildLangModelLogs/LangLithuanianModel.log index 5db032a..54acedf 100644 --- a/script/BuildLangModelLogs/LangLithuanianModel.log +++ b/script/BuildLangModelLogs/LangLithuanianModel.log @@ -1,166 +1,231 @@ = Logs of language model for Lithuanian (lt) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:23:31.104161 +- Started: 2022-12-14 18:27:47.903206 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Karūna (laivas) (revision 5105933) -1650 (revision 5301814) -1654 (revision 5301823) -1664 (revision 5301833) -1665 (revision 5301834) -1668 (revision 5301872) -1669 (revision 5301873) -1672 (revision 5301876) -1676 (revision 5801857) -1718 (revision 5301969) -1909 (revision 6129929) -1928 (revision 6176161) -1932 (revision 6195207) -1956 (revision 6150066) -1980 (revision 6190258) -Baltijos jūra (revision 6193053) -Burinis laivas (revision 6040752) -Flagmanas (laivas) (revision 5987584) -Grimzlė (revision 5989647) -Kalmaras (Švedija) (revision 5604914) -Karo laivas (revision 5994228) -Karolis XI (revision 5480144) -Karolis XII (revision 5880104) -Kilis (revision 5995782) -Koordinačių sistema (revision 6044079) -Laivo vėliava (revision 6208955) -Liepos 1 d. (revision 5779083) -Nyderlandai (revision 6196943) -Olando mūšis (revision 6020430) -Rugpjūčio 10 (revision 5793253) -Varytuvas (revision 6020287) -Vaza (laivas) (revision 6203069) -XVIII a. (revision 6031323) -XVII a. (revision 6025004) -Švedija (revision 6205204) -Švedų kalba (revision 5560532) -1590 (revision 5801846) -1596 (revision 5552466) -1608 (revision 5637570) -1610 (revision 5301721) -1647 m. (revision 5301819) -1648 m. (revision 5301818) -1649 m. (revision 5301820) -1651 m. (revision 5301821) -1652 m. (revision 5301836) -1653 m. (revision 5301822) -1702 (revision 5301912) -1704 (revision 5301925) -1722 (revision 5301973) -1723 (revision 5301974) -1737 (revision 5302020) -2 tūkstantmetis (revision 5976362) -ATR (revision 6212255) -Abiejų Tautų Respublika (revision 6212255) -Adomas Freitagas (revision 6152308) -Armėnų kalendorius (revision 5965695) -Bahajų kalendorius (revision 6168286) -Bajorai (revision 6040220) -Berberų kalendorius (revision 4926904) -Birželio 21 (revision 6172033) -Bizantijos kalendorius (revision 5300569) -Budistų kalendorius (revision 5979182) -Dešimtmetis (revision 5982040) -Dominikonai (revision 6068818) -Dominikonų ordinas (revision 6068818) -Emanuelis Vladislovas Tiškevičius Logoiskis (revision 5761120) -Filosofas (revision 5836448) -Gegužės 26 (revision 6075204) -Grafas (titulas) (revision 5832187) -Grigaliaus kalendorius (revision 5989624) -Hebrajų kalendorius (revision 5990271) -Iraniečių kalendorius (revision 4964854) +Karūna (laivas) (revision 6615331) +1654 (revision 6421668) +Karolis XII (revision 6615329) +Švedų kalba (revision 6700452) +1980 (revision 6705517) +XVIII a. (revision 6734712) +1932 (revision 6809297) +Grimzlė (revision 6761216) +Flagmanas (laivas) (revision 6759598) +Varytuvas (revision 6786126) +Koordinačių sistema (revision 6767173) +Linksniavimas (revision 6231848) +1738 (revision 5302021) +1764 (revision 6243848) +XVII amžiaus 6-as dešimtmetis (revision 6787652) +Prancūzijos vyrų futbolo rinktinė (revision 6809656) +Rašytoja (revision 6778742) +Lietuvos Didžioji Kunigaikštystė (revision 6754017) +Kovo 18 (revision 6466532) +Ašis (revision 6792160) +N (revision 6772931) +Rugsėjo 23 (revision 6421658) +O (revision 6774214) +Spalio 24 (revision 6732189) +Liepos 21 (revision 6641384) +Škotų kalba (revision 5497571) +1765 m. (revision 5659471) +Nariuotakojai (revision 6773159) +Koja (revision 6766763) +Sausio 13 (revision 6695608) +Visuotinė lietuvių enciklopedija (revision 6600066) +1732 (revision 6092017) +1951 (revision 6653634) +Martas Kristianas Kalninš (revision 6771010) +1798 (revision 5349734) +Banginiai (revision 6804052) +Musulmonų kalendorius (revision 4705912) +Rafael Paasio (revision 4317704) Japonų kalendorius (revision 6082601) -John Churchill (revision 5350480) -Jurgis Kasakauskis (revision 5047829) -Jurgis Kazimieras Ancuta (revision 5059404) -Jurgis Mikalojus Tiškevičius (revision 5481136) -Kalijugos kalendorius (revision 5741238) -Kazimieras Tiškevičius Logoiskis (revision 5481143) -Kinų kalendorius (revision 5995873) -Koptų kalendorius (revision 5996919) -Korėjiečių kalendorius (revision 5996955) -LDK (revision 6130316) -Lapkričio 14 (revision 5943612) -Lelija (herbas) (revision 5999126) -Lietuvių kalba (revision 6201110) -Lietuvos Didžioji Kunigaikštystė (revision 6130316) +Vabzdžiai (revision 6785567) +Žuvys (revision 6505294) +Lelija (herbas) (revision 6769096) +I (revision 6762504) +Spermatozoidas (revision 6040464) +1721 (revision 5301972) +Sraigtasparnis (revision 6782175) +Lietuva (revision 6754800) +1806 (revision 6647553) +Latvija (revision 6711819) +Rugsėjo 5 (revision 6635721) +1768 m. (revision 6587295) +Senegalo vyrų futbolo rinktinė (revision 6805100) +Aleksejus Kosyginas (revision 6756761) +Lapkričio 27 (revision 6728142) +Alemanų tarmės (revision 5457500) +1993 m. Nobelio premijos laureatai (revision 6354142) +1700 (revision 5301933) +XVIII amžiaus 5-as dešimtmetis (revision 6734712) +Citoskeletas (revision 6794578) +Šikšnosparniai (revision 6215031) +Liepos 5 (revision 6565078) +Lapkričio 30 (revision 6801263) +Birželio 4 (revision 6595710) +Friedrich Rudolph Ludwig von Canitz (revision 4321334) +Aleksandras Birzeniekas (revision 6693172) +U (revision 6785091) +Lėktuvas (revision 6530878) +Povilas Karoblis (revision 6412249) +Rytų fryzų kalba (revision 6038842) +Bernadotai (revision 5581811) +Maršalas (revision 5845195) +Petras Kojelavičius-Vijūkas (revision 4953029) +Bavarų tarmė (revision 6040251) +Paukščiai (revision 6681188) +1730 (revision 6617419) +Antanas Kmieliauskas (revision 6718977) +Jonušas Radvila (revision 6568066) +Alfonsas Švirinas (revision 6490775) +Dešimtmečiai (revision 6795204) +Šimtmetis (revision 6788782) +Kraginiai paukščiai (revision 6493913) +1986 m. Afrikos Tautų taurė (revision 6469228) +Bahajų kalendorius (revision 6367987) +Vokietija (revision 6783295) +Mokslinė klasifikacija (revision 6757632) +Pavasario lygiadienis (revision 6478812) +Rugsėjo 2 (revision 6468442) +Tomas Mikuckis (revision 5755119) +Šiaurės Europa (revision 6729862) +Ketvirtadienis (revision 6470677) +Prancūzų revoliucinis kalendorius (revision 6777664) +Septynerių metų karas (revision 6635433) +Q (revision 6206776) +1931 (revision 6725995) +Rugsėjo 12 (revision 6803815) +XIX amžiaus 2-as dešimtmetis (revision 6787644) Metai (revision 5765072) -Mianmaro kalendorius (revision 5979182) -Mokslų daktaras (revision 6172930) -Motiejus Juozapas Ancuta (revision 4951716) -Musulmonų kalendorius (revision 4705912) -Nekeliamieji metai, prasidedantys šeštadienį (revision 6004293) -Profesorius (revision 6009297) -René Descartes (revision 6201538) -Saka kalendorius (revision 6109866) -Senovės indų kalendoriai (revision 6012785) -Spauda (revision 5345510) -Stanislovas Kristupas Naruševičius (revision 5481106) +Balaenoptera omurai (revision 6792336) +Albanija (revision 6652819) +Kompozitorius (revision 6812678) +1970 (revision 6425934) +Gruzijos vyrų futbolo rinktinė (revision 6475869) +Birželio 9 (revision 6638724) +Kojalavičiai (revision 5364216) +Reformacija (revision 6533113) +1632 (revision 6617414) +Jonas Manvydas (revision 5794616) +Maldininkai (revision 6271396) +Socialistinis realizmas (revision 6796870) +1965 m. Nobelio premijos laureatai (revision 6354078) +Rugsėjo 9 (revision 6637201) +1992 (revision 6626055) +Kanopa (revision 5524436) +1644 (revision 6189034) +TSKP CK (revision 6796934) +Žvaigždė (revision 6704045) +Šiluvos Švč. Mergelės Marijos Apsireiškimo koplyčia (revision 6443620) +Lietuviai (revision 6737004) +Romėniški skaičiai (revision 6779292) +1840 (revision 5472123) +Vilniaus laikinoji archeologinė komisija (revision 6786822) +1637 (revision 5477037) +Hebrajų kalendorius (revision 6761858) +Gynėjas (revision 6761523) +Šalmas (heraldika) (revision 6788373) +Vida Mildažienė (revision 6349656) +1874 (revision 5302295) +Vyrų lytiniai organai (revision 6787447) +Drevė (revision 6795787) +Elgesys (revision 6758606) +JTO (revision 6810729) +Transportinis lėktuvas (revision 6490266) +Rusijos imperija (revision 6810083) +Kontinentinė armija (revision 6767142) +Architektas (revision 6791468) +SARS (revision 6703666) +Lietuvių abėcėlė (revision 6749474) +Nobelio chemijos premija (revision 6707824) +1927 m. Nobelio premijos laureatai (revision 6354031) +Danutė Mertingaitė-Gajauskienė (revision 4955551) +Koptų kalendorius (revision 6767212) +Lapkričio 12 (revision 6675165) +EC-135 (revision 6440159) +Indija (revision 6644424) +Turkmėnų kalendorius (revision 6202388) +Jyrki Katainen (revision 6345717) +Peizažas (revision 6724226) +OCLC (revision 6490131) +1998 (revision 6490434) +Šveicarija (revision 6806915) +Lietuvos Metrika (revision 6769406) +1995 m. Nobelio premijos laureatai (revision 6354148) +Stanislovas Augustas Poniatovskis (revision 6782263) +Sausio 10 (revision 6572860) +Adolfas Večerskis (revision 6614936) +Armėnų kalendorius (revision 6791569) +1994 m. Afrikos Tautų taurė (revision 6473026) +1833 (revision 5302208) +Iraniečių kalendorius (revision 4964854) +Vardininkas (revision 6636841) +TSRS Liaudies komisarų taryba (revision 6209376) +Absoliutizmas (revision 6586253) +Keliamieji metai (revision 6766000) +1974 (revision 6342499) +XVII amžius (revision 6787652) +Buran (revision 6651693) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:26:36.949228 +- Wikipedia parsing ended at: 2022-12-14 18:30:25.794645 -68 characters appeared 398895 times. +68 characters appeared 763346 times. -First 40 characters: -[ 0] Char i: 13.296732222765389 % -[ 1] Char a: 11.103673899146392 % -[ 2] Char s: 8.654407801551786 % -[ 3] Char o: 6.708030935459205 % -[ 4] Char e: 5.518244149462891 % -[ 5] Char r: 5.427493450657441 % -[ 6] Char t: 5.170533599067424 % -[ 7] Char n: 5.082039133105203 % -[ 8] Char u: 4.293109715589315 % -[ 9] Char k: 4.091302222389351 % -[10] Char l: 3.876208024668146 % -[11] Char m: 3.384349264844132 % -[12] Char d: 3.0411511801351234 % -[13] Char v: 2.8220459018037327 % -[14] Char j: 2.286817332882087 % -[15] Char p: 2.243196831246318 % -[16] Char g: 1.902756364456812 % -[17] Char ė: 1.5700873663495405 % -[18] Char b: 1.55980897228594 % -[19] Char y: 1.2637410847466124 % -[20] Char ų: 1.1800097770089872 % -[21] Char š: 0.9924917584828087 % -[22] Char ž: 0.8423269281389839 % -[23] Char c: 0.557289512277667 % -[24] Char č: 0.49461637774351647 % -[25] Char f: 0.40336429386179323 % -[26] Char ū: 0.3863172012685043 % -[27] Char ą: 0.36901941613707867 % -[28] Char z: 0.362501410145527 % -[29] Char h: 0.3604958698404342 % -[30] Char į: 0.3070983592173379 % -[31] Char ę: 0.15618145125910327 % -[32] Char x: 0.09777008987327492 % -[33] Char w: 0.05715789869514534 % -[34] Char ó: 0.027325486656889657 % -[35] Char á: 0.014289474673786336 % -[36] Char é: 0.011531856754283708 % -[37] Char ã: 0.011030471678010504 % -[38] Char ö: 0.008523546296644481 % -[39] Char q: 0.007270083605961468 % +Most Frequent characters: +[ 0] Char i: 13.166506407317257 % +[ 1] Char a: 11.019642468814927 % +[ 2] Char s: 8.502042324188507 % +[ 3] Char o: 6.722901541371802 % +[ 4] Char r: 5.638072381331663 % +[ 5] Char e: 5.563008124756008 % +[ 6] Char t: 5.2458518155593925 % +[ 7] Char n: 5.187555839684756 % +[ 8] Char u: 4.417786953753606 % +[ 9] Char k: 4.050718809032864 % +[10] Char l: 3.7625139844841007 % +[11] Char m: 3.423873315639304 % +[12] Char d: 2.7526180788266394 % +[13] Char v: 2.6926190744433063 % +[14] Char p: 2.5196961797140482 % +[15] Char j: 2.274591076654623 % +[16] Char g: 1.8775234297422136 % +[17] Char ė: 1.6207591315078613 % +[18] Char b: 1.4380110723053503 % +[19] Char y: 1.2438658223138654 % +[20] Char ų: 1.2032551424910853 % +[21] Char š: 1.0003327455701607 % +[22] Char ž: 0.7698998881241272 % +[23] Char c: 0.6999447170745637 % +[24] Char č: 0.4843151074348985 % +[25] Char z: 0.42222006796393774 % +[26] Char ą: 0.4184210043676131 % +[27] Char ū: 0.4146219407712885 % +[28] Char f: 0.4020457302455243 % +[29] Char h: 0.3974606534913394 % +[30] Char į: 0.32449243200331174 % +[31] Char ę: 0.15549960306335528 % +[32] Char x: 0.082793385961281 % +[33] Char w: 0.048339809208406144 % +[34] Char é: 0.008253138157532756 % +[35] Char q: 0.007598127192649205 % -The first 40 characters have an accumulated ratio of 0.9994234071622861. +The first 36 characters have an accumulated ratio of 0.9995965132456317. +The first 4 characters have an accumulated ratio of 0.39411092741692494. +All characters whose order is over 23 have an accumulated ratio of 0.03166060999861138. -1138 sequences found. +1175 sequences found. -First 512 (typical positive ratio): 0.9919219576954762 -Next 512 (512-1024): 0.008423269281389839 -Rest: 0.00033781981757727893 +First 557 (typical positive ratio): 0.995037232341861 +Next 229 (786-557): 0.0039665071105200456 +Rest: 0.0009962605476189212 -- Processing end: 2021-03-16 19:26:37.062994 +- Processing end: 2022-12-14 18:30:25.869429 diff --git a/script/BuildLangModelLogs/LangMalteseModel.log b/script/BuildLangModelLogs/LangMalteseModel.log index 76b703b..f7eaeac 100644 --- a/script/BuildLangModelLogs/LangMalteseModel.log +++ b/script/BuildLangModelLogs/LangMalteseModel.log @@ -1,147 +1,158 @@ = Logs of language model for Maltese (mt) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:30:28.553074 +- Started: 2022-12-14 18:08:52.337231 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Unjoni Ewropea (revision 255663) -1951 (revision 229183) -1952 (revision 229184) -1957 (revision 229188) -1958 (revision 229189) -1973 (revision 252982) -1979 (revision 252967) -1981 (revision 253774) -1985 (revision 252978) -1986 (revision 252978) -1990 (revision 257440) -1992 (revision 249582) -1995 (revision 252258) -1 ta' Mejju (revision 258193) -2007 (revision 258027) -2013 (revision 248708) -Albanija (revision 261944) -Awstrija (revision 261959) -Awtonomija (revision 262074) -Ażores (revision 255663) -Bank Ċentrali Ewropew (revision 255748) -Belt kapitali (revision 255506) -Belġju (revision 255745) -Brussell (revision 243311) -Bulgarija (revision 261950) -Danimarka (revision 256058) -Dazji doganali (revision 255663) -De facto (revision 215102) -Dħul nazzjonali gross (revision 255663) -Estonja (revision 255711) -European Free Trade Association (revision 255663) -Ewropa (revision 259973) -Ex Repubblika Jugoslava tal-Maċedonja (revision 255663) -Federazzjoni (revision 228364) -Finlandja (revision 258210) -Frankfurt (revision 261246) -Franza (revision 259635) -Greċja (revision 259971) -Groenlandja (revision 250685) -Indja (revision 254565) -Islanda (revision 255630) -Isle of Man (revision 259978) -Istati Membri (revision 255663) -Istitut tal-Unjoni Ewropea għall-Istudji dwar is-Sigurtà (revision 256700) -Italja (revision 254814) -Kilometru kwadru (revision 247665) -Komunitajiet Ewropej (revision 256698) -Komunità Ekonomika Ewropea (revision 255663) -Kroazja (revision 249144) +Unjoni Ewropea (revision 279041) Kummissjoni Ewropea (revision 258115) -Kunsill Ewropew (revision 255754) -Kunsill tal-Ewropa (revision 255754) -Kunsill tal-Unjoni Ewropea (revision 255754) -Latvja (revision 255712) -Lista ta' pajjiżi skont id-daqs (revision 254529) -Lista ta' pajjiżi skont il-popolazzjoni (revision 260622) -Litwanja (revision 259637) -Liġijiet tal-Unjoni Ewropea (revision 255663) -Lussemburgu (revision 253431) -Lussemburgu (belt) (revision 243587) -Madejra (revision 243625) -Malta (revision 261973) -Montenegro (revision 255647) -Norveġja (revision 261168) -Olanda (revision 261407) -Organizzazzjoni Internazzjonali (revision 258039) -Organizzazzjonijiet mhux governattivi (revision 233500) -Pajjiżi l-Baxxi (revision 261407) -Pajjiżi membri tal-Unjoni Ewropea (revision 255663) -Pajjiżi ġirien li jdawru l-Unjoni Ewropea (revision 255663) +2007 (revision 258027) +Repubblika Ċeka (revision 279325) +Renju Unit (revision 282249) +Bulgarija (revision 266495) +Finlandja (revision 282145) +Danimarka (revision 280266) +Transnistrija (revision 266548) +Belġju (revision 276022) +Stati Uniti tal-Amerika (revision 280264) +Rumanija (revision 266525) +Iżlanda (revision 280133) +Turkija (revision 270987) +Praga (revision 281545) +Santa Luċija (revision 281476) +Ażerbajġan (revision 283345) +Lista ta' pajjiżi skont l-erja (revision 260621) +Awstrija (revision 273952) +Lista ta' pajjiżi skont id-densità ta' popolazzjoni (revision 272026) +Repubblika tal-Irlanda (revision 280123) +Georgia (revision 279271) +Ġermanja (revision 279831) +Litwanja (revision 281573) +Estonja (revision 274160) +Albanija (revision 272682) +San Marino (revision 279324) +Italja (revision 277251) +Abkażja (revision 266550) +Bangladexx (revision 281383) +Sri Lanka (revision 281445) +Burundi (revision 262969) +Belt tal-Vatikan (revision 282714) +Stati Uniti (revision 280264) +Spanja (revision 274001) Parlament Ewropew (revision 255748) -Politika agrikola komuni (revision 255745) -Politika reġjonali tal-Unjoni Ewropea (revision 255663) -Polonja (revision 261762) -Portugall (revision 243625) -Qorti tal-Ġustizzja tal-Unjoni Ewropea (revision 255663) -Relazzjonijiet ta' terzi pajjiżi ma l-UE (revision 255663) -Renju Unit (revision 254529) -Repubblika Federali tal-Ġermanja (revision 258687) -Repubblika tal-Irlanda (revision 250619) -Repubblika Ċeka (revision 255669) -Rumanija (revision 261954) -Segretarjat tal-Parlament Ewropew (revision 255663) -Serbja (revision 259975) -Slovakkja (revision 255727) -Slovenja (revision 261963) -Spanja (revision 258290) -Stati membri tal-Unjoni Ewropea (revision 255663) -Strasburgu (revision 243503) +Slovakkja (revision 266528) +Kosovo (revision 277587) +Lesoto (revision 281408) +Serbja (revision 266527) +Russja (revision 266526) +Ġamajka (revision 281389) +Malta (revision 280087) +Uganda (revision 281453) +Bożnija-Ħerzegovina (revision 266494) +Portugall (revision 279398) +Kroazja (revision 279781) +Norveġja (revision 279820) +Ċekja (revision 279325) +Salzburg (stat) (revision 281285) +Belarussja (revision 270102) +Montenegro (revision 279276) +Tuvalu (revision 281452) +El Salvador (revision 253460) +Filippini (revision 266237) +Kamerun (revision 281402) +Repubblika Dominikana (revision 272017) +Tajlandja (revision 279268) +Żvezja (revision 282136) +Belt kapitali (revision 274120) +Kambodja (revision 266273) +Żambja (revision 281455) +Commonwealth tan-Nazzjonijiet (revision 262942) +Lista ta' kodiċi telefoniċi (revision 257699) +Ungerija (revision 268808) +Armenja (revision 278995) +Ċina (revision 266233) +Kittieb (revision 277970) +Repubblika tal-Maċedonja (revision 281602) +Gżejjer Faroe (revision 262423) +Lussemburgu (revision 279759) +Tajwan (revision 279695) +Beliże (revision 281377) +Riga (revision 276342) +Indoneżja (revision 279426) +Żimbabwe (revision 269714) +Netherlands (revision 281883) +Liechtenstein (revision 279758) +Maldive (revision 281413) +Monaco (revision 281097) +Copenhagen (revision 264454) +Baħrejn (revision 279277) +Kanada (revision 281405) +Liżbona (revision 273944) +Żvizzera (revision 268804) +Gżejjer Aran (revision 243256) +Afganistan (revision 277361) +Andorra (revision 278993) +Ukrajna (revision 274996) +Graz (revision 276048) +Iżlam (revision 280916) +Seychelles (revision 281440) +Lingwa uffiċjali (revision 251833) +Nagorno-Karabakh (revision 274274) +Joe Biden (revision 271580) +Repubblika tal-Maċedonja ta' Fuq (revision 281602) +Kuwajt (revision 279782) +Senegal (revision 269704) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:33:28.445834 +- Wikipedia parsing ended at: 2022-12-14 18:10:59.208295 -49 characters appeared 643393 times. +51 characters appeared 410264 times. -First 31 characters: -[ 0] Char i: 12.115145797358691 % -[ 1] Char a: 12.109705887381429 % -[ 2] Char t: 8.033037350421903 % -[ 3] Char l: 7.963095650714261 % -[ 4] Char e: 6.5463876666361 % -[ 5] Char n: 5.990118014961307 % -[ 6] Char r: 5.530834186881113 % -[ 7] Char u: 4.447514971409388 % -[ 8] Char o: 3.9081867536637795 % -[ 9] Char j: 3.7945703481386963 % -[10] Char m: 3.619405246870886 % -[11] Char s: 3.4255890256810377 % -[12] Char k: 2.5824029792055554 % -[13] Char d: 2.3040350143691337 % -[14] Char p: 2.1852895508654897 % -[15] Char b: 2.0524003214209667 % -[16] Char f: 1.9347428399127748 % -[17] Char ħ: 1.6223365812186332 % -[18] Char g: 1.4863388317871036 % -[19] Char w: 1.4324060100125429 % -[20] Char z: 1.3761417982477273 % -[21] Char ż: 0.9421924080616357 % -[22] Char h: 0.9235412881395973 % -[23] Char ġ: 0.7990450626599915 % -[24] Char ċ: 0.6618039052336597 % -[25] Char v: 0.6143989754318122 % -[26] Char x: 0.610357899448704 % -[27] Char q: 0.5511405936962324 % -[28] Char c: 0.24153200299039623 % -[29] Char à: 0.08936994962643362 % -[30] Char y: 0.061082417744675495 % +Most Frequent characters: +[ 0] Char a: 12.611879180235167 % +[ 1] Char i: 11.737807850554764 % +[ 2] Char l: 8.053838503987675 % +[ 3] Char t: 7.739650566464521 % +[ 4] Char e: 6.431712263323129 % +[ 5] Char n: 6.16895462433945 % +[ 6] Char r: 5.748006161886005 % +[ 7] Char u: 4.319413840843945 % +[ 8] Char o: 3.771473977731412 % +[ 9] Char j: 3.663982216328998 % +[10] Char m: 3.6181580640758146 % +[11] Char s: 3.4828793167326406 % +[12] Char k: 2.807460561979603 % +[13] Char d: 2.365793732815943 % +[14] Char b: 2.051605795292787 % +[15] Char p: 2.0045629155860616 % +[16] Char f: 1.9814070939687616 % +[17] Char g: 1.5919017998167033 % +[18] Char ħ: 1.552902521303356 % +[19] Char w: 1.355468673829534 % +[20] Char z: 1.2309147280775306 % +[21] Char h: 1.031287171187333 % +[22] Char ż: 1.005937640153657 % +[23] Char ġ: 0.7556110211961078 % +[24] Char v: 0.6895559932141255 % +[25] Char ċ: 0.663231480217616 % +[26] Char x: 0.5952264883099663 % +[27] Char q: 0.5294152058186924 % +[28] Char c: 0.22058966909112182 % +[29] Char à: 0.07824230251740343 % +[30] Char y: 0.06971121034260866 % -The first 31 characters have an accumulated ratio of 0.9995414933019164. +The first 31 characters have an accumulated ratio of 0.9992858257122245. +The first 3 characters have an accumulated ratio of 0.32403525534777605. +All characters whose order is over 22 have an accumulated ratio of 0.03601583370707642. -888 sequences found. +936 sequences found. -First 512 (typical positive ratio): 0.9960434044151966 -Next 512 (512-1024): 0.009421924080616357 -Rest: 1.5612511283791264e-17 +First 512 (typical positive ratio): 0.9950079702120929 +Next 197 (709-512): 0.003994232243462514 +Rest: 0.000997797544444623 -- Processing end: 2021-03-16 19:33:28.518739 +- Processing end: 2022-12-14 18:10:59.258906 diff --git a/script/BuildLangModelLogs/LangNorwegianModel.log b/script/BuildLangModelLogs/LangNorwegianModel.log index 9c066bf..709b0dd 100644 --- a/script/BuildLangModelLogs/LangNorwegianModel.log +++ b/script/BuildLangModelLogs/LangNorwegianModel.log @@ -1,234 +1,249 @@ = Logs of language model for Norwegian (no) = - Generated by BuildLangModel.py -- Started: 2022-11-30 20:26:27.916571 -- Maximum depth: 2 +- Started: 2022-12-14 18:09:10.899495 +- Maximum depth: 4 - Max number of pages: 200 == Parsed pages == -Norsk (revision 22974717) -Saft (revision 22967608) +Norsk (revision 23146192) +Saft (revision 23143694) Hund (revision 23005187) -Valg i Norge (revision 22782362) -Asia (revision 23117912) -Saarloos wolfhond (revision 22789727) -Østfold (revision 23055508) -Fårehunder (revision 22264555) -Stripesjakal (revision 18745363) -12. mai (revision 23118103) -Gullsjakal (revision 23104601) -Urhund (revision 23050226) -E (revision 22904440) -Luxembourgsk (revision 22813155) -Obstruent (revision 15267134) -Gudbrandsdalen (revision 23014277) -Norges berggrunn (revision 21768509) -Riksforsamlingen (revision 22999081) -Sosiolekt (revision 21458982) -Habitat (revision 23123646) -Norsk språkhistorie (20. århundre) (revision 22891154) -Søsterart (revision 20748512) -Halvdan Koht (revision 22303367) -Plosiver (revision 21816753) -Svorsk (revision 20789512) -Skandinavia (revision 22814296) -Partisipp (revision 22785842) -H (revision 23086416) -Kreft (revision 23050449) -Kreft hos hunder (revision 21811805) -Q (revision 23024714) -Fédération Cynologique Internationale (revision 22172054) -Rosin (revision 22818749) -Tribus (biologi) (revision 21339936) -Siste istids maksimum (revision 23141296) -Laurents Hallager (revision 22655416) -Canider (revision 22229857) -Individ (revision 20992252) +Øyelidelser hos hund (revision 21789138) +Spansk vannhund (revision 22264162) +Valnøttfamilien (revision 20450473) +Det latinske alfabetet (revision 23149062) Stortingsvalg 1945– (revision 22861299) -Svalbards geologi (revision 22935346) -Riksmålsvernet (revision 22966421) -Magedreining (hund) (revision 21661370) -Stortinget (revision 23071662) -Bokmål (revision 22928969) -Recessiv (revision 21780786) -Synkopetida (revision 22906353) -Artskompleks (revision 20848344) -Homogenitet (revision 22857280) -Pyometra (hund) (revision 22374115) -Den norske språkstriden (revision 22428585) -Gruppe (biologi) (revision 21969525) -Stående fuglehunder (revision 22264516) -Samnorsk (revision 22785915) -Fastlands-Norge (revision 23141642) -Drivende hunder (revision 22264618) +Canis (revision 23079627) Sibir (revision 22369404) -Norges demografi (revision 23034159) -FCI (revision 22172054) -Vannhunder (revision 22264145) -Prednisolon (revision 21804718) -Midtvesten (revision 22423559) -Buskerud (revision 22915767) -Sogn og Fjordane (revision 22811825) -Transport i Norge (revision 23131810) -Ustemt palatal frikativ (revision 19011330) -Anatolsk gjeterhund (revision 22303224) -Norges fylker (revision 23129287) -Tonelag (revision 22751959) -Statsforvalter (revision 23133685) -Sjokolade (revision 22988920) +Trang fremre runda vokal (revision 15245735) +Belgia (revision 23167059) +Uvular konsonant (revision 20242704) +V (revision 21759000) +Genetisk variasjon (revision 19292839) +Tysk (revision 23093771) +Palatal (revision 15212009) +Kennelklubb (revision 19859697) +Hjelpehund (revision 21788770) +Norrønt språk (revision 22742886) +Albueleddsartrose (hund) (revision 18567406) +Dogo argentino (revision 22935074) +Dansk (revision 22530698) +Skarre-r (revision 21414582) +Asia (revision 23117912) +Sørlandet (revision 23112799) +Svalbard (revision 23008013) +Bushhund (revision 23071308) +Frikativ (revision 19455562) +Ǩ (revision 15223173) +Retina (revision 22735283) +Rekombinasjon (revision 20582861) +Trud (revision 22334785) +Gane (revision 22759946) +Frisisk (revision 22523019) +Spansk mastiff (revision 22303223) +Tyskland (revision 23168007) +Gruppe (biologi) (revision 21969525) +Stortingsvalget 2009 (revision 23112355) +Naturlig seleksjon (revision 21804010) +Fransk (revision 22935160) +Bulgarsk (revision 22635586) +Ob (revision 21084101) +Engelsk (revision 22974505) +Hundeutstilling (revision 21252737) +Brukshund (revision 20464996) +Kanadisk (revision 23155430) +Fédération Cynologique Internationale (revision 22172054) +Sør-Korea (revision 22928666) +Utgard (revision 23084598) +Reformasjonen (revision 22869778) +Bull terrier (revision 22876839) +Grå stær (revision 22817613) +Asiatisk villhund (revision 23071307) +Andrée Land (revision 17780437) +Syn (gudinne) (revision 21271518) +1964 (revision 23002382) +Dorsal (fonetikk) (revision 17670154) +Spansk (revision 22290773) +Vestlandsbanen (revision 22788186) +17. juli (revision 22635308) +Dentaler (revision 15180713) +Migrasjon (revision 20727027) +Kraton (revision 22783493) +Ɇ (revision 12117250) +Hanoi (revision 21749345) +Liste over stortingsrepresentanter 1958–1961 (revision 22974893) +Sovjetunionen (revision 23072866) Nasaler (revision 16002502) -Hundens pels (revision 22900550) -Approksimanter (revision 16000119) -Tapper (revision 18322970) -Vakt- og vokterhunder (revision 23091054) -Saluki (revision 22267261) -Canis (revision 23079627) -Island (revision 23097723) -Flyball (revision 20457011) -Staffordshire bull terrier (revision 23135078) -Stockholm (revision 22770528) -Sahel (revision 19821400) -ISO 639-3 (revision 18859824) -Ny-guinea villhund (revision 22567866) -Rabies (revision 19440055) -Ordbog over det norske Folkesprog (revision 23096800) -Norge (revision 23141642) -Flåttbårne sykdommer (hund) (revision 21355504) -Bombehund (revision 22942055) -Læreboknormalen av 1959 (revision 18841941) -Tromøy (revision 22053767) -Vorstehhund korthåret (revision 22264532) -Tåkeskog (revision 20461967) -Vest-Telemark (revision 22923647) -Oslo (revision 23118371) -Tyrkia (revision 23034073) -Liste over Norges største tettsteder (revision 23138252) -Energi (revision 22979461) -Jakt med hund (revision 22890790) -Sogn fogderi (revision 22425444) -Integrated Taxonomic Information System (revision 20457376) -Tadsjikistan (revision 22864814) -Befolkningstetthet (revision 22253839) -Tøddel (revision 21641445) -Den lille istid (revision 22782643) -Norsk språkhistorie (1400–1800) (revision 21342667) -Unionen mellom Sverige og Norge (revision 22922743) -Fylkeskommune (revision 22011606) -ĸ (revision 17096887) -Degas (revision 22751270) -Gløgg (revision 22902469) -Antistoff (revision 20746889) -Norges statsminister (revision 22948566) -Lørdag (revision 23031303) -Ş (revision 12094187) -Hallingdal (revision 22811584) -1969 (revision 22958238) -Juli (revision 22359558) -Shar pei (revision 22891357) -Dyr (revision 23101991) -Ƙ (revision 15223100) -PhyloCode (revision 22857413) -Y-kromosom (revision 22783781) -Høst (revision 23087627) -Geit (revision 21989005) -Guatemala (revision 22780680) -USA (revision 22781448) +Malaysia (revision 23161382) +Kg (revision 22252484) +Artrose (revision 22564772) +Etiopisk ulv (revision 20709633) +Parafyletisk (revision 22797875) +Egypt (revision 23156817) +Kveghunder (revision 22264557) Tamhund (revision 23005187) -Populasjonsdynamikk (revision 20640003) -Christoffer Oftedahl (revision 19783269) -Mellomnorsk (revision 22546096) -1000 (revision 20456192) -Servicehund (revision 22337757) -Himalayaulv (revision 21791662) -Ø (bokstav) (revision 22617366) -Ǩ (revision 15223173) -Bordeaux dogge (revision 22266230) -Frøplanter (revision 21763501) -Ustemt bilabial plosiv (revision 22354758) -Digraf (revision 19954081) -12. århundre (revision 23123540) -Sametingsvalget 1993 (revision 21890290) -Førerhund (revision 20465384) -Grenada (revision 22948831) -Aserbajdsjans administrative inndeling (revision 22782483) -Verneområder i Norge (revision 22076171) -Pelsdyroppdrett (revision 22827568) -Kretahund (revision 22201230) -Etne (revision 22659600) -Koreansk chejudo (revision 22199018) -Riesenschnauzer (revision 23103775) -Italias regioner (revision 22182270) -Dingo (revision 23050226) -Firfisle (revision 21650282) -Dominans (revision 21160764) -CITES (revision 22637082) -Helligdager i Norge (revision 22095322) -Bunad (revision 23086915) -Barnekreftforeningen (revision 19888945) -Guttorm Hansen (revision 22098933) -Albania (revision 22939774) -Medier i Norge (revision 21776331) -Finsk (revision 22908244) -Anders Lysgaard (revision 22858529) -Bakverk (revision 15226081) -Ć (revision 15785421) -Vatikanstaten (revision 22782366) -Steinalderen i Norge (revision 23106147) -Johnny Depp (revision 22764203) -Sverre Steen (revision 22112509) -Fjellrev (revision 22812483) -Bayersk viltsporhund (revision 22805751) -Ń (revision 15222385) -Utdannelse i Norge (revision 22814897) -Espen Berntsen (revision 21025561) -Nederland (revision 23024484) -Liste over hundegrupper (revision 18570830) +Æthelred II av England (revision 22274635) +Arv (biologi) (revision 15193856) +800 (revision 20428875) +Vestnordiske språk (revision 21807502) +Rase (revision 22735680) +Njord (revision 20092192) +Mutasjon (revision 21334520) +Forklaringstegnet (revision 20695673) +Nordland (revision 23169648) +Bairisk (revision 23071928) +1900-tallet (revision 22240557) +Sørlandet sykehus (revision 22588546) +Krasnojarsk kraj (revision 19368817) +Rhinland (revision 19164408) +Helsepersonellregisteret (revision 23091708) +Svartehavet (revision 23014293) +Andebu- og revetalsmål (revision 23108601) +Norge (revision 23163578) +Erna Solbergs regjering (revision 22934041) +Tilde (revision 20755981) +Variabilitet (revision 21870681) +ʘ (revision 20546944) +Diagnose (revision 22415717) +Kristiansand (revision 23164965) +Fylkeskommune (revision 22011606) +Jakthund (revision 22264195) +Québec (revision 23141462) +Vannsøkshund (revision 16137337) +The Kennel Club (revision 22068391) +Unionen mellom Sverige og Norge (revision 22922743) +Ferskvann (revision 22508945) +Norges skoger (revision 23100526) +Merovingertiden (revision 21803717) +Øst-Timor (revision 22879852) +Praha (revision 23065718) +Ṽ (revision 21816212) +Søkshund (revision 22942055) +Nordisk Kennelunion (revision 21925391) +Art (revision 23002819) +Amerika (revision 23035336) +Vokal (revision 20326280) +Grønn stær (revision 20185449) +H (revision 23086416) +Familiehund (revision 23005187) +Norske dialekter (revision 22708152) +Ŕ (revision 21771133) +Hundefamilien (revision 22229857) +Sverige (revision 23170950) +Nordaust-Svalbard naturreservat (revision 21699944) +JSTOR (revision 22365797) +Jerevan (revision 22574944) +F (revision 20220663) +Tunge (revision 20521325) +Sydslesvig (revision 22332596) +Ȯ (revision 21775804) +Uvularer (revision 20242704) +Sanddyne (revision 21921712) +Ull (norrøn mytologi) (revision 22687123) +Koronaler (revision 15254044) +Quechua (revision 23112401) +Rogaland (revision 23169978) +Hundestørrelser (revision 18569662) +Apotek (revision 22780147) +Omsk (revision 21487721) +Zoologi (revision 21442012) +Vestfoldmål (revision 23108768) +Det sentralsibirske platå (revision 16337016) +ISBN (revision 22766668) +Konjunktiva (revision 19977579) +Nynorsk (revision 23017339) +Seksuell seleksjon (revision 17720854) +Sæbyggjenuten (revision 22791527) +Petroleum (revision 21482908) +Danmark (revision 23156784) +Avl (revision 22079075) +Pattedyr (revision 23022295) +Ḓ (revision 21787653) +1. januar (revision 22030084) +Ny-Guinea (revision 21346420) +Spania (revision 23167328) +Ch (bokstav) (revision 15186609) +Vlaamse Liberalen en Democraten (revision 22761997) +Genotype (revision 21777838) +Thimpu (revision 22402801) +Singularis (revision 22034181) +Liste over hunderaser (revision 22927266) +Moseldalen (revision 21469054) +Retroflekser (revision 23142022) +Hløkk (revision 21759102) +Landrase (revision 18506642) +Schleswig-Holstein (revision 22397077) +Frikativer (revision 19455562) +Stambok (revision 20748276) +Hornhinne (revision 19926008) +Gullsjakal (revision 23104601) +Ⅎ (revision 12125190) +Det internasjonale fonetiske alfabetet (revision 22107683) +Ⱡ (revision 21787652) +Sørlandsk (revision 23103464) +2015 (revision 22531589) +Egentlige tofrøbladete planter (revision 20092751) +Evolusjonsteori (revision 22101642) +Ă (revision 15220571) +Approksimant (revision 16000119) +Amur (revision 22771315) +1900 (revision 22880292) +Velarer (revision 18095281) +Frigg (gudinne) (revision 21154080) +Halvtrang vokal (revision 15284377) +Sametingsvalget 2005 (revision 21890369) +Labiovelarer (revision 18570510) +Norges regjering (revision 22205009) +Asturias (revision 23092397) == End of Parsed pages == -- Wikipedia parsing ended at: 2022-11-30 20:29:27.551046 +- Wikipedia parsing ended at: 2022-12-14 18:12:08.670069 -62 characters appeared 1228749 times. +66 characters appeared 1334619 times. Most Frequent characters: -[ 0] Char e: 15.049208585317261 % -[ 1] Char r: 8.84924423132796 % -[ 2] Char n: 8.422550089562636 % -[ 3] Char t: 7.726394894319344 % -[ 4] Char s: 6.64798099530498 % -[ 5] Char a: 6.28020856985438 % -[ 6] Char i: 5.99455218274847 % -[ 7] Char l: 5.422262805503809 % -[ 8] Char o: 5.386942329149403 % -[ 9] Char d: 4.534774799409806 % -[10] Char g: 3.86091870674971 % -[11] Char k: 3.6487516978650643 % -[12] Char m: 3.216197937902696 % -[13] Char v: 2.4669806445417253 % -[14] Char f: 2.0122091655822305 % -[15] Char u: 1.8136332155712844 % -[16] Char p: 1.6869189720602011 % -[17] Char b: 1.4243755233981878 % -[18] Char h: 1.3665117937023752 % -[19] Char å: 1.1134902246105591 % -[20] Char y: 0.8473658981614633 % -[21] Char ø: 0.792431977564173 % -[22] Char j: 0.7630525029928814 % -[23] Char c: 0.2926553755079353 % -[24] Char æ: 0.20012223814627725 % -[25] Char w: 0.05932863424507365 % -[26] Char z: 0.028565638710591017 % -[27] Char x: 0.023194322029967063 % -[28] Char é: 0.017171936660782636 % -[29] Char q: 0.009521879570197005 % +[ 0] Char e: 14.981429156935425 % +[ 1] Char r: 8.729682403742192 % +[ 2] Char n: 8.342530714758295 % +[ 3] Char t: 7.623973583472137 % +[ 4] Char s: 6.8181256223686315 % +[ 5] Char a: 6.5220111507478915 % +[ 6] Char i: 6.118974778569764 % +[ 7] Char l: 5.349991270917019 % +[ 8] Char o: 5.184475869143179 % +[ 9] Char d: 4.41182090169554 % +[10] Char k: 3.9232919657220524 % +[11] Char g: 3.6325722921672776 % +[12] Char m: 3.1580548456151156 % +[13] Char v: 2.3961894742994065 % +[14] Char f: 1.9287901640842815 % +[15] Char u: 1.88293438052358 % +[16] Char p: 1.7894245473801886 % +[17] Char b: 1.4544975007848682 % +[18] Char h: 1.3430050074215938 % +[19] Char å: 1.0663717510390607 % +[20] Char y: 0.8944125626864295 % +[21] Char j: 0.82008423377758 % +[22] Char ø: 0.7878653008836229 % +[23] Char c: 0.3555321780972697 % +[24] Char æ: 0.21541728388401485 % +[25] Char w: 0.08436864753161763 % +[26] Char z: 0.03701430895259246 % +[27] Char é: 0.03559068168518506 % +[28] Char x: 0.025400507560584706 % +[29] Char q: 0.023901952542261126 % -The first 30 characters have an accumulated ratio of 0.9995751776807141. +The first 30 characters have an accumulated ratio of 0.9993773503898867. +The first 4 characters have an accumulated ratio of 0.3967761585890805. +All characters whose order is over 19 have an accumulated ratio of 0.03279587657601158. -967 sequences found. +1114 sequences found. -First 442 (typical positive ratio): 0.9950425176429516 -Next 157 (599-442): 0.0039580060347621515 -Rest: 0.0009994763222862524 +First 470 (typical positive ratio): 0.9950174595185775 +Next 194 (664-470): 0.003983282132802413 +Rest: 0.0009992583486201356 -- Processing end: 2022-11-30 20:29:27.623923 +- Processing end: 2022-12-14 18:12:08.725997 diff --git a/script/BuildLangModelLogs/LangPolishModel.log b/script/BuildLangModelLogs/LangPolishModel.log index f92700b..1be55a2 100644 --- a/script/BuildLangModelLogs/LangPolishModel.log +++ b/script/BuildLangModelLogs/LangPolishModel.log @@ -1,163 +1,241 @@ = Logs of language model for Polish (pl) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:33:28.678083 +- Started: 2022-12-14 18:10:38.517066 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Krasnyj Krym (revision 62415649) -11 grudnia (revision 62631194) -1913 (revision 62510480) -1915 (revision 62045210) -1925 (revision 62586144) -1929 (revision 62587250) -1935 (revision 62643677) -1936 (revision 62578718) -1939 (revision 62647877) -1941 (revision 62626183) -1942 (revision 62634977) -1943 (revision 62605793) -1944 (revision 62629763) -1949 (revision 62629889) -1953 (revision 62544578) -1957 (revision 62605043) -1959 (revision 62544562) -45 mm armata morska 21-K (revision 61708436) -76 mm armata przeciwlotnicza wz. 1914/15 (revision 62529098) -7 grudnia (revision 62636554) -AG Vulcan Stettin (revision 56402035) -Admirał Butakow (revision 61255818) -Aurora (1900) (revision 60525374) -Avro 504 (revision 62119913) -Bomba głębinowa (revision 62280686) -Brest (revision 59991108) -Burta (revision 45569092) -Cagliari (revision 57357802) -Czerwona Ukraina (revision 62415654) -Daty nowego i starego porządku (revision 60118095) -Działo przeciwlotnicze (revision 57354362) -Długość całkowita (statek) (revision 57603162) -Flota Bałtycka (revision 62436950) -Flota Czarnomorska (revision 62138173) -Gwardia (wojsko) (revision 53610648) -Hulk (okręt) (revision 61976707) -II wojna światowa (revision 62628019) -I wojna światowa (revision 61897062) -Imperium Rosyjskie (revision 62512980) -Język rosyjski (revision 62485083) -Karabin maszynowy DSzK (revision 62495075) -Karabin maszynowy Vickers 12,7 mm (revision 51917495) -Kocioł parowy (revision 62570204) -Konstrukcyjna linia wodna (revision 59497856) -Koń mechaniczny (revision 57660802) -Kronsztad (revision 58913101) -Krążownik lekki (revision 58075663) -Krążowniki lekkie typu Swietłana (revision 61255818) -Krążowniki typu Admirał Nachimow (revision 56872613) -Lend-Lease Act (revision 61097607) -Marynarka Wojenna Związku Socjalistycznych Republik Radzieckich (revision 62606797) -Maszyna sterowa (revision 28497888) -Mila morska (revision 61023950) -Mina morska (revision 61000099) -Morze Czarne (revision 61790806) -Nadbudówka (revision 57496460) -Neapol (revision 61681555) -Niszczyciele rakietowe projektu 61 (revision 61591760) -Noworosyjsk (revision 62635030) -Obrona Odessy (revision 61668078) -Odessa (revision 62609713) -Oerlikon 20 mm (revision 60068925) -Operacja desantowa kerczeńsko-teodozyjska (revision 60265054) -Parawan (trał) (revision 54434173) -Petersburg (revision 62601352) -Poti (revision 62387800) -Radar (revision 61897200) -Rangi okrętów (revision 59334819) -Rewolucja październikowa (revision 62498820) -Rosyjska Federacyjna Socjalistyczna Republika Radziecka (revision 62401382) -Rosyjska marynarka wojenna (revision 62145039) -Salwa burtowa (revision 45535265) -Sewastopol (revision 61699516) -Siewastopol (1911) (revision 61344180) -Stal Kruppa (revision 44611245) -Sudak (revision 56397428) -Szalupa (revision 50176935) -Szerokość całkowita (revision 59927053) -Tallinn (revision 62370993) -Tarmo (revision 60930043) -Teodozja (miasto) (revision 61289639) -Tuapse (revision 54506404) -Turbina parowa (revision 58882974) +Krasnyj Krym (revision 66824963) +Węzeł (jednostka prędkości) (revision 67139927) +Radar (revision 68768526) +Śruba okrętowa (revision 63706042) +Aurora (1900) (revision 68898064) +Kocioł parowy (revision 67730042) +Kronsztad (revision 68581947) +7 grudnia (revision 68948196) Typ okrętu (revision 58157719) -Wielka wojna ojczyźniana (revision 62540748) -Wielkokalibrowy karabin maszynowy (revision 60069207) -Wodnosamolot (revision 61361212) -Wojna domowa w Rosji (revision 61724197) -Wyporność (revision 61495676) -Wyrzutnia torpedowa (revision 59771268) -Węzeł (jednostka prędkości) (revision 62033661) -Zatoka Biskajska (revision 59124431) -Związek Socjalistycznych Republik Radzieckich (revision 62525734) -Śruba okrętowa (revision 62489877) -Śródokręcie (revision 45285929) -Świnoujście (revision 62151792) -(691) Lehigh (revision 60266839) -1066 (revision 62500082) +Czerwona Ukraina (revision 68695903) +Neapol (revision 68892721) +Flota Czarnomorska (revision 68696317) +Imperium Rosyjskie (revision 68856935) +Wytwornica pary (revision 55689600) +Statek wodny (revision 68480347) +3 Front Ukraiński (revision 68935419) +1950 (revision 68885081) +Cal (revision 68918178) +Biblioteka Narodowa Republiki Czeskiej (revision 67927221) +1830 (revision 68458768) +Węgle kopalne (revision 67039575) +Kocioł płomienicowo-płomieniówkowy (revision 68709469) +Modulacja (revision 66525387) +Cień radiowy (revision 60559907) +Mykołajiwśkyj sudnobudiwnyj zawod (revision 66956857) +Samorząd terytorialny (revision 67806627) +1867 (revision 69012213) +Mikołajów (revision 68983291) +Gubernia samarska (revision 65774206) +Język rosyjski (revision 68430547) +II wojna światowa (revision 68838789) +Tallinn (revision 68863189) +Kontrola autorytatywna (revision 64027474) +Sewastopol (revision 68617896) +Próżnia (revision 67081889) +HMS Rattler (1843) (revision 63597881) +WorldCat (revision 66967839) +Store norske leksikon (revision 66724573) +Biali (Rosja) (revision 65941744) +27 maja (revision 68237859) +Encyklopedia Britannica (revision 68175162) +Temperatura (revision 67703070) +Katarzyna II Wielka (revision 68871822) +Jacht żaglowy (revision 66939788) +Besarabia (revision 67455676) +Grecja (revision 68872546) +Siewastopol (1911) (revision 68695743) +Pył węglowy (revision 64459067) +Johann Nepomuk Nestroy (revision 68430100) +Front Woroneski (revision 68939618) +Patent (revision 68327537) +Burżuazja (revision 69019337) +Museo di Capodimonte (revision 66689382) +Juliusz Słowacki (revision 68813616) +Mina morska (revision 67602621) +David Bushnell (revision 60410068) +Ukraina (revision 68594366) +Wynalazek (revision 68536503) +8 Korpus Zmechanizowany (ZSRR) (revision 69022169) +Zanurzenie (revision 65932162) +Drednot (revision 66733257) +Meteorologia (revision 68534571) +Nordborg (revision 65685596) +Pozaukładowe jednostki miary (revision 67383651) +Arabowie (revision 68802751) +Królestwo Prus (revision 68755256) +24 maja (revision 68899710) +Urządzenie (revision 66859777) +Logika (revision 68431995) +Koń mechaniczny (revision 68642538) +Kocioł centralnego ogrzewania (revision 68379993) +International Standard Serial Number (revision 68149186) +1966 (revision 68980201) +Gustaw Herling-Grudziński (revision 68188962) +Ołena Stepaniw (revision 63992647) +Mareograf (revision 64498571) +Pokój frankfurcki (revision 67399930) +Josef Ressel (revision 53567099) +Hiszpania (revision 68920802) +Mila angielska (revision 67257925) +Kotka (Finlandia) (revision 68141701) +Ziemia (revision 68930518) +Marcisław (revision 51802622) +Dick Allen (revision 68993068) +Wielkokalibrowy karabin maszynowy (revision 66541348) +76 mm armata przeciwlotnicza wz. 1914/15 (revision 67777027) +1568 (revision 68644354) +Gubernia bakijska (revision 61883809) +Pędnik azymutalny (revision 60948745) +Demokracja szlachecka (revision 68868742) +Generator elektryczny (revision 67263053) +Nauplion (revision 68889146) +UNESCO (revision 68566725) +SS Archimedes (revision 66107167) +Enrique Gainzarain (revision 68164610) +44 Armia (ZSRR) (revision 68921345) +XIX wiek (revision 68884424) +Log (przyrząd pomiarowy) (revision 66762667) +Skala Beauforta (revision 63952097) +Carroll Campbell (revision 65849282) +Dania (revision 68781714) +Asyż (revision 68895232) +Palenisko (revision 58842822) +Kuba (revision 68419530) +Kaliber broni (revision 68593869) +Kilometr na godzinę (revision 66964935) +Generał-gubernatorstwo kijowskie (revision 55323461) +Joseph Bara (revision 67322180) +Koło łopatkowe (revision 65929603) +Układ SI (revision 68491769) +Długość całkowita (statek) (revision 64884292) +Avro 504 (revision 66779136) +Gubernia nowogrodzka (revision 64471559) +Wodowanie (revision 68673522) +Michaił Tuchaczewski (revision 68581944) +Francuzi (revision 68576787) +Stopa (miara) (revision 65393527) +70 Armia (ZSRR) (revision 68934813) +Atmosfera (revision 68395066) +140 p.n.e. (revision 67172676) +Lockheed F-117 Nighthawk (revision 67799011) +Biblioteka Narodowa Łotwy (revision 67552589) +Stowarzyszenie Pisarzy Polskich (revision 66999418) +Pancernik (revision 68909515) +17 Korpus Zmechanizowany (ZSRR) (revision 69022176) +Monarcha (revision 68856899) +Filozofia umysłu (revision 68283308) +Klaryn (revision 36275423) +15 kwietnia (revision 69025463) +Biblioteka Narodowa Francji (revision 68559573) +Morze Bałtyckie (revision 68103355) +Komedia (revision 63051740) +Samorząd Terytorialny (revision 65110216) +Time Almanac (revision 66990969) +Jallais (revision 68867316) +Kelwin (revision 67338262) +1853 (revision 68065540) +Urządzenie czułe na wyładowania elektrostatyczne (revision 64150985) +1534 (revision 66303317) +Organizacja (revision 63906809) +Hebertyści (revision 67688185) +Gubernia tobolska (revision 69012490) +Krążownik przeciwlotniczy (revision 58076757) +Siedmiogród (revision 68309057) +Własność intelektualna (revision 68147375) +Joachim II Hektor (revision 66343356) +Gubernia orenburska (revision 65899739) +Gwinea Bissau (revision 68938308) +Gubernia płocka (revision 65905690) +Biuletyn EBIB (revision 65107636) +Samolot wielozadaniowy (revision 56523743) +Gałacz (revision 68117500) +Imiona greckie (revision 50970638) +Gemeinsame Normdatei (revision 68165730) +Don Carlos (opera) (revision 54164593) +25 Korpus Zmechanizowany (ZSRR) (revision 69022184) +Wojna trzydziestoletnia (revision 68877523) +Autonomiczna Republika Krymu (revision 68729346) +Wielka Encyklopedia Rosyjska (revision 66373982) +Hymn (wiersz Juliusza Słowackiego 1836) (revision 68526473) +Library of Congress Control Number (revision 65475177) +Tadeusz Kościuszko (revision 68781656) +Marynarka Wojenna Związku Socjalistycznych Republik Radzieckich (revision 67702441) +Iitti (revision 68528025) +Obwód kamczacki (Imperium Rosyjskie) (revision 64610912) +Ił-62 (revision 68578659) +Zamek na Hradczanach (revision 68116469) +Jan Barylski (revision 63900376) +Ilja Riepin (revision 68429476) +Norwegia (revision 68485505) +Niuton (revision 68229453) +Herb Besarabii (revision 68131061) +Gubernia charkowska (revision 65088873) +Romanizacja (starożytność) (revision 65586801) +Niemiecka Biblioteka Narodowa (revision 68168263) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:54:55.177499 +- Wikipedia parsing ended at: 2022-12-14 18:27:15.210762 -86 characters appeared 1860467 times. +84 characters appeared 1320205 times. -First 38 characters: -[ 0] Char a: 9.71455016401796 % -[ 1] Char i: 8.783547356658302 % -[ 2] Char o: 7.7947633578021005 % -[ 3] Char e: 6.889130524755344 % -[ 4] Char r: 6.010641414225568 % -[ 5] Char n: 5.536996893790645 % -[ 6] Char k: 5.05394613287954 % -[ 7] Char s: 5.034864902199287 % -[ 8] Char z: 4.529185414199769 % -[ 9] Char w: 4.033180916404322 % -[10] Char t: 4.019743430009777 % -[11] Char c: 3.6763887776563626 % -[12] Char y: 3.5020777041463247 % -[13] Char p: 3.0798181316841413 % -[14] Char l: 2.971941990908734 % -[15] Char d: 2.804779660160594 % -[16] Char m: 2.7137810022967352 % -[17] Char u: 2.3359726348277072 % -[18] Char j: 1.8645856121070676 % -[19] Char ł: 1.5818608983658402 % -[20] Char g: 1.402981079481657 % -[21] Char b: 1.3551436279170768 % -[22] Char h: 1.1977100373185872 % -[23] Char ę: 0.6938042975231488 % -[24] Char ą: 0.5616331813464038 % -[25] Char ó: 0.5564194366253205 % -[26] Char f: 0.5355107077954083 % -[27] Char ń: 0.43010706451659714 % -[28] Char ż: 0.42290457180912105 % -[29] Char ś: 0.3628658825982939 % -[30] Char v: 0.1491023490338716 % -[31] Char ć: 0.12942986895225767 % -[32] Char ź: 0.08433366461216459 % -[33] Char x: 0.0421399573332932 % -[34] Char é: 0.02617622349657371 % -[35] Char á: 0.02246747725167928 % -[36] Char í: 0.014136235687061367 % -[37] Char q: 0.013114986721075944 % +Most Frequent characters: +[ 0] Char a: 9.307721149366955 % +[ 1] Char i: 8.471714620077943 % +[ 2] Char o: 7.782351983214728 % +[ 3] Char e: 7.2690983597244365 % +[ 4] Char n: 5.7497888585484835 % +[ 5] Char r: 5.6479107411349 % +[ 6] Char s: 4.81061653303843 % +[ 7] Char z: 4.737597570074344 % +[ 8] Char k: 4.36008044205256 % +[ 9] Char w: 4.245249790752194 % +[10] Char t: 4.007483686245697 % +[11] Char c: 3.8036517056063266 % +[12] Char y: 3.521347063524225 % +[13] Char p: 2.9736290954813835 % +[14] Char d: 2.9238640968637446 % +[15] Char m: 2.7441192845050577 % +[16] Char l: 2.7298790718108172 % +[17] Char u: 2.5518006673206055 % +[18] Char j: 2.101491813771346 % +[19] Char ł: 1.5939191261963106 % +[20] Char g: 1.4732560473562817 % +[21] Char b: 1.3079786851284458 % +[22] Char h: 1.22405232520707 % +[23] Char ę: 0.8214633333459577 % +[24] Char ą: 0.7593517673391632 % +[25] Char ó: 0.7209486405520354 % +[26] Char ż: 0.510602520063172 % +[27] Char f: 0.5062092629553743 % +[28] Char ś: 0.4684878484780773 % +[29] Char ń: 0.34305278346923396 % +[30] Char ć: 0.1680042114671585 % +[31] Char v: 0.13096450929969208 % +[32] Char x: 0.05999068326509898 % +[33] Char ź: 0.055067205471877476 % +[34] Char é: 0.01999689442169966 % +[35] Char á: 0.014088721069833852 % +[36] Char q: 0.012498059013562286 % -The first 38 characters have an accumulated ratio of 0.9993173756911571. +The first 37 characters have an accumulated ratio of 0.9992932915721422. +The first 5 characters have an accumulated ratio of 0.38580674970932544. +All characters whose order is over 24 have an accumulated ratio of 0.030099113395268155. -1547 sequences found. +1469 sequences found. -First 512 (typical positive ratio): 0.9881622113600178 -Next 512 (512-1024): 0.0042290457180912105 -Rest: 0.0005488849902139173 +First 603 (typical positive ratio): 0.9950009015061881 +Next 297 (900-603): 0.004003405092029788 +Rest: 0.0009956934017820718 -- Processing end: 2021-03-16 19:54:55.605846 +- Processing end: 2022-12-14 18:27:15.679055 diff --git a/script/BuildLangModelLogs/LangPortugueseModel.log b/script/BuildLangModelLogs/LangPortugueseModel.log index e1f91e2..50d38a0 100644 --- a/script/BuildLangModelLogs/LangPortugueseModel.log +++ b/script/BuildLangModelLogs/LangPortugueseModel.log @@ -1,166 +1,251 @@ = Logs of language model for Portuguese (pt) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:54:55.771448 +- Started: 2022-12-14 18:11:03.435056 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Papagaio-das-mascarenhas (revision 58875640) -Albinismo (revision 60544601) -Alfred Newton (revision 55613591) -Alphonse Milne-Edwards (revision 55360216) -Animalia (revision 59086849) -Asa (revision 59016280) -August von Pelzeln (revision 55658828) -Aves (revision 59780941) -Bico (revision 59270926) -BirdLife International (revision 60296296) -Carl Wilhelm Hahn (revision 58280895) -Carlos Lineu (revision 60424490) -Carolus Linnaeus (revision 60424490) -Cauda (revision 56806253) -Charles Lucien Bonaparte (revision 52587707) -Chordata (revision 60632448) -Cladograma (revision 55578666) -Classe (biologia) (revision 56051821) -Classificação científica (revision 59003514) -Coleção Leverian (revision 49939876) -Comores (revision 60033304) -Coracopsinae (revision 36946101) -Coracopsis nigra (revision 49364496) -Coracopsis vasa (revision 55904306) -Cylindraspis indica (revision 55039606) -Cúlmen (revision 59270926) -Digital object identifier (revision 59704276) -EBird (revision 54789725) -Eclectus roratus (revision 60346158) -Edward Newton (revision 52355291) -Enciclopédia da Vida (revision 53360339) -Endemismo (revision 59148596) -Epíteto específico (revision 58254455) -Espécie (revision 60480387) -Esquilo-vermelho (revision 59084882) -Estado de conservação (revision 60507425) -Extinção (revision 60618960) -Família (biologia) (revision 58605859) -Filo (revision 58307920) -Fossilworks Paleobiology Database (revision 60618977) -França (revision 60657760) -François-Nicolas Martinet (revision 43679514) -François Levaillant (revision 49358726) -Fredrik Hasselqvist (revision 52281786) -Fregilupus varius (revision 54591191) -Fumigação (revision 50600995) -George Robert Gray (revision 60662109) -Georges-Louis Leclerc, conde de Buffon (revision 53113664) -Global Biodiversity Information Facility (revision 59909217) -Género (biologia) (revision 60485207) -Hermann Schlegel (revision 58280671) -Herpetologista (revision 57406279) -Histoire Naturelle (revision 50957493) -Holótipo (revision 55228464) -INaturalist (revision 54028036) -ITIS (revision 59095296) -IUCN (revision 58907792) -Ilha da Reunião (revision 60519224) -Ilha vulcânica (revision 59932533) -Ilhas Mascarenhas (revision 60149877) -Ilhas Molucas (revision 58541748) -International Standard Book Number (revision 59096583) +Papagaio-das-mascarenhas (revision 61083234) +Alfred Newton (revision 63772066) +Bico (revision 60835473) +Sieur Dubois (revision 41590167) +Biodiversity Heritage Library (revision 64470020) +René-Primevère Lesson (revision 63229743) +Psittrichasiidae (revision 44385977) +Histoire Naturelle (revision 61014417) +Ponto quente (revision 55473520) +Herpetologista (revision 60800107) +Endemismo (revision 64450772) +Cladograma (revision 64249397) Jacques Barraband (revision 45007769) -Jean Feuilley (revision 43140791) -Johann Georg Wagler (revision 58641840) -John Gerrard Keulemans (revision 49649801) -Julian Hume (revision 41876605) -Leiolopisma (revision 49675967) -Lionel Walter Rothschild (revision 60408276) -Lista Vermelha da IUCN (revision 59379270) -Lista Vermelha da União Internacional para a Conservação da Natureza e dos Recursos Naturais (revision 58907792) -Lista Vermelha de Espécies Ameaçadas da IUCN (revision 59379270) -Lista de aves extintas (revision 56678269) -Londres (revision 60339639) -Língua inglesa (revision 60421609) -Madagascar (revision 60519261) -Mascarenotus grucheti (revision 43145662) -Mathurin Jacques Brisson (revision 51922685) -Maurício (revision 60625767) -Maximiliano I José da Baviera (revision 58499194) -Melanina (revision 59475698) -Museu Nacional de História Natural (França) (revision 59928766) -National Center for Biotechnology Information (revision 59213569) -Naturhistorisches Museum (revision 51807264) -Nesoenas duboisi (revision 57384381) -Nome científico (revision 60480452) -Nomenclatura binomial (revision 60480452) -Nycticorax duboisi (revision 57384378) -Nível do mar (revision 59494064) -Ordem (biologia) (revision 56361837) -Otto Finsch (revision 52466524) -Papagaio (revision 60655174) -Papagaio-cinzento (revision 59484957) -Papagaio-cinzento-de-maurício (revision 58875653) -Pedro Mascarenhas (c. 1484-1555) (revision 49518171) -Periquito-de-maurício (revision 54615644) -Periquito-de-reunião (revision 54615645) -Peter Mundy (revision 58162914) -Piton des Neiges (revision 57212555) -Pleistoceno (revision 59637437) -Plumagem (revision 56296594) +International Plant Names Index (revision 62639992) +Classe (biologia) (revision 63495321) +DNA (revision 63152174) +Ecologia (revision 64022144) +Ancestral comum (revision 64678633) +Dinosauria (revision 64535736) +Região Autónoma da Madeira (revision 64879506) +Natural History Museum, London (revision 64268225) +Percy Alexander MacMahon (revision 50071355) +Espécie (revision 64553712) +John Allan Broun (revision 61817860) +Ancestral comum mais recente (revision 64591096) +Sinapomorfia (revision 62321488) +Illinois (revision 62170587) +Helmintologia (revision 59020535) +Aves (revision 64642129) +Field Museum of Natural History (revision 64844966) +Limnologia (revision 58851800) +Farmacêutico (revision 64630397) +Língua inglesa (revision 64150425) +Madagáscar (revision 64725397) +Família (biologia) (revision 61575111) +Islândia (revision 64060301) +Classificação biológica (revision 61809666) +Faceted Application of Subject Terminology (revision 64779631) +Edward Albert Sharpey-Schafer (revision 64254898) +Havaí (revision 64640756) +Mutação (revision 64123827) +Coracopsinae (revision 63459971) +William Edward Ayrton (revision 62739781) +Cirurgião (revision 64668807) +Origem comum (revision 61641925) +Ferdinand von Mueller (revision 62255725) +Bibsys (revision 63644684) +Táxon (revision 63227455) +Malacologia (revision 57970258) +Desintegração radioativa (revision 64313586) +Anatomia animal (revision 58797699) +International Standard Name Identifier (revision 64790504) +Zootomia (revision 58797699) +Toleítica (revision 34762860) +Botânico (revision 61967900) +John Stenhouse (revision 62008644) +Termorregulação (revision 64495448) +John Kerr (revision 62704609) +Convecção mantélica (revision 60373806) +Psittacoidea (revision 61033148) +Saúde pública (revision 63527365) +Deriva continental (revision 64902339) +Ordem (biologia) (revision 63601075) +Open Library (revision 61955652) +Inferência Bayesiana (revision 62830176) +Academy of Natural Sciences (revision 61578144) +Nova Guiné (revision 60227023) +Reprodução (revision 63414857) +Máxima parcimônia (revision 62015609) +Médico (revision 64301033) +França (revision 64895524) +Açores (revision 64819020) +Endemia (revision 62738403) +Árvores filogenéticas (revision 61763709) +Fernando de Noronha (revision 64855367) +2005 (revision 64725143) +William Bateson (revision 55830496) +Neuroetologia (revision 60563061) +William Lassell (revision 62183853) +Arizona (revision 64879425) +Vulcanismo (revision 63510234) +Psittaciformes (revision 63932960) +International Standard Serial Number (revision 58367000) +Biologia regenerativa (revision 56549505) +Harold Jeffreys (revision 58732968) +Peixe (revision 64431170) +Ambientalismo (revision 64862203) +Alma mater (revision 57820112) +Napier Shaw (revision 56336986) +Bioquímica (revision 64244183) +Internet Archive (revision 64096543) +Edward Arthur Milne (revision 58910802) +Missouri Botanical Garden (revision 61966759) +Saxifraga cintrana (revision 61885598) +Sistema Universitário de Documentação (revision 51069528) +American Museum of Natural History (revision 64212495) +Filo (revision 63464029) +Engenharia genética (revision 63671435) +Chordata (revision 64103327) +Joseph Lister (revision 63440033) +Arthur Stanley Eddington (revision 64141109) +Febre amarela (revision 63472783) +Cristobalita (revision 61847424) +Harry Potter and the Goblet of Fire (filme) (revision 64856437) +Filogenia (revision 61260626) +Digital Object Identifier (revision 63209667) +Green New Deal (revision 64509397) +Fisiologia (revision 62258442) +Meio ambiente (revision 64545451) +Engenharia industrial (revision 62379140) +Richard Strachey (revision 56336958) +2007 (revision 63840012) +Alelo (revision 64539300) +Digital object identifier (revision 63209667) +Biblioteca Nacional da Dieta (revision 57968570) +Entoprocta (revision 63946878) +Século XIX (revision 64837318) +Livraria (revision 61700386) +Ichnotáxon (revision 63611820) +Etnobotânica (revision 60819978) +Geodinâmica (revision 49283303) +Infeção (revision 63726830) +Desoxicitidina (revision 59092754) +Homeostasia (revision 64259202) +Lista de especialidades biológicas (revision 61719144) +Biologia do desenvolvimento (revision 63818057) +Placa tectônica (revision 62769279) +Área (revision 63988916) +Anemia falciforme (revision 62334586) +Desenvolvimento sustentável (revision 64617206) +John Lindley (revision 60821316) +Distribuição t de Student (revision 64416114) +Godfrey Harold Hardy (revision 60980821) +América do Sul (revision 64858929) +Superfamília (revision 61575111) +Solstício (revision 60877451) +Raiz (revision 64680884) +Ornitologia (revision 63950590) +Evolução (revision 64809463) +Desoxicitidina trifosfato (revision 49779775) +Ancestral comum universal (revision 59916568) +Sipuncula (revision 60929185) +Fluorescência (revision 63252893) +Giganotosaurus (revision 64632514) +Peste bubônica (revision 63599249) +Crusta oceânica (revision 58498714) +Zooplâncton (revision 64800726) +Bioestatística (revision 64552825) +John Tyndall (revision 62541791) +1809 (revision 64398306) +John Hewitt Jellett (revision 62745269) +El Niño (revision 64656881) +Engenharia de sistemas (revision 64448054) +Victoria and Albert Museum (revision 64268249) +Programa das Nações Unidas para o Meio Ambiente (revision 64270781) +História evolutiva da vida (revision 62052857) +John Edward Marr (revision 62745345) +Tribo (biologia) (revision 53951385) +Teleostomi (revision 51833586) +Robert Broom (revision 54192174) +Super-reino (revision 59274824) +Cape Race (revision 43867831) +Toponímia (revision 63944441) +Heurística (revision 61085603) +Biologia celular (revision 64287445) +Microbiologia (revision 64226425) +Safim (revision 64181126) +Aurornis xui (revision 63853334) +Meteorologia (revision 63874898) +Jacob Lockhart Clarke (revision 62722236) +Witmer Stone (revision 62493170) +1869 (revision 64231456) +Atavismo (revision 64285323) +Declínio contemporâneo da biodiversidade mundial (revision 64509212) +Estrutura interna da Terra (revision 60929596) +Keith Edward Bullen (revision 62715317) +Apiaceae (revision 63941666) +Primeira guerra mundial (revision 64646038) +James Joseph Sylvester (revision 64331288) +Alfred Fowler (revision 55754858) +Língua grega (revision 64653752) +Medalha Real (revision 62976312) +Augustus Matthiessen (revision 59225915) +Phaethontiformes (revision 43440414) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 19:59:19.802576 +- Wikipedia parsing ended at: 2022-12-14 18:14:13.337561 -51 characters appeared 713201 times. +57 characters appeared 1852526 times. -First 38 characters: -[ 0] Char a: 11.984419539512704 % -[ 1] Char e: 11.434925077222271 % -[ 2] Char o: 9.885712442915812 % -[ 3] Char s: 8.280835276450818 % -[ 4] Char i: 7.116787553578866 % -[ 5] Char r: 6.403664605069258 % -[ 6] Char n: 5.615948379208667 % -[ 7] Char d: 5.256442433479482 % -[ 8] Char t: 4.736673111787561 % -[ 9] Char m: 4.516118177063689 % -[10] Char c: 3.973213722358774 % -[11] Char u: 3.7191478979978996 % -[12] Char l: 3.1644655573954608 % -[13] Char p: 2.783647246708852 % -[14] Char g: 1.3397345208433526 % -[15] Char v: 1.3255730151808536 % -[16] Char f: 1.1414734415683656 % -[17] Char b: 0.9920064610116923 % -[18] Char h: 0.868759297869745 % -[19] Char ã: 0.7190118914583687 % -[20] Char é: 0.6653103402827534 % -[21] Char ç: 0.6455403175261952 % -[22] Char q: 0.5922594051326344 % -[23] Char í: 0.41138472884923044 % -[24] Char x: 0.3736674513916834 % -[25] Char á: 0.3452042271393338 % -[26] Char z: 0.3241722880366124 % -[27] Char ó: 0.2204147217965202 % -[28] Char ê: 0.204150022223749 % -[29] Char j: 0.2023272541681798 % -[30] Char õ: 0.17863126944578034 % -[31] Char y: 0.13222079049244184 % -[32] Char ú: 0.08819393130407838 % -[33] Char â: 0.08300605299207375 % -[34] Char w: 0.08174413664591049 % -[35] Char k: 0.07445306442363374 % -[36] Char à: 0.06688156634665403 % -[37] Char ô: 0.034492380128463083 % +Most Frequent characters: +[ 0] Char a: 11.88820021959206 % +[ 1] Char e: 11.503914115105538 % +[ 2] Char o: 10.007200978555767 % +[ 3] Char s: 8.321826522272833 % +[ 4] Char i: 7.063004783738528 % +[ 5] Char r: 6.534267265344725 % +[ 6] Char n: 5.444943822650802 % +[ 7] Char d: 5.305836463293902 % +[ 8] Char t: 4.949674120633125 % +[ 9] Char m: 4.542662289220233 % +[10] Char c: 3.920646727765224 % +[11] Char u: 3.6140383454807115 % +[12] Char l: 3.100631246201133 % +[13] Char p: 2.7219051176609668 % +[14] Char g: 1.345568159367264 % +[15] Char v: 1.259685424118204 % +[16] Char f: 1.1203081630163356 % +[17] Char b: 0.9877324258876798 % +[18] Char h: 0.7696518159529205 % +[19] Char ã: 0.7660891129193328 % +[20] Char ç: 0.6929457400327984 % +[21] Char q: 0.631786004622877 % +[22] Char é: 0.6198023671462641 % +[23] Char í: 0.4097108488625801 % +[24] Char á: 0.38828065031206044 % +[25] Char x: 0.3434229802982522 % +[26] Char z: 0.3036934434388505 % +[27] Char ó: 0.2662310812371864 % +[28] Char ê: 0.20636687420311509 % +[29] Char j: 0.19060461229693942 % +[30] Char õ: 0.18979490706203314 % +[31] Char y: 0.13953920214884974 % +[32] Char ú: 0.10661118926266083 % +[33] Char â: 0.09020116316856011 % +[34] Char k: 0.06752941659118414 % +[35] Char à: 0.06628786856432785 % +[36] Char w: 0.060026148081052576 % +[37] Char ô: 0.046800962577583254 % -The first 38 characters have an accumulated ratio of 0.9998261359700841. +The first 38 characters have an accumulated ratio of 0.9998742257868446. +The first 3 characters have an accumulated ratio of 0.33399315313253364. +All characters whose order is over 21 have an accumulated ratio of 0.034949037152514996. -929 sequences found. +1057 sequences found. -First 512 (typical positive ratio): 0.9952990712503466 -Next 512 (512-1024): 0.0008819393130407837 -Rest: -7.806255641895632e-18 +First 508 (typical positive ratio): 0.9950267193246717 +Next 167 (675-508): 0.003973967287456359 +Rest: 0.0009993133878719584 -- Processing end: 2021-03-16 19:59:19.891534 +- Processing end: 2022-12-14 18:14:13.491804 diff --git a/script/BuildLangModelLogs/LangRomanianModel.log b/script/BuildLangModelLogs/LangRomanianModel.log index c66f99f..5e1c038 100644 --- a/script/BuildLangModelLogs/LangRomanianModel.log +++ b/script/BuildLangModelLogs/LangRomanianModel.log @@ -1,155 +1,229 @@ = Logs of language model for Romanian (ro) = - Generated by BuildLangModel.py -- Started: 2021-03-16 19:59:20.080997 +- Started: 2022-12-14 18:11:06.127834 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -The Loving Kind (revision 12020391) -12 ianuarie (revision 13977250) -13 decembrie (revision 13958824) -2007 (revision 13956975) -2008 (revision 13894929) -2009 (revision 13949957) -21 noiembrie (revision 13705857) -25 ianuarie (revision 13882659) -31 ianuarie (revision 13887860) -4 Music (revision 13955370) -Billboard (revision 13092896) -Biology (revision 10112430) -Bulgaria (revision 13779617) -CD (revision 13258410) -Call The Shots (revision 13085752) -Call the Shots (revision 13085752) -Can't Speak French (revision 12018260) -Casă de discuri (revision 10611348) -Channel 4 (revision 13980413) +The Loving Kind (revision 15340411) +Bulgaria (revision 15311223) +Casă de discuri (revision 15244458) +Estonia (revision 15317542) +The Promise (revision 15302845) +Slovenia (revision 15205418) +Muzică pop (revision 15177633) +2009 (revision 15336167) +Mixed Up (revision 10112443) +Tonalitate (revision 12509051) +Ucraina (revision 15312635) +11 decembrie (revision 15031710) +Europop (revision 15140046) +Something Kinda Ooooh (revision 15206082) +Protest (revision 13755796) +Insula Jersey (revision 15311219) +Flota sovietică a Mării Baltice (revision 15238696) +The Sound of Girls Aloud (revision 10112480) +4 iulie (revision 15123115) +Companie (revision 14007537) +Croația (revision 15314677) +Gustavus Adolphus al Suediei (revision 13847267) +Wayback Machine (revision 15154168) +Marșul asupra Romei (revision 15291491) +Limbi ugrice (revision 15165135) +See the Day (revision 10112431) +Invadarea Iugoslaviei (revision 15154231) +The Show (revision 10112441) +Alterație (muzică) (revision 14948670) +15 octombrie (revision 15159460) +Mircea Geoană (revision 15335698) +12 decembrie (revision 15344758) +Hugh Hopper (revision 12022451) +J-pop (revision 15285544) +Limba engleză (revision 15174203) +EMI (revision 15018534) +Regatul Unit (revision 15335741) +Jaan Kross (revision 11954185) +Notația măsurată (revision 15079804) +Polygram (revision 8761631) Chemistry (revision 13003795) -Cheryl Cole (revision 13707613) -Chitară (revision 13704508) -Croația (revision 13662573) -Dance (revision 12713318) -Descărcare digitală (revision 10785925) -Digital Spy (revision 12038314) -Discografia formației Girls Aloud (revision 13332557) -Estonia (revision 13885094) -Europa (revision 13985083) -Fascination Records (revision 9653126) -Gen muzical (revision 13743085) +Franz Liszt (revision 15263804) +Claude Debussy (revision 15251845) +Limba italiană (revision 15119781) +Drepturi de autor (revision 15135851) +Locuri din patrimoniul mondial UNESCO (revision 15168595) +Osam (revision 13134720) +No Good Advice (revision 10112436) +Listă de termeni muzicali (revision 15275139) +Tacitus (revision 15210228) +1960 (revision 15226597) +Biology (revision 10112430) +Call the Shots (revision 15311533) +14 septembrie (revision 15144284) +Can't Speak French (revision 15243027) +16 august (revision 15269004) +Ecoregiune (revision 15137483) +Al doilea război mondial (revision 15346198) +I Think We're Alone Now (revision 15152417) +Kazumi Takada (revision 9950085) +Peninsula Balcanică (revision 15296189) +Gama muzicală (revision 14741906) +Cozonac (revision 14994369) +Limba română (revision 15288357) +Library of Congress Control Number (revision 14009408) +Integrated Authority File (revision 15145168) +Muzică cultă (revision 15163310) +Wake Me Up (revision 10112439) +Neumă (revision 15115030) +Girls Aloud (revision 15319932) +Localitate (revision 13695789) +Sorin Roșca Stănescu (revision 15206213) +Dream pop (revision 14793109) +Minoritate (revision 15292581) +Sound of the Underground (cântec) (revision 15206321) +Deviză (revision 15269481) +Sony Music Entertainment (revision 15206153) Girls A Live (revision 10112444) -Girls Aloud (revision 12017377) -Good Morning Television (revision 13079309) -Heat World (revision 12994549) -I'll Stand By You (cântec de Girls Aloud) (revision 10112432) -ITunes (revision 13985408) -I Think We're Alone Now (revision 10112427) -Irlanda (revision 13830248) -Jewels & Stone (revision 8842892) -Jump (cântec de Girls Aloud) (revision 10112438) -Lady GaGa (revision 13982113) +Regatul Unit al Marii Britanii și al Irlandei de Nord (revision 15335741) +Suită (muzică) (revision 11171277) +The World Factbook (revision 15212222) +Benzi desenate (revision 15030706) +Isar (revision 13998661) +Gamă muzicală (revision 14741906) +Muzică rock (revision 14783685) +Limba proto-indo-europeană (revision 14176445) +Manase Radnev (revision 13993534) +Sony BMG (revision 14855195) +Casă de discuri virtuală (revision 15312183) +Epoca Fierului (revision 13830131) +Companie multinațională (revision 12186827) +30 martie (revision 15157103) +Anii 1970 (revision 15053831) +Turism (revision 15214709) +Analiză muzicală (revision 12849620) +Declarația Universală a Drepturilor Omului (revision 13658791) +Scară sonoră (revision 12022964) +2 octombrie (revision 15124675) +Imperiul Persan (revision 15153489) +Oraș (revision 15184008) +Denver (revision 15136578) +CEO (revision 14493425) +Rock (revision 14783685) +Lied (revision 15325998) Life Got Cold (revision 10112437) -Limba engleză (revision 13983069) -Long Hot Summer (revision 10112429) -Love Machine (revision 10112433) -MSN Search (revision 13651565) -MTV (revision 12996766) -Mixed Up (revision 10112443) -Muzică electronică (revision 13450013) -Muzică pop (revision 13648051) -Nadine Coyle (revision 10316187) -Neil Tennant (revision 13355922) -No Good Advice (revision 10112436) -Out Of Control (revision 10112484) +Bibliothèque nationale de France (revision 15237314) +Europa de Nord (revision 11015732) +Khair ad-Din (revision 13565991) +Lom (Bulgaria) (revision 15328003) Out of Control (revision 10112484) -Pet Shop Boys (revision 13165657) -Poker Face (revision 13083515) -PopJustice (revision 12061987) -Regatul Unit (revision 13957992) -Regatul Unit al Marii Britanii și Irlandei de Nord (revision 13957992) -Regatul Unit al Marii Britanii și al Irlandei de Nord (revision 13957992) -Republica Irlanda (revision 13830248) -Romanian Top 100 (revision 13882522) -România (revision 13906545) -Sarah Harding (revision 10139259) -Sarah Hearding (revision 12017812) -See the Day (revision 10112431) -Sexy! No No No... (revision 12017812) -Slant Magazine (revision 12008416) -Slovenia (revision 13726273) -Something Kinda Ooooh (revision 10112426) +Alexandru Șafran (revision 15230090) +Obi (revision 15182800) +Uvertură (revision 13489343) +2005 (revision 15336486) +Națiune (revision 13034621) +2003 (revision 15329452) +Fundal (revision 8135248) +Marijuana (revision 14880983) +Camerun (revision 14465861) +1968 (revision 15018095) +Armele utilizate în timpul celui de-al Doilea Război Mondial (revision 14773352) +Belarus (revision 15322892) +Indiile Răsăritene (revision 10056403) +Cent (revision 12363479) +Primul dictat de la Viena (revision 14824640) +Luigi Carlo Borromeo (revision 12309158) +Call The Shots (revision 15311533) +Limba gotică crimeeană (revision 15165061) +Prăbușirea Uniunii Sovietice (revision 15155736) +Beethoven (revision 15310809) +Ruse (revision 15198972) +Portativ (revision 11356550) +2004 (revision 15092752) +Jump (cântec de Girls Aloud) (revision 10112438) +Economie (revision 15317091) +Gen muzical (revision 15348917) +Freedom House (revision 13005669) +Spania (revision 15303538) +Evul Mediu timpuriu (revision 15338691) +Lagăr de concentrare (revision 15049108) +Grafică (revision 13247501) Sound of the Underground (album) (revision 10112476) -Sound of the Underground (cântec) (revision 10112434) -Tangled Up (revision 13010794) -The Guardian (revision 12369330) -The Paul O'Grady Show (revision 12720320) -The Promise (revision 12178852) -The Show (revision 10112441) -The Sound of Girls Aloud (revision 10112480) -Times Online (revision 12014967) -Tonalitate (revision 12509051) -Turneul Out of Control (revision 10112484) -UK Mix (revision 13757304) -UK Singles Chart (revision 10226705) -Ungaria (revision 13960307) -Uniunea Europeană (revision 13689726) +Magazin istoric (revision 13592125) +Gheorghe Ciuhandu (politician) (revision 15145699) +Che Guevara (revision 15246219) +Rasism (revision 15322273) +OPEC (revision 15182665) +Capitală (revision 15149506) Untouchable (revision 12020867) -Utah Saints (revision 12270967) -Wake Me Up (revision 10112439) -What Will The Neighbours Say? (revision 10112478) -Whole Lotta History (revision 12369785) -Wideboys (revision 12030035) -Wikimedia Commons (revision 13278756) -Xenomania (revision 12020867) +Idee (revision 14806020) +Pârjoale moldovenești (revision 15277144) +Vali Ionescu (revision 15312890) +UK Albums Chart (revision 15215231) +2012 (revision 15339944) +1981 (revision 15055026) +Tetracord (revision 11205775) +Fotbal (revision 15319012) +15 februarie (revision 14861464) +Slovacia (revision 15301231) +Liban (revision 15164669) +Moduri muzicale (revision 13884611) +Regatul Ungariei (revision 15286727) +LIBRIS (revision 13007272) +Dunărea (revision 15263972) +Fotbalist (revision 14889823) +Venit (revision 13998438) +Indicele dezvoltării umane (revision 13268199) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 20:04:01.198792 +- Wikipedia parsing ended at: 2022-12-14 18:14:02.581018 -63 characters appeared 1198090 times. +73 characters appeared 1717760 times. -First 33 characters: -[ 0] Char e: 11.456985702242736 % -[ 1] Char i: 11.0956605931107 % -[ 2] Char a: 10.273852548639919 % -[ 3] Char r: 7.454949127361049 % -[ 4] Char n: 7.243779682661569 % -[ 5] Char t: 6.464122060947007 % -[ 6] Char l: 5.642480948843576 % -[ 7] Char u: 5.4753816491248575 % -[ 8] Char o: 4.928594679865453 % -[ 9] Char c: 4.4603493894448665 % -[10] Char s: 3.768080862038745 % -[11] Char d: 3.7479655117729047 % -[12] Char m: 2.9085461025465533 % -[13] Char p: 2.8108906676460035 % -[14] Char ă: 2.1405737465465866 % -[15] Char g: 1.262509494278393 % -[16] Char f: 1.0879817042125384 % -[17] Char b: 1.0721231293141584 % -[18] Char ț: 1.016534650986153 % -[19] Char ș: 1.0140306654758826 % -[20] Char v: 0.9768882137402032 % -[21] Char î: 0.9654533465766345 % -[22] Char z: 0.7075428390187716 % -[23] Char h: 0.5414451335041608 % -[24] Char â: 0.45664349088966605 % -[25] Char x: 0.22627682394477877 % -[26] Char j: 0.22452403408758942 % -[27] Char k: 0.20132043502574934 % -[28] Char y: 0.16918595431061106 % -[29] Char w: 0.12970644943201262 % -[30] Char á: 0.012937258469730987 % -[31] Char é: 0.012019130449298466 % -[32] Char q: 0.007428490347135858 % +Most Frequent characters: +[ 0] Char i: 11.186661698956781 % +[ 1] Char e: 11.152256426974665 % +[ 2] Char a: 10.509558960506707 % +[ 3] Char r: 7.540343237704918 % +[ 4] Char n: 7.106988170640835 % +[ 5] Char t: 6.260536978390461 % +[ 6] Char l: 5.436440480625931 % +[ 7] Char u: 5.431899683308495 % +[ 8] Char o: 5.0735842026825635 % +[ 9] Char c: 4.54947140461997 % +[10] Char s: 3.8575819672131146 % +[11] Char d: 3.540657600596125 % +[12] Char m: 2.986331035767511 % +[13] Char p: 2.763657321162444 % +[14] Char ă: 2.165785674366617 % +[15] Char g: 1.240743759314456 % +[16] Char b: 1.1896306818181819 % +[17] Char v: 1.1200633383010432 % +[18] Char f: 1.1136014344262295 % +[19] Char ș: 0.9621250931445604 % +[20] Char ț: 0.9481534090909091 % +[21] Char î: 0.9207921944858419 % +[22] Char z: 0.7800856929955291 % +[23] Char h: 0.5981045081967213 % +[24] Char â: 0.4369062034277198 % +[25] Char j: 0.26097941505216093 % +[26] Char k: 0.23757684426229508 % +[27] Char x: 0.22762201937406856 % +[28] Char y: 0.15165098733233978 % +[29] Char w: 0.11689642324888226 % +[30] Char é: 0.021830756333830106 % +[31] Char á: 0.018628912071535022 % +[32] Char q: 0.016882451564828614 % -The first 33 characters have an accumulated ratio of 0.9995676451685602. +The first 33 characters have an accumulated ratio of 0.9992402896795829. +The first 3 characters have an accumulated ratio of 0.32848477086438155. +All characters whose order is over 20 have an accumulated ratio of 0.03787956408345752. -1066 sequences found. +1337 sequences found. -First 512 (typical positive ratio): 0.9975318123681904 -Next 512 (512-1024): 0.01016534650986153 -Rest: 4.3355868061878584e-05 +First 478 (typical positive ratio): 0.9950281325668789 +Next 301 (779-478): 0.003975199459074541 +Rest: 0.0009966679740465167 -- Processing end: 2021-03-16 20:04:01.293047 +- Processing end: 2022-12-14 18:14:02.730012 diff --git a/script/BuildLangModelLogs/LangSlovakModel.log b/script/BuildLangModelLogs/LangSlovakModel.log index 429f32e..f770461 100644 --- a/script/BuildLangModelLogs/LangSlovakModel.log +++ b/script/BuildLangModelLogs/LangSlovakModel.log @@ -1,175 +1,251 @@ = Logs of language model for Slovak (sk) = - Generated by BuildLangModel.py -- Started: 2021-03-21 12:48:41.368218 +- Started: 2022-12-14 18:12:22.481654 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Európska_únia (revision 7169513) -1. decembra (revision 6792273) -1. júl (revision 7066144) -1. svetová vojna (revision 7159151) -10 centov (euro) (revision 6293215) -1952 (revision 7177031) -1957 (revision 7078231) -1958 (revision 7144704) -1960 (revision 7163978) -1967 (revision 7016805) -1968 (revision 7173483) -1973 (revision 7149623) -1979 (revision 7169115) -1981 (revision 7066520) -1985 (revision 7161691) -1986 (revision 7151177) -1987 (revision 7065067) -1990 (revision 7178863) -1992 (revision 7135542) -1993 (revision 7122277) -1995 (revision 7133683) -1999 (revision 7133241) +Európska_únia (revision 7505135) +Jednotný európsky akt (revision 6849394) +Americký dolár (revision 7355343) +2013 (revision 7417007) +Organizácia Spojených národov (revision 7452235) +Zmluvy o Európskych spoločenstvách (revision 6378643) +Francúzsko (revision 7428153) +Madrid (revision 7322736) +Sanmarínska líra (revision 5313291) 1 cent (euro) (revision 6963154) -1 euro (revision 6264994) -2003 (revision 7135529) -2004 (revision 7149802) 2007 (revision 7135534) -2008 (revision 7156084) -2009 (revision 7135536) -2013 (revision 7135522) -2016 (revision 7159554) -2017 (revision 7174262) -20 centov (euro) (revision 6293208) -23. jún (revision 7052430) -2 centy (euro) (revision 6963155) -2 eurá (revision 6452782) -31. december (revision 7149783) -50 centov (euro) (revision 6293202) -5 centov (euro) (revision 6963157) -Acquis communautaire (revision 7033703) -Al Gore (revision 7146244) -Albánsko (revision 7172414) -Americký dolár (revision 7050515) +Európske spoločenstvá (revision 5809901) +Kanada (revision 7489149) +Endeavour (raketoplán) (revision 7387837) +Európske spoločenstvo pre uhlie a oceľ (revision 7388970) +Alžírsko (revision 7356228) +1999 (revision 7359099) +1993 (revision 7331018) +Pamätné euromince roku 2010 (revision 6939319) +1955 (revision 7411933) +2009 (revision 7452090) +God Save the King (revision 7435019) +Lisabonská zmluva (revision 6839508) +Euro (revision 7496154) +Ekonómia (revision 6964325) +1951 (revision 7503833) +1. január (revision 7442121) +Monacké euromince (revision 7403197) +Americké Panenské ostrovy (revision 6888602) +Mira Figarová (revision 7459248) +Fidži (revision 7269015) +Rabat (mesto) (revision 6399996) +Anglická libra (revision 7076787) +Vendée (departement) (revision 6132679) +Česká koruna (mena) (revision 7343603) +Euratom (revision 6471920) +Angličtina (revision 7323198) +Nemecko (revision 7446359) +Fínska marka (revision 7244919) +Federácia (revision 7394077) +8. december (revision 7507681) +Chorvátska kuna (revision 7416969) +9. december (revision 7286553) +1965 (revision 7407021) +HIV (revision 7358721) +Mnajdra (revision 7039457) +Talianska líra (revision 6860722) +1952 (revision 7412021) +1. decembra (revision 7501349) +Európska menová jednotka (revision 6682421) +Kuba (revision 7351612) +Ján Pavol II. (revision 7489866) +Medzinárodný menový fond (revision 7287333) +Ille-et-Vilaine (revision 5907131) +Európsky mechanizmus výmenných kurzov (revision 7050511) +2009 na Slovensku (revision 7506114) +Citroën (revision 7444181) +Marshallove ostrovy (revision 7323029) +Pugwashské konferencie o vede a svetových záležitostiach (revision 6492456) +Jozef Adamovič (revision 7454130) +Kišiňov (revision 7220681) +Tawakkul Karmánová (revision 7463239) +19. júl (revision 7358326) +Európske hospodárske spoločenstvo (revision 6927031) +Prvé národy (revision 5972801) +1830 (revision 6598080) +Hektár (revision 7287211) +Francúzština (revision 7392820) +Grécko (revision 7505377) +15. október (revision 7484010) +Rumunský lei (revision 7055613) +Eurozóna (revision 7412153) +EHS (revision 6927031) +6. august (revision 7358963) +Nórsko (revision 7325338) +Pamätné euromince roku 2013 (revision 7340709) +Stavanger (revision 7037103) +Finistère (revision 7400442) +CBS (revision 6808813) +STS-118 (revision 6496126) +Hospodárska a menová únia (Európska únia) (revision 7449673) +Vatikánska líra (revision 5313290) +18. apríl (revision 7364744) +Úradný jazyk (revision 6663801) +1944 (revision 7499325) +Meny naviazané na euro (revision 5485629) +Teória veľkého tresku (seriál) (revision 7438745) +Havana (revision 6543679) +Kanadský dolár (revision 6788634) +Pamätné euromince roku 2009 (revision 7340706) +2 eurá (revision 7433871) +Severné Mariány (revision 6081307) +Štvrťdolár (americký dolár) (revision 5347867) +Zmluva z Nice (revision 7300592) +Charles Gordon Fullerton (revision 7459694) +1931 (revision 7493391) +Štát (revision 7405634) +Francúzsky frank (revision 7217722) +Eswatini (revision 7449861) +Zmluva o založení Európskeho spoločenstva uhlia a ocele (revision 7060412) +Mangán (revision 7321690) +Turkménsko (revision 7394403) +1958 (revision 7417166) +Malála Júsufzajová (revision 7463028) +Hugo Chávez (revision 7461838) +Vaduz (revision 7425578) +Maltská líra (revision 5546923) +Spojené kráľovstvo (revision 7479740) +Singapur (štát) (revision 7396325) +26. december (revision 7493395) +Voľba prezidenta Českej republiky v roku 2013 (revision 7421358) +Božin Laskov (revision 7464759) +Írska libra (revision 5657268) +Rímske zmluvy (revision 5859841) +Karen Blacková (revision 7456061) +Taliansko (revision 7475272) +Poľské euromince (revision 7096711) +Frederik Willem de Klerk (revision 7463655) +Arktída (revision 7359079) +Al Oerter (revision 7467537) Amsterdamská zmluva (revision 7070102) -Angličtina (revision 7148052) -Angola (revision 7035956) -Antigua a Barbuda (revision 6560340) -Argentína (revision 7171908) -Arménsko (revision 7147325) -Atény (revision 7150984) -Austrália (štát) (revision 7154003) -Azory (revision 6595058) -Bahrajn (revision 7178284) -Bangladéš (revision 7147804) -Barack Obama (revision 7158748) -Barbados (revision 7178784) -Belgicko (revision 7163339) -Belgický frank (revision 6953531) -Belize (revision 7156055) -Benin (revision 7172640) -Bolívia (revision 7111159) -Botswana (revision 7158699) -Brazília (revision 7177507) -Brettonwoodská menová sústava (revision 6710540) -Brunej (revision 6975045) -Brusel (revision 7037073) -Bulharsko (revision 7177290) +Mary Simonová (revision 7470411) +Poldolár (americký dolár) (revision 5712056) +Cantal (revision 5837843) +Utrecht (revision 7289014) +Senegal (revision 7345517) +Apartheid (revision 7227068) +Španielske euromince (revision 7334333) +40. roky 20. storočia (revision 7271198) +Zámorská správna korporácia (revision 6482452) +1914 (revision 7501675) +Turks a Caicos (revision 7435094) +Cupola (revision 7362414) +Malta (revision 7483782) +1953 (revision 7448807) +1. marec (revision 7501678) +Guyana (štát) (revision 7156315) +17. september (revision 7483746) +Kingmanov útes (revision 6935024) +Portugalské euromince (revision 7505352) +21. december (revision 7354221) +Parlamentná republika (revision 7344097) +Kolumbia (revision 7421669) +Konvertibilná marka (revision 6923722) +Ázia (revision 7360089) +1940 (revision 7493446) +30. november (revision 7500793) +2000 (revision 7250532) +Západná Virgínia (revision 6902136) +Konžská demokratická republika (revision 7476082) +Chris Brown (atlét) (revision 6414324) +Šalamúnove ostrovy (štát) (revision 7435540) +2002 (revision 7305819) +Simon Helberg (revision 7461031) +18. február (revision 7149312) +Stredoeurópsky čas (revision 6718481) +Luxembursko (revision 7499588) +Švédska koruna (revision 7343598) +Rakúske euromince (revision 7431715) +Martin Morháč (revision 6625693) +Nový Zéland (revision 7492612) +1929 (revision 7499285) +1890 (revision 6813799) +Luxemburský frank (revision 5546922) +Európska únia (revision 7505135) +Sandro Botticelli (revision 7456384) +Josef Masopust (revision 7466064) +Odvetvie národného hospodárstva (revision 7201316) +Islandská koruna (revision 7343600) Bulharský lev (revision 6230899) -Bulharčina (revision 7150125) -Burkina (revision 7158783) -Burundi (revision 7049945) -Ceuta (revision 6575679) -Charles Michel (revision 7098830) -Chorvátska kuna (revision 6935490) -Chorvátsko (revision 7131429) -Chorvátčina (revision 7178832) -Clo (revision 6894735) -Cyperská libra (revision 5964697) -Cyprus (revision 7035263) -David-Maria Sassoli (revision 7032560) -David Cameron (revision 7078464) -Demokracia (revision 7049807) -Denis Mukwege (revision 6800186) -Dominika (štát) (revision 7126694) -Dominikánska republika (revision 7080374) -Drachma (novoveké Grécko) (revision 6391564) -Druhá svetová vojna (revision 7151355) -Dunaj (revision 7150320) -Dánska koruna (revision 6125942) -Dánsko (revision 7161625) -Dánčina (revision 6557304) -Džibutsko (revision 7111764) -EHS (revision 6927031) -Eduard Kukan (revision 7079321) -Egypt (revision 7151318) -Ekvádor (revision 7073543) -Ellen Johnsonová- Sirleafová (revision 7151906) -Estónska koruna (revision 6751629) -Estónsko (revision 7148919) +Zoznam nositeľov Nobelovej ceny za mier (revision 7447902) +Lee Aronsohn (revision 7454866) +27. február (revision 7350544) +Lantán (revision 6876259) +Slovinsko (revision 7409771) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-21 13:00:32.553701 +- Wikipedia parsing ended at: 2022-12-14 18:25:49.973891 -70 characters appeared 674892 times. +69 characters appeared 857820 times. Most Frequent characters: -[ 0] Char a: 8.935503754674821 % -[ 1] Char o: 8.347409659619613 % -[ 2] Char e: 8.052103151319026 % -[ 3] Char n: 6.170320584626874 % -[ 4] Char r: 6.046300741451965 % -[ 5] Char i: 5.852195610556948 % -[ 6] Char s: 5.3632284869282785 % -[ 7] Char k: 4.751278723114217 % -[ 8] Char t: 4.600439774067555 % -[ 9] Char l: 4.167037096305779 % -[10] Char v: 4.090580418792933 % -[11] Char m: 3.1385762462734776 % -[12] Char d: 2.7853345424156752 % -[13] Char u: 2.7336225647955525 % -[14] Char p: 2.6873929458342967 % -[15] Char c: 2.5881178025521123 % -[16] Char á: 2.0701089952170126 % -[17] Char h: 2.0477350450146097 % -[18] Char j: 1.9521641981235516 % -[19] Char b: 1.921344452149381 % -[20] Char z: 1.6398179264237835 % -[21] Char y: 1.3830361005909093 % -[22] Char ý: 1.2827237543192096 % -[23] Char í: 0.8906610242824038 % -[24] Char č: 0.8473948424340486 % -[25] Char é: 0.7884224438873183 % -[26] Char ú: 0.7808656792494206 % -[27] Char g: 0.749897761419605 % -[28] Char f: 0.6475110091688744 % -[29] Char š: 0.6189138410293795 % -[30] Char ž: 0.4720755320851336 % -[31] Char ľ: 0.4089543215803418 % -[32] Char ó: 0.3095310064425123 % -[33] Char ť: 0.24344635882481935 % -[34] Char w: 0.11735210967088068 % -[35] Char ô: 0.10297943967331069 % -[36] Char ä: 0.09142203493299668 % -[37] Char x: 0.08312441101687382 % -[38] Char ň: 0.07201152184349496 % -[39] Char ď: 0.06993711586446424 % -[40] Char q: 0.017187935254825957 % -[41] Char ë: 0.011112889173378852 % -[42] Char ř: 0.010075686183863493 % -[43] Char ü: 0.009186655049993185 % -[44] Char ě: 0.008445795771767926 % -[45] Char ö: 0.007260420926607517 % -[46] Char ĺ: 0.006371389792737208 % -[47] Char ć: 0.006223217937092157 % -[48] Char ŕ: 0.0044451556693515405 % +[ 0] Char a: 8.634095731039146 % +[ 1] Char o: 8.548646569210323 % +[ 2] Char e: 8.07651954955585 % +[ 3] Char n: 6.528758947098459 % +[ 4] Char i: 5.959175584621482 % +[ 5] Char r: 5.869296589028001 % +[ 6] Char s: 4.982863537805134 % +[ 7] Char t: 4.648644237718869 % +[ 8] Char k: 4.502109999766851 % +[ 9] Char v: 4.204028817234385 % +[10] Char l: 3.9301951458347903 % +[11] Char m: 3.220139423189014 % +[12] Char u: 2.861672611969877 % +[13] Char p: 2.850248303839966 % +[14] Char d: 2.8298477536079827 % +[15] Char c: 2.579678720477489 % +[16] Char h: 2.081089272807815 % +[17] Char á: 2.0390058520435526 % +[18] Char j: 1.9357207805833392 % +[19] Char z: 1.778111958219673 % +[20] Char b: 1.6041826956704204 % +[21] Char y: 1.4230258096104076 % +[22] Char ý: 1.271012566738943 % +[23] Char í: 0.9336457531883147 % +[24] Char č: 0.877689958266303 % +[25] Char é: 0.8265137208272131 % +[26] Char ú: 0.8254645496724256 % +[27] Char g: 0.683476720057821 % +[28] Char š: 0.6821943997575248 % +[29] Char f: 0.6049054580214963 % +[30] Char ž: 0.5438203818983004 % +[31] Char ľ: 0.3925065864633606 % +[32] Char ó: 0.32302814110186284 % +[33] Char ť: 0.2678883681891306 % +[34] Char w: 0.11809004219999533 % +[35] Char ô: 0.11680772189969924 % +[36] Char ä: 0.1057331374880511 % +[37] Char x: 0.09897181226830803 % +[38] Char ň: 0.08743092956564313 % +[39] Char ď: 0.07239280968035253 % +[40] Char q: 0.014105523303257093 % +[41] Char ë: 0.010025413256860414 % +[42] Char ĺ: 0.009792264111352032 % +[43] Char ř: 0.008626518383810124 % +[44] Char ü: 0.007227623510759833 % +[45] Char ě: 0.00547900491944697 % +[46] Char ŕ: 0.005362430346692779 % -The first 49 characters have an accumulated ratio of 0.9998118217433309. +The first 47 characters have an accumulated ratio of 0.9997924972604978. +The first 5 characters have an accumulated ratio of 0.37747196381525266. +All characters whose order is over 27 have an accumulated ratio of 0.034743885663659047. -1410 sequences found. +1383 sequences found. -First 773 (typical positive ratio): 0.9950030300775062 -Next 277 (1050-773): 0.003999347913144824 -Rest: 0.0009976220093489419 +First 768 (typical positive ratio): 0.9950176374237621 +Next 243 (1011-768): 0.0039886852153107055 +Rest: 0.0009936773609271476 -- Processing end: 2021-03-21 13:00:33.050085 +- Processing end: 2022-12-14 18:25:50.477973 diff --git a/script/BuildLangModelLogs/LangSloveneModel.log b/script/BuildLangModelLogs/LangSloveneModel.log index bd280af..310078e 100644 --- a/script/BuildLangModelLogs/LangSloveneModel.log +++ b/script/BuildLangModelLogs/LangSloveneModel.log @@ -1,53 +1,227 @@ = Logs of language model for Slovene (sl) = - Generated by BuildLangModel.py -- Started: 2021-03-21 14:46:51.759879 +- Started: 2022-12-14 18:14:43.766544 - Maximum depth: 4 -- Max number of pages: 1 +- Max number of pages: 200 == Parsed pages == -Ljubljana (revision 5468628) -1689 (revision 4230028) +Ljubljana (revision 5845001) +25. junij (revision 5725754) +Anton Vodnik (revision 5756756) +Seznam občin v Sloveniji (revision 5812503) +Protireformacija (revision 5597661) +Marija Vogelnik (revision 5530732) +Academia operosorum Labacensis (revision 5228146) +Wayback Machine (revision 5793624) +Zdravstveni dom Ljubljana (revision 5830659) +Henrika Langus (revision 5754885) +Kranj (revision 5841490) +Zdravstveni dom Ljubljana Bežigrad (revision 1248325) +Seznam slovenskih slikarjev (revision 5852050) +URL (revision 5769695) +15. julij (revision 5791850) +Barok (revision 5810195) +Avstro-Ogrska (revision 5751429) +5. junij (revision 5705966) +Občina Radovljica (revision 5721756) +1876 (revision 5571931) +Prežihov Voranc (revision 5832321) +Zdravstveni dom Ljubljana Moste-Polje (revision 687420) +Frančišek Krištof Bogataj (revision 4464230) +Seznam rimskokatoliških samostanov v Sloveniji (revision 5726922) +Programski jezik java (revision 5806424) +Občina Semič (revision 5694516) +1671 (revision 4230004) +Bajt (revision 5759314) +15. junij (revision 5427754) +6. julij (revision 5450369) +SUDOC (identifikator) (revision 5038792) +Križ na gori (revision 4470239) +CONOR (identifikator) (revision 5817017) +Siniša Anđelković (revision 5493259) +27. avgust (revision 5400489) +Turnišče (revision 5791724) +Tenetiše, Kranj (revision 5332603) +15. oktober (revision 5821286) +SICRIS (identifikator) (revision 5746332) +Črna gora (revision 5780318) +RTV Ljubljana (revision 5833945) +Ciciban (revija) (revision 5519988) +Škof (revision 5737098) +1964 (revision 5727085) +1901 (revision 5118080) +Zdravstveni dom Ljubljana Center (revision 2812862) +Slovenska biografija (revision 5744738) +31. oktober (revision 5826716) +Praga (revision 5826163) +30. november (revision 5843640) +Augsburški verski mir (revision 4762987) +15. vlada Republike Slovenije (revision 5852342) +Republika Slovenija (revision 5851315) +1960 (revision 5643848) +Josip Ribičič (revision 5776186) +19. julij (revision 5400443) +Idrija (revision 5774634) +Inkvizicija (revision 5623144) +19. april (revision 5499445) +Svetovni splet (revision 5623360) +Ajdovščina (revision 5705518) +Arhitekt (revision 5485114) +WHOIS (revision 5119932) +Ekspresionizem (revision 5713471) +OF (revision 5783709) +Socialistična federativna republika Jugoslavija (revision 5775148) +Zdravstvo (revision 5794058) +Indija (revision 5802236) +Nürnberg (revision 5845119) +Zdravstveni dom Ljubljana Vič-Rudnik (revision 687423) +Delo (časopis) (revision 5846924) +VIAF (identifikator) (revision 5545891) +Esej (revision 5649081) +Slovenski biografski leksikon (revision 5273133) +1272 (revision 4571182) +Marko Gerbec (revision 5686618) +Občina Žalec (revision 5668168) +Simonija (revision 5848400) +Spletna stran (revision 5789409) +Znanost (revision 5794271) +Mestni linijski prevoz Kranj (revision 5780663) +Zdravstveni dom Ljubljana Šiška (revision 5794056) +SBI (identifikator) (revision 5744738) +Izobraževanje (revision 5775188) +Književnost (revision 5824541) +Občina Radeče (revision 5716401) +Lirika (revision 5643045) +Seznam zdravstvenih ustanov v Ljubljani (revision 3172002) +31. marec (revision 5818816) +Kolegij (revision 5763216) +Mesto (revision 5560057) +Virginija (revision 4941396) +Nizozemska (revision 5834147) +Slovenski evrokovanci (revision 5789153) +Portret (revision 5800433) +Ustavno sodišče (revision 5837444) +Strežnik (revision 5169883) +Društvo (revision 5770980) +Zastava Slovenije (revision 5793955) +Mestna občina Ljubljana (revision 5758866) +Smrekarjeva nagrada (revision 5737064) +Tridentinski koncil (revision 5791570) +Matej Langus (revision 5755642) +Surrealizem (revision 5723650) +Izola (revision 5803485) +Novo mesto (revision 5832728) +Bovec (revision 5722914) +Identifikator digitalnega objekta (revision 5830808) +Nadškof (revision 5477348) +Občina Cerklje na Gorenjskem (revision 5343819) +Knjiga (revision 5818947) +Občina Muta (revision 5600697) +Hiperpovezava (revision 5774360) +Seznam naselij v Sloveniji (revision 5849725) +1900. (revision 5572940) +1310 (revision 5839384) +19. stoletje (revision 5369893) +Meja, Kranj (revision 5332579) +1917 (revision 5699403) +Zgodovina Slovenije (revision 5840441) +Evropa (revision 5840336) +Javornik, Kranj (revision 5332572) +Četrti lateranski koncil (revision 5794585) +Zalog, Kranj (revision 5332606) +Čile (revision 5809346) +Ernst Ludwig Kirchner (revision 4748071) +NK Maribor (revision 5827762) +Boris Kidrič (revision 5823569) +Turizem v Sloveniji (revision 5733081) +Republika Črna Gora (revision 5780318) +Novela (književnost) (revision 5618646) +Dvojezičnost (revision 5799533) +4. junij (revision 5823896) +Teologija (revision 5764463) +Avtocesta (revision 5799828) +Judje (revision 5776337) +Freskant (revision 5836014) +Tone Hrovat (revision 5811657) +Naselje (revision 5846409) +Šentvid pri Stični (revision 5794839) +Malta (revision 5779553) +Prešernov trg, Ljubljana (revision 5566256) +1880. (revision 5193207) +Merska enota (revision 4758215) +18. september (revision 5571615) +Neodvisnost (revision 4918206) +OECD (revision 5672387) +Grb Slovenije (revision 5699153) +Theodor W. Adorno (revision 5501686) +Makedonščina (revision 5834583) +Zakrament (revision 5612915) +Oceanija (revision 5805483) +Giuseppe Verdi (revision 5812764) +Antena (revision 5756674) +Napoleonske vojne (revision 5845765) +16. april (revision 5729320) +7. januar (revision 5452242) +Malo Trebeljevo (revision 5758855) +Habsburžani (revision 5758069) +Občina Starše (revision 5489237) +Advent (revision 5733291) +Japonska (revision 5849824) +Gozd, Ajdovščina (revision 5330564) +Augsburg (revision 5796795) +Dedukcija (revision 5558477) +1730 (revision 4633009) +Kalvinizem (revision 5477790) +Hroštule (revision 5774501) +Seznam desetletij (revision 5341034) +Ogrska (revision 5743966) +Tajska (revision 5815033) +Andronik III. Paleolog (revision 5197524) +Almadén (revision 5606144) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-21 14:47:12.578759 +- Wikipedia parsing ended at: 2022-12-14 18:20:53.180577 -34 characters appeared 32235 times. +64 characters appeared 1145033 times. Most Frequent characters: -[ 0] Char e: 10.097719869706841 % -[ 1] Char a: 9.846440204746393 % -[ 2] Char i: 8.760663874670389 % -[ 3] Char o: 8.515588645881806 % -[ 4] Char n: 7.299519156196681 % -[ 5] Char l: 5.546765937645416 % -[ 6] Char j: 5.264464091825656 % -[ 7] Char r: 5.053513261982317 % -[ 8] Char s: 5.000775554521483 % -[ 9] Char t: 4.814642469365596 % -[10] Char v: 4.374127501163332 % -[11] Char k: 3.4993020009306655 % -[12] Char m: 2.9253916550333487 % -[13] Char d: 2.888165038002172 % -[14] Char p: 2.869551729486583 % -[15] Char u: 2.574841011323096 % -[16] Char b: 2.233597021870638 % -[17] Char z: 1.8458197611292075 % -[18] Char g: 1.48596246316116 % -[19] Char č: 1.181945090739879 % -[20] Char š: 1.0671630215604158 % -[21] Char h: 1.0361408407011012 % -[22] Char c: 0.9492787342950209 % -[23] Char ž: 0.5739103458973166 % -[24] Char f: 0.210950829843338 % -[25] Char x: 0.018613308515588647 % -[26] Char w: 0.018613308515588647 % -[27] Char y: 0.015511090429657206 % -[28] Char ü: 0.009306654257794323 % -[29] Char ö: 0.006204436171862882 % -[30] Char q: 0.006204436171862882 % -[31] Char ř: 0.003102218085931441 % -[32] Char á: 0.003102218085931441 % -[33] Char ý: 0.003102218085931441 %
\ No newline at end of file +[ 0] Char o: 23.108416962655227 % +[ 1] Char a: 8.59486145814138 % +[ 2] Char e: 8.358711058982578 % +[ 3] Char i: 7.870166187350058 % +[ 4] Char n: 6.085588799624116 % +[ 5] Char r: 4.668337069761308 % +[ 6] Char s: 4.369306386802826 % +[ 7] Char l: 3.989492005907253 % +[ 8] Char t: 3.8656527803128817 % +[ 9] Char v: 3.695439345416246 % +[10] Char j: 3.6508118106639724 % +[11] Char k: 3.216239182626177 % +[12] Char d: 2.745685058858566 % +[13] Char p: 2.4462177072625857 % +[14] Char m: 2.322727816578212 % +[15] Char u: 1.8705137755855072 % +[16] Char z: 1.683357597553957 % +[17] Char b: 1.499083432529892 % +[18] Char g: 1.3423193916681877 % +[19] Char č: 0.956042314937648 % +[20] Char h: 0.9555183125726507 % +[21] Char c: 0.9322002073302691 % +[22] Char š: 0.7818988623035319 % +[23] Char ž: 0.4426073309677538 % +[24] Char f: 0.37143034305561495 % + +The first 25 characters have an accumulated ratio of 0.9982262519944842. +The first 2 characters have an accumulated ratio of 0.3170327842079661. +All characters whose order is over 19 have an accumulated ratio of 0.03483655056229821. + +912 sequences found. + +First 420 (typical positive ratio): 0.9950318187902709 +Next 144 (564-420): 0.003969660901056105 +Rest: 0.0009985203086729788 + +- Processing end: 2022-12-14 18:20:53.647128 diff --git a/script/BuildLangModelLogs/LangSpanishModel.log b/script/BuildLangModelLogs/LangSpanishModel.log index 3b3611c..a6770d3 100644 --- a/script/BuildLangModelLogs/LangSpanishModel.log +++ b/script/BuildLangModelLogs/LangSpanishModel.log @@ -1,161 +1,257 @@ = Logs of language model for Spanish (es) = - Generated by BuildLangModel.py -- Started: 2021-03-16 11:26:55.275471 +- Started: 2022-12-14 18:14:38.434845 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Wikipedia:Portada (revision 123425818) -15 de marzo (revision 134002604) -16 de marzo (revision 133912948) -17 de marzo (revision 134014922) -1971 (revision 133880223) -1996 (revision 133952177) -2021 (revision 134007556) -Accipitridae (revision 132476150) -Alan Tudyk (revision 133512405) -América del Norte (revision 133943336) -Aquila (animal) (revision 117806396) -Aquila chrysaetos (revision 131580419) -Aranjuez (revision 134014704) -Aurora Cornu (revision 134003763) -Beatlemanía en el Reino Unido (revision 127586904) -Bebe Daniels (revision 134008877) -Bob Walkup (revision 134014429) -Carlos IV de España (revision 133996847) -Carlos Velasco Carballo (revision 133836906) -Centre Bell (revision 121340657) -Chemancheri Kunhiraman Nair (revision 134003097) -Comunidad de Madrid (revision 133999674) -Copa Libertadores 2021 (revision 134005909) -Copa Libertadores Femenina 2020 (revision 134010574) -Derecho del consumo (revision 133897891) -Elecciones generales de los Países Bajos de 2021 (revision 133978338) -España (revision 133914408) -Especie (revision 133121989) -Estados Unidos (revision 134015635) -Eurasia (revision 133581203) -Familia (biología) (revision 132469743) -Fernando VII de España (revision 133996527) -Fernando VI de España (revision 133038817) -Frankie de la Cruz (revision 134001053) -Fundación Wikimedia (revision 133870365) -Género (biología) (revision 132578267) -Henry Darrow (revision 134014606) -Hueso oracular (revision 133943486) -Hungría (revision 133720525) -Incendios en la Patagonia argentina de 2021 (revision 134013731) -Iztapalapa (revision 133879018) -Jim Dornan (revision 134003749) -Josep Anton Codina Olivé (revision 134001289) -Laxman Pai (revision 134003882) -Lorenzo I Suárez de Figueroa (revision 130396052) -Manuel Godoy (revision 133790185) -Mark Lubotski (revision 134012323) -Marvin Hagler (revision 133992695) -Motín de Aranjuez (revision 133995861) -Orden de Santiago (revision 132833929) -Pandemia de COVID-19 (revision 133965486) -Partido judicial de Aranjuez (revision 119111968) -Pirámide de Mayo (revision 134001660) -Problema de los puentes de Königsberg (revision 133982384) -Protestas en Birmania de 2021 (revision 134001180) -Protestas en Paraguay de 2021 (revision 133946792) -Raoul Casadei (revision 134004118) -Reconquista (revision 134015214) -Reserva del Regajal-Mar de Ontígola (revision 130660000) -Reserva natural del Carrizal de Villamejor (revision 133997932) -Revolución de Octubre (revision 133949459) -Reyes Católicos (revision 133817736) -Río Jarama (revision 133989542) -Sitio de Osaka (revision 133841594) -Tajo (revision 133599349) -The Beatles (revision 133923045) -The Beatles en los Estados Unidos (revision 132489522) -Thomas E. Dewey (revision 133282206) -Tirreno-Adriático 2021 (revision 133997271) -Torneo de Acapulco 2021 (revision 134007806) -Torneo de Dubái 2021 (revision 133993636) -Wikipedia en español (revision 133678639) -Zona especial de conservación (revision 125067184) -África (revision 133842601) -Águila (revision 133882643) -0 de marzo (revision 124578320) -1086 (revision 131546903) -10 de marzo (revision 133880871) -1190 (revision 133725464) -11 de marzo (revision 133917865) -12 Horas de Sebring (revision 130945879) -12 de marzo (revision 133976376) -1311 (revision 130818429) -13 de febrero (revision 133955522) -13 de marzo (revision 133955664) -1416 (revision 130880976) -1455 (revision 130905583) -1493 (revision 130905628) -14 de marzo (revision 133988159) -1530 (revision 130937867) -1552 (revision 133149262) -1575 (revision 130983277) -1583 (revision 130984233) -1591 (revision 130984579) -1594 (revision 130984689) -15 de abril (revision 134002206) -15 de febrero (revision 133748957) -1638 (revision 131037338) -1657 (revision 131045532) -1660 (revision 131045617) -1666 (revision 132768900) +España (revision 147846877) +Golfo de Vizcaya (revision 147055501) +Baños de la Encina (revision 147079175) +Tomates (revision 147602893) +Incidente de la isla Perejil (revision 147813920) +Cultura de los Millares (revision 147715388) +Système universitaire de documentation (revision 143750095) +Esperanza de vida (revision 147875006) +Batalla de Covadonga (revision 147758019) +Catolicismo (revision 147926468) +Alfonso I de Aragón (revision 147883096) +Zona vascófona de Navarra (revision 141084534) +Oriente Próximo (revision 147910543) +Administración apostólica de Albania Meridional (revision 142851547) +Burgos de Pamplona (revision 146933644) +Universidad de Alcalá (revision 146652064) +Fantova (revision 124905015) +Arive (revision 147900679) +Galipot (revision 145299963) +El Frago (revision 147907701) +Villanueva de Aézcoa (revision 145392582) +Beinza-Labayen (revision 143806179) +Llanura abisal (revision 143251399) +Control de autoridades (revision 147901716) +Troya (revision 146801619) +Betelu (revision 143364398) +Botánica (revision 146722327) +Creciente Fértil (revision 147343273) +Premio en Ciencias Económicas en memoria de Alfred Nobel (revision 145733932) +Gemeinsame Normdatei (revision 146776905) +Ciencia (revision 147395420) +Avempace (revision 146577045) +Loira Atlántico (revision 147836674) +Irak (revision 147592690) +Bilbao (revision 147854614) +Vitamina A (revision 147834451) +Provincia de Almería (revision 147850090) +Barroco (revision 147928221) +Concarneau (revision 147558555) +Zugarramurdi (revision 147887215) +Wayback Machine (revision 147380387) +Iglesia ortodoxa (revision 147489979) +Envejecimiento humano (revision 147816794) +Gourmet (revision 138056985) +Organización Mundial de la Salud (revision 147853669) +Cella (revision 146894815) +Índice de pobreza multidimensional (revision 146948111) +Educación sexual (revision 146241369) +Radical (química) (revision 147034631) +Al-Ándalus (revision 147780774) +Instituto Nacional de Estadística (España) (revision 146251702) +Grañén (revision 147721298) +Alfabeto fonético de la OTAN (revision 147783603) +Al-Maqqari (revision 146668368) +Base Aérea de Zaragoza (revision 131156543) +Al-Hurr (revision 128585733) +Sudán del Sur (revision 147891889) +Gijón (revision 147869866) +El Cairo (revision 146975228) +722 (revision 138667902) +Vitamina B5 (revision 144008272) +Claudio Sánchez-Albornoz (revision 144045436) +Landas (departamento) (revision 147836637) +Agustín de Hipona (revision 147924302) +OpenStreetMap (revision 146301100) +Egipcio (revision 147734070) +Levante español (revision 142768407) +Materialismo histórico (revision 147477625) +Oppas (revision 147059211) +Larráun (revision 144799818) +Mohammed VI de Marruecos (revision 147567782) +Proteína (revision 147684920) +Azerbaiyán (revision 147395599) +Asturias de Santillana (revision 147530668) +Peñalosa (Baños de la Encina) (revision 146809056) +Dólar (revision 147580248) +Legión Española (revision 147326269) +Idioma azerí (revision 147695143) +2001 (revision 147862309) +Lenguas bereberes (revision 147681674) +Library of Congress Control Number (revision 140572816) +País (revision 147227956) +Mecenazgo (revision 143339455) +Canadá (revision 147657905) +Zubieta (revision 144872335) +Azuela (revision 145618154) +Zarauz (revision 147525360) +Padrón municipal (revision 138762215) +José Luis Rodríguez Zapatero (revision 147913837) +Saldías (revision 138428536) +Ejército de Tierra Español (revision 147897324) +Yizia (revision 146859865) +Equidae (revision 145612148) +Archieparquía (revision 139338762) +Almizaraque (revision 138935790) +Echarri Aranaz (revision 146790066) +Placa ibérica (revision 147730673) +Aguas internacionales (revision 145803339) +Cuenca (accidente geográfico) (revision 143138322) +Sierra Morena (Jaén) (revision 147924089) +Canal de la Mancha (revision 147523338) +Código postal (revision 147000607) +Acta de Supremacía (revision 144310990) +Navantia (revision 147650925) +Our World in Data (revision 146140389) +Salud pública (revision 147682212) +Reservas estratégicas de petróleo (revision 144165636) +Tabernas (revision 145519626) +Lascuarre (revision 144997708) +África Oriental (revision 146427397) +Bajo Almanzora (revision 146808163) +Gallaecia (revision 147129153) +Alsasua (revision 147200852) +América del Norte (revision 147904330) +Revolución Industrial (revision 147701019) +Autol (revision 147896478) +Edicto de Tesalónica (revision 145638583) +Unión Europea (revision 147719558) +Museo Pushkin (revision 147488412) +Compendio del Catecismo de la Iglesia católica (revision 131933215) +Tholos (revision 146812149) +Municipio (España) (revision 147602504) +Olea europaea (revision 147662926) +Comunidad autónoma (revision 147882369) +Ondárroa (revision 147765687) +778 (revision 145057178) +Grupo Prisa (revision 147780795) +Gran Río Artificial (revision 141803019) +Liechtenstein (revision 147718277) +China (revision 147903615) +Hégira (revision 146807833) +Pozo artesiano (revision 143464501) +Población de derecho (revision 126835996) +Encyclopædia Iranica (revision 142357780) +1784 (revision 143107902) +Capital (política) (revision 147065738) +San Juan de Gaztelugatxe (revision 147641698) +ISSN (revision 147877292) +Ciencias formales (revision 145700643) +Rebelión de Pontiac (revision 147871955) +Provincia de Toledo (revision 147626596) +Segovia (revision 147741609) +Real Academia Gallega (revision 146767599) +Software de código abierto (revision 147813773) +Población mundial (revision 147900051) +Idioma portugués (revision 147750173) +Averroes (revision 147896976) +Bellas artes (revision 147435861) +Coordenadas geográficas (revision 147900581) +Concejo abierto (revision 147602486) +Comarca (revision 147589053) +Sila Calderón (revision 146921141) +República de Artsaj (revision 147847663) +Oasys MiniHollywood (revision 145324843) +Río Saja (revision 147183959) +Museo Petrie (revision 136296852) +Carta (revision 147714437) +Groenlandia (revision 147872406) +CEDAW (revision 147048175) +Barbazan-Debat (revision 120346465) +Instituto Nacional de Estadística de España (revision 146251702) +Badajoz (revision 147897251) +Puente del Arenal (revision 144958892) +Robot (revision 147815514) +Código postal de Chile (revision 146012494) +Estructura primaria de las proteínas (revision 147166640) +Biblioteca Nacional de la Dieta (revision 144085519) +Sufragio pasivo (revision 132123198) +Cabo de Gata (revision 146811697) +Petróleos Mexicanos (revision 147789018) +Cister (revision 147853981) +Casa de la Vega (revision 132851298) +Tercera revolución industrial (revision 146951299) +Plataforma Solar de Almería (revision 142991140) +Señorío de Vizcaya (revision 147527552) +Eneas (revision 147814979) +Austen Henry Layard (revision 136667881) +Padres Barnabitas (revision 141823753) +Richard Branson (revision 147632871) +Gobernanza (revision 147702601) +Tudor (revision 146104381) +1990 (revision 147834820) +ChEMBL (revision 144031401) +Poeta (revision 143173748) +Página web (revision 147843504) +Iberoamérica (revision 147927619) +Las brujas de Zugarramurdi (revision 145399459) +Alejandro Herculano (revision 147590922) +Corriente continua (revision 147833964) +Regimiento Acorazado «Pavía» n.º 4 (revision 143396700) +Canadá en los Juegos Olímpicos (revision 146305207) +Densidad de población (revision 147587635) +Archieparquía mayor de Ernakulam-Angamaly (revision 142384800) +Archieparquía mayor de Trivandrum (revision 147056812) +Puerta de Purchena (revision 143814321) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 11:33:00.148262 +- Wikipedia parsing ended at: 2022-12-14 18:18:05.348319 -54 characters appeared 1548023 times. +59 characters appeared 2847500 times. -First 33 characters: -[ 0] Char e: 12.61415366567551 % -[ 1] Char a: 11.863648020733542 % -[ 2] Char o: 8.178496055937154 % -[ 3] Char n: 7.268238262609793 % -[ 4] Char i: 6.931486160089352 % -[ 5] Char s: 6.9263182782167965 % -[ 6] Char r: 6.65558586661826 % -[ 7] Char l: 5.899266354569667 % -[ 8] Char d: 5.353731824397958 % -[ 9] Char t: 4.62951777848262 % -[10] Char c: 4.489080588595906 % -[11] Char u: 3.50666624462298 % -[12] Char m: 2.6851022239333653 % -[13] Char p: 2.477159577086387 % -[14] Char b: 1.394552923309279 % -[15] Char g: 1.3049547713438365 % -[16] Char v: 0.9327380794729794 % -[17] Char f: 0.9320274957155029 % -[18] Char y: 0.9299603429664806 % -[19] Char ó: 0.786745416573268 % -[20] Char h: 0.7480509010525037 % -[21] Char í: 0.5318396432094356 % -[22] Char q: 0.49553527305472855 % -[23] Char z: 0.47085863711327286 % -[24] Char j: 0.408844054642599 % -[25] Char á: 0.39095026365887325 % -[26] Char é: 0.305228023097848 % -[27] Char ñ: 0.23759336909076934 % -[28] Char x: 0.19883425504659816 % -[29] Char k: 0.19489374511877408 % -[30] Char ú: 0.13029522171182212 % -[31] Char w: 0.10923610308115578 % -[32] Char ü: 0.0067182464343230035 % +Most Frequent characters: +[ 0] Char e: 12.567269534679543 % +[ 1] Char a: 12.03585601404741 % +[ 2] Char o: 8.071571553994731 % +[ 3] Char n: 7.20955223880597 % +[ 4] Char s: 7.153608428446005 % +[ 5] Char i: 7.078595258999123 % +[ 6] Char r: 6.54683055311677 % +[ 7] Char l: 5.956769095697981 % +[ 8] Char d: 5.235258999122037 % +[ 9] Char c: 4.664758560140474 % +[10] Char t: 4.620403863037752 % +[11] Char u: 3.5088323090430205 % +[12] Char m: 2.6934152765583845 % +[13] Char p: 2.457243195785777 % +[14] Char b: 1.397892888498683 % +[15] Char g: 1.291624231782265 % +[16] Char v: 0.9327480245829676 % +[17] Char y: 0.8672519754170325 % +[18] Char f: 0.856505706760316 % +[19] Char ó: 0.8456540825285339 % +[20] Char h: 0.6400702370500438 % +[21] Char í: 0.5843020193151888 % +[22] Char q: 0.5208077260755049 % +[23] Char z: 0.4422827041264267 % +[24] Char á: 0.40660228270412646 % +[25] Char j: 0.3674451273046532 % +[26] Char é: 0.29892888498683057 % +[27] Char x: 0.24361720807726078 % +[28] Char ñ: 0.18121158911325724 % +[29] Char ú: 0.12684811237928006 % +[30] Char k: 0.11466198419666375 % +[31] Char w: 0.0574539069359087 % +[32] Char ü: 0.007901668129938543 % -The first 33 characters have an accumulated ratio of 0.9998830766726332. +The first 33 characters have an accumulated ratio of 0.9998377524143984. +The first 4 characters have an accumulated ratio of 0.39884249341527656. +All characters whose order is over 20 have an accumulated ratio of 0.0335206321334504. -1002 sequences found. +1131 sequences found. -First 512 (typical positive ratio): 0.9966074680689881 -Next 512 (512-1024): 6.718246434323004e-05 -Rest: 3.209238430557093e-17 +First 468 (typical positive ratio): 0.9950191343195147 +Next 177 (645-468): 0.0039894116732021034 +Rest: 0.0009914540072831768 -- Processing end: 2021-03-16 11:33:00.247475 +- Processing end: 2022-12-14 18:18:05.461637 diff --git a/script/BuildLangModelLogs/LangSwedishModel.log b/script/BuildLangModelLogs/LangSwedishModel.log index 26104e1..6e1aa5c 100644 --- a/script/BuildLangModelLogs/LangSwedishModel.log +++ b/script/BuildLangModelLogs/LangSwedishModel.log @@ -1,150 +1,249 @@ = Logs of language model for Swedish (sv) = - Generated by BuildLangModel.py -- Started: 2021-03-16 20:20:06.144954 +- Started: 2022-12-14 18:18:23.937740 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Kakapo (revision 48946696) -Akut hotad (revision 45694757) -Aotearoa (revision 48764847) -Arkive (revision 45404194) -Art (revision 48819963) -Artepitet (revision 48819963) -Auckland (revision 48740415) -Auktorsnamn (revision 46648298) -BBC (revision 48945370) -Basalomsättning (revision 48638233) -Beilschmiedia tawa (revision 47662851) -Berguv (revision 47572081) -Betesmark (revision 47837257) -Biodiversity Heritage Library (revision 48152021) -Biotop (revision 48969696) -BirdLife International (revision 47616784) -British Museum (revision 48501908) -Bröstben (revision 48379566) -CITES (revision 47938046) -Dacrydium cupressinum (revision 47442085) -Digital object identifier (revision 47511062) -Djur (revision 48964290) -Djurpark (revision 48242363) +Kakapo (revision 49828871) +Ordning (biologi) (revision 49477220) +Könsdimorfism (revision 49227758) +Understam (revision 37821817) +Näbb (revision 50932877) +Ekosystem (revision 51621713) +Mana Island (revision 50706974) +Taiaha (revision 24936148) +Kelp (revision 49338041) +Sir David Attenborough (revision 51607859) +Maoripapegojor (revision 51427181) +Fasan (revision 49697043) +Ö (landområde) (revision 51150176) +Leicestershire (revision 47632046) +Proximal (revision 49685650) +Theropoder (revision 51150214) +Ekologi (revision 51390308) +Inlandsis (revision 51265091) +Årsmedeltemperatur (revision 29488423) +Britter (revision 49461730) +Hokkaido (revision 49200550) +Undersektion (revision 44004259) +National Library of Australia (revision 48833796) Domän (biologi) (revision 48975224) -Don Merton (revision 48407169) -Douglas Adams (revision 47251802) -Däggdjur (revision 48794669) -Ekologisk nisch (revision 48844778) -Ekosystem (revision 48570659) -Endemisk (revision 48546826) -Eukaryoter (revision 48898436) -Evolution (revision 49003401) -Familj (biologi) (revision 48771961) -Femininum (revision 46628147) -Fjäder (biologi) (revision 48641138) -Fjäderdräkt (revision 48641138) -Fladdermöss (revision 48746998) -Flygg (revision 48763776) -Fossilworks (revision 43519389) -Frukter (revision 48807025) -Frö (revision 46332448) -Fylum (revision 48212330) -Fågelläte (revision 48681377) -Fåglar (revision 48837894) -Fåglarnas liv (revision 48837894) -Genitiv (revision 48658908) -George Edward Grey (revision 46365447) -George Robert Gray (revision 43056128) -Global Biodiversity Information Facility (revision 40116158) -Haasts örn (revision 48440980) -Hauturu/Little Barrier Island (revision 20537378) -Hermelin (revision 48863152) -Hertz (revision 48548540) -Hjortdjur (revision 48740321) -Hund (revision 48989960) -Husdjur (revision 48155297) -Huskatt (revision 47647609) -Hāngi (revision 46574175) -IUCN (revision 49006187) -Iller (revision 48765500) -Inaturalist (revision 48552803) -Infraröd (revision 48615998) -Integrated Taxonomic Information System (revision 48591706) -Internationella naturvårdsunionen (revision 49006187) -Internet Archive (revision 48979443) -Jordbruk (revision 48448896) -Kahurangi National Park (revision 47659423) -Kamouflage (revision 47671382) -Kaniner (revision 48911042) -Kapiti Island (revision 48553791) -Katt (revision 48986224) -Kelp (revision 46077553) -Kivier (revision 48467049) +Vingtäckare (revision 51009246) +Sexuell läggning (revision 51518165) +England (revision 51638467) +Fett (revision 50390502) +Kemisk energi (revision 51629106) +Jämställdhet (revision 51603771) +International Commission on Zoological Nomenclature (revision 50077719) +Kaka (fågel) (revision 46220460) +1926 (revision 51173302) +Transperson (revision 51622251) +Land (revision 50379893) +Underserie (revision 44004261) +Infraklass (revision 44944834) +Nanofylum (revision 48212330) +Infrafylum (revision 48212330) +Habitat (revision 51634899) +Natriumglutamat (revision 51440450) +Suva (revision 49858077) +Internationella naturvårdsunionen (revision 49705198) +Kamouflage (revision 51589424) +Fauna (revision 50265422) +Solljus (revision 50272000) +Fiji Time (revision 51628863) +Internet Archive (revision 51051535) +Form (biologi) (revision 44857646) +Meter över havet (revision 49865837) +Serie (biologi) (revision 44004261) +Dicynodonter (revision 43852828) +Öken (revision 50057233) +Tredje könet (revision 51617056) +Division (biologi) (revision 46962848) +Neognathae (revision 49351226) +Könsmaktsordning (revision 49908397) +Smakförstärkare (revision 51533831) +Infraordning (revision 49477220) +Organism (revision 51537725) +Fisködlor (revision 51101913) +Nederbörd (revision 50294650) +Skrakar (revision 49421476) +Myrpiggsvin (revision 48865885) +Mansforskning (revision 47506745) +Fiji Summer Time (revision 51628863) +Rike (biologi) (revision 50937218) +Kvadratkilometer (revision 51146141) +Kannasläktet (revision 49955866) +Purdah (revision 49269676) +Litoral (revision 47388601) +Engelska kanalen (revision 50133974) +Undersläkte (revision 51622482) +Svärdfiskar (revision 51035233) Klass (biologi) (revision 44944834) -Kroppsfett (revision 39272827) -Könsdimorfism (revision 48346350) -Könsfördelning (revision 45646592) -Lamm- och fårkött (revision 48351109) -Lek (fortplantningsbeteende) (revision 30508235) -Mandel (revision 48952857) -Maori (revision 48297968) -Maorier (revision 48066510) -Maoripapegojor (revision 46078328) -Mark Carwardine (revision 48869810) -Markpapegoja (revision 47342275) -Maskulinum (revision 46628162) -Masterton (revision 48262093) -Metrosideros umbellata (revision 46936435) -Milford Sound (revision 45323524) -Morrhår (revision 48980591) -Muskelmage (revision 41849238) -Mustela (revision 48294935) -Mårddjur (revision 48435918) +Blodkärl (revision 47473904) +Flora (botanik) (revision 51339211) +Miljö (omgivning) (revision 51475610) +Anatomi (revision 51609030) +Artundergrupp (revision 51246830) +Överfamilj (revision 47122498) +Kruger nationalpark (revision 50511277) +Varietet (biologi) (revision 48198194) +Moaörn (revision 50941002) +Genusvetenskap (revision 51641870) +Latin (revision 51408565) +Överrike (revision 50937218) +Underrike (revision 50937218) +Östpapegojor (revision 46190135) +Biogas (revision 51329860) +Hedersdoktor (revision 51579005) +Underordning (revision 49477220) +Hormonterapi (transsexualism) (revision 49985314) +Serengeti (revision 50598959) +Underdivision (revision 46997002) +Sågtång (revision 43678985) +Årsnederbörd (revision 50582293) +Underfylum (revision 48212330) +Fåglar (revision 51631929) +Queer (revision 50491618) +Överhud (revision 49716509) +Trainee (revision 49688895) +Gasell (revision 46605384) +Bladtång (revision 50277892) +Area (revision 50460691) +Ryggradsdjur (revision 51433096) +Tidszon (revision 51455267) +Pleistocen (revision 49211710) +Stipendium (revision 49644010) +Undertribus (revision 46997009) +Binomial nomenklatur (revision 51484783) +Läderhud (revision 45323117) +Solförmörkelse (revision 51217582) +Vatten (revision 51556576) +Halvö (revision 51419101) +Intersektionalitet (revision 51228488) +Danmark (revision 51615196) +Vildmark (revision 49350253) +Växtriket (revision 51581458) +Plattektonik (revision 51390439) +Ocean (revision 51432545) +Brewster Kahle (revision 47526442) +Gemeinsame Normdatei (revision 46103091) +Biologi (revision 49616572) +Överklass (biologi) (revision 47122504) +Chengjiang (lagerstätte) (revision 51413388) +Biologism (revision 49913258) +RFSL (revision 51638725) +Erasistratos (revision 47910581) +Dimensionsanalys (revision 49247252) +Arkeologisk lokal (revision 50388755) +Borrflugor (revision 49571840) +Papua Nya Guinea (revision 51608607) +Nagel (revision 51401820) +Växter (revision 51581458) +Referensbibliotek (revision 43544193) +Femme (revision 48773869) +Civilekonomerna (revision 48828707) +Allians (biologi) (revision 51622482) +Aves (revision 51631929) +Farmakologi (revision 51164270) +Hen (revision 51606248) +Molekylär klocka (revision 47887818) +Kikunae Ikeda (revision 49340311) +Vatikanstatens bibliotek (revision 43158770) +Kina (revision 51635520) +Familj (biologi) (revision 50548234) +National- och universitetsbiblioteket i Zagreb (revision 43219495) +Kräftdjur (revision 49977078) +Begränsningsarea (revision 40757907) +Transgender Day of Remembrance (revision 51636616) +Tjeckiska nationalbiblioteket (revision 46514905) +Överfylum (revision 48212330) +Svavelväte (revision 51344105) +Djur (revision 51469052) +Rum (fysik) (revision 49290047) +Systematik (biologi) (revision 51506994) +Underfamilj (revision 50548234) +Gränsvärde (revision 47179480) +Plantae (revision 51581458) +Linjär algebra (revision 50044309) +Integrated Taxonomic Information System (revision 48591706) +Neuroanatomi (revision 49426339) +Tyrannosaurus (revision 51502373) +Zebror (revision 51635419) +Metangas (revision 51580655) +Pretegelen (revision 50032174) +Feminisering (revision 50209006) +Underklass (biologi) (revision 44944834) +Edmigasell (revision 48106386) +Förenta nationernas medlemsstater (revision 51630915) +Allosaurider (revision 40888601) +Elefant (revision 51244638) +Pelagial (revision 43975416) +Vetenskapligt namn (revision 46637057) +Adjunkt (lärare) (revision 47023760) +Kön (revision 51124326) +Arkiv (revision 51182072) +Charles III (revision 51633914) +Canna pedunculata (revision 46703358) +Jordens atmosfär (revision 50939215) +Kolesterol (revision 51581405) +Gödsel (revision 49711703) +Bro (revision 51285531) +Campechebukten (revision 49690649) +Auktorsnamn (revision 51253351) +Doktorsgrad (revision 51581730) +Shackletons shelfis (revision 47822557) +Leddjur (revision 50562856) +Ainu (revision 50015241) +Mittoceanisk rygg (revision 49691134) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 20:24:13.933499 +- Wikipedia parsing ended at: 2022-12-14 18:21:28.823200 -49 characters appeared 513356 times. +52 characters appeared 872342 times. -First 30 characters: -[ 0] Char a: 9.801969783152433 % -[ 1] Char e: 9.753075838209742 % -[ 2] Char r: 9.263357202409244 % -[ 3] Char n: 8.249635730370347 % -[ 4] Char t: 7.409088429861539 % -[ 5] Char s: 6.03207131113691 % -[ 6] Char i: 5.692346052252238 % -[ 7] Char l: 5.428981057979258 % -[ 8] Char o: 4.548890049010823 % -[ 9] Char d: 4.4466218374773065 % -[10] Char m: 3.3119316809387636 % -[11] Char k: 3.0742798369942106 % -[12] Char g: 3.073890243807416 % -[13] Char f: 2.2676271437365103 % -[14] Char v: 2.2645103982421557 % -[15] Char u: 2.116464987260303 % -[16] Char ä: 2.0311440793523405 % -[17] Char h: 1.9354989519943275 % -[18] Char p: 1.8753068046346004 % -[19] Char å: 1.4903887360817833 % -[20] Char c: 1.4510398242155542 % -[21] Char b: 1.3084487178488222 % -[22] Char ö: 1.2946181597176227 % -[23] Char j: 0.7221109717233265 % -[24] Char y: 0.6866579917250407 % -[25] Char x: 0.22323689603316216 % -[26] Char w: 0.12096868449964547 % -[27] Char z: 0.07947701010604727 % -[28] Char é: 0.01577852406517115 % -[29] Char q: 0.013635761537802226 % +Most Frequent characters: +[ 0] Char e: 10.07827205385044 % +[ 1] Char a: 9.501663338461292 % +[ 2] Char r: 8.93342290065135 % +[ 3] Char n: 8.460672534395913 % +[ 4] Char t: 7.666717869826284 % +[ 5] Char s: 6.2858374353177995 % +[ 6] Char i: 5.936318553961635 % +[ 7] Char l: 5.365326901605105 % +[ 8] Char o: 4.766020666206602 % +[ 9] Char d: 4.2648410829697525 % +[10] Char m: 3.3875475444263827 % +[11] Char k: 3.270620926196377 % +[12] Char g: 2.9386410375747127 % +[13] Char v: 2.3831249670427423 % +[14] Char f: 2.099176699046933 % +[15] Char ä: 1.9664305971740441 % +[16] Char u: 1.9017770553292173 % +[17] Char p: 1.8738063741055688 % +[18] Char h: 1.8481283716707437 % +[19] Char c: 1.475682702426342 % +[20] Char å: 1.3009805787179798 % +[21] Char b: 1.238161179904212 % +[22] Char ö: 1.233346554447682 % +[23] Char y: 0.7070621384732135 % +[24] Char j: 0.5712209202354123 % +[25] Char x: 0.32452868255798756 % +[26] Char w: 0.08425594548926912 % +[27] Char z: 0.07164621215073905 % +[28] Char q: 0.02372922546432477 % -The first 30 characters have an accumulated ratio of 0.9998305269637442. +The first 29 characters have an accumulated ratio of 0.9995896104968004. +The first 4 characters have an accumulated ratio of 0.36974030827358995. +All characters whose order is over 21 have an accumulated ratio of 0.030157896788186284. -752 sequences found. +886 sequences found. -First 512 (typical positive ratio): 0.996987580875875 -Next 512 (512-1024): 0.012946181597176228 -Rest: 4.640385298237959e-17 +First 482 (typical positive ratio): 0.9950244403710493 +Next 121 (603-482): 0.003978503582736215 +Rest: 0.0009970560462144729 -- Processing end: 2021-03-16 20:24:14.019931 +- Processing end: 2022-12-14 18:21:28.869918 diff --git a/script/BuildLangModelLogs/LangThaiModel.log b/script/BuildLangModelLogs/LangThaiModel.log index 2b0d54c..fcff73d 100644 --- a/script/BuildLangModelLogs/LangThaiModel.log +++ b/script/BuildLangModelLogs/LangThaiModel.log @@ -1,202 +1,215 @@ = Logs of language model for Thai (th) = - Generated by BuildLangModel.py -- Started: 2021-03-22 17:27:13.636169 +- Started: 2022-12-14 18:21:35.253839 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -หน้าหลัก (revision 9018985) -19 มีนาคม (revision 9299253) -20 มีนาคม (revision 9310359) -21 มีนาคม (revision 9241717) -กระท่อมน้อยของลุงทอม (revision 9254683) -กองทัพพม่า (revision 9251263) -การท่องเที่ยวในกรุงเทพมหานคร (revision 9310141) -การบุกครองอิรัก พ.ศ. 2546 (revision 9216037) -การประท้วงในประเทศพม่า พ.ศ. 2564 (revision 9309885) -การประท้วงในประเทศรัสเซีย พ.ศ. 2564 (revision 9236735) -การระบาดทั่วของโควิด-19 (revision 9311180) -การระบาดทั่วของโควิด-19 ในประเทศไทย (revision 9313501) -การระบาดทั่วของไวรัสโคโรนา พ.ศ. 2562–2563 เรียงตามประเทศและดินแดน (revision 9309439) -ควาร์ก (revision 8896587) -ทรัพย์สินทางปัญญา (revision 9106943) -นกมุดน้ำ (revision 9311128) -นกเกาะคอน (revision 9235484) -นิวซีแลนด์ (revision 9099286) -บาตา (ประเทศอิเควทอเรียลกินี) (revision 8750850) -บุคคลที่เสียชีวิตในปี พ.ศ. 2564 (revision 9311174) -บูโพรพิออน (revision 9180305) -ประธานาธิบดีโซมาเลีย (revision 9145549) -ประเทศตูนิเซีย (revision 9169672) -ประเทศอิเควทอเรียลกินี (revision 9043997) -ผลกระทบทางเศรษฐกิจและสังคมของการระบาดทั่วของไวรัสโคโรนา พ.ศ. 2562–2563 (revision 9311180) -พ.ศ. 2376 (revision 9190591) -พ.ศ. 2394 (revision 9196352) -พ.ศ. 2499 (revision 9304186) -พ.ศ. 2530 (revision 9309218) -พ.ศ. 2546 (revision 9303823) -พระคเณศ (revision 9259789) -ฟุตบอล (revision 9267162) -มูลนิธิวิกิมีเดีย (revision 9155482) -ยาต้านรีโทรไวรัส (revision 7481206) -ยาต้านไวรัส (revision 9245483) -รัฐประหารในประเทศพม่า พ.ศ. 2564 (revision 9293480) -รัฐอุตตราขัณฑ์ (revision 9246094) -รายชื่อนักฟุตบอลที่ยิงประตู 500 ลูกขึ้นไป (revision 9181577) -รายชื่อบทความวันนี้ในอดีต (revision 8925803) -วลาดีมีร์ ปูติน (revision 9137037) -วัคซีนโรคติดเชื้อไวรัสโคโรนา 2019 (revision 9297189) -วันเอกราช (revision 8058565) -วิกิพีเดีย (revision 9235310) -วิกิพีเดียภาษาไทย (revision 9176821) -สงครามอิรัก (revision 9304198) -สถาปัตยกรรมกอทิก (revision 8232804) -สถาปัตยกรรมฟื้นฟูกอทิก (revision 6453482) -สนธิสัญญาไมตรีและพาณิชย์ ค.ศ. 1833 (revision 9204597) -สารานุกรม (revision 9290003) -สโมสรฟุตบอลบีจี ปทุม ยูไนเต็ด (revision 9306806) -อนุสัญญาปารีสว่าด้วยการคุ้มครองทรัพย์สินอุตสาหกรรม (revision 8759879) -อองซานซูจี (revision 9292643) -อะเลกเซย์ นาวัลนืย (revision 9230310) -อาณาจักรรัตนโกสินทร์ (สมัยสมบูรณาญาสิทธิราช) (revision 9202032) -อาลี มาห์ดิ มูฮัมหมัด (revision 9311207) -อุทกภัยจากธารน้ำแข็งแตกในรัฐอุตตราขัณฑ์ พ.ศ. 2564 (revision 9300387) -เนื้อหาเสรี (revision 9063375) -เบนจามิน เมานต์ฟอร์ต (revision 8820016) -เบอร์มิงแฮม (revision 8949103) -เส้นศูนย์สูตร (revision 9256278) -เหตุระเบิดที่บาตา พ.ศ. 2564 (revision 9301940) -เอจออฟเอ็มไพร์ส (revision 8812026) -เอชไอวี (revision 8818691) -เอแซดที (revision 5386882) -แคว้นแคนเทอร์เบอรี (revision 8763458) -แผ่นดินไหวในเกาะซูลาเวซี พ.ศ. 2564 (revision 9213896) -แฟแร็นตส์ ปุชกาช (revision 9299074) -แม่น้ำคองโก (revision 9298387) -แอฟริกัน-อเมริกัน (revision 9182892) -แฮเรียต บีเชอร์ สโตว์ (revision 6389804) -โปรตอน (โครงการดาวเทียม) (revision 9295828) -โรคติดเชื้อไวรัสโคโรนา 2019 (revision 9303763) -โรคเอดส์ (revision 8723281) -ไครสต์เชิร์ช (revision 9065152) -ไทยลีก ฤดูกาล 2563–64 (revision 9313228) -ไวรัสโคโรนาสายพันธุ์ใหม่ (SARS-CoV-2) (revision 9239363) -0 มกราคม (revision 8811984) -10 กรกฎาคม (revision 9204508) -10 กันยายน (revision 9223073) -10 กุมภาพันธ์ (revision 8791647) -10 ตุลาคม (revision 9299190) -10 ธันวาคม (revision 9187465) -10 พฤศจิกายน (revision 9255261) -10 พฤษภาคม (revision 9293733) -10 มกราคม (revision 9310096) -10 มิถุนายน (revision 8950621) -10 มีนาคม (revision 9296320) -10 สิงหาคม (revision 9287893) -10 เมษายน (revision 9239957) -11 กรกฎาคม (revision 9272225) -11 กันยายน (revision 9263121) -11 กุมภาพันธ์ (revision 9255762) -11 ตุลาคม (revision 8872097) -11 ธันวาคม (revision 9299195) -11 พฤศจิกายน (revision 9301626) -11 พฤษภาคม (revision 9295172) -11 มกราคม (revision 9273530) -11 มิถุนายน (revision 9261737) -11 มีนาคม (revision 9204281) -11 สิงหาคม (revision 9281431) +หน้าหลัก (revision 9904032) +อดอล์ฟ ฮิตเลอร์ (revision 10443141) +เลขาธิการพรรคคอมมิวนิสต์จีน (revision 10311137) +พ.ศ. 2484 (revision 10439497) +รายชื่อประธานาธิบดีสาธารณรัฐประชาชนจีน (revision 9839306) +โพรงอากาศอักเสบเรื้อรัง (revision 10253728) +ประชากรโลก (revision 10416810) +การถ่ายภาพรังสีส่วนตัดอาศัยคอมพิวเตอร์แบบความละเอียดสูง (revision 10221268) +นาซา (revision 10421933) +ประเทศอินเดีย (revision 10435810) +Add oil (revision 10416705) +ยาปฏิชีวนะ (revision 10442566) +วันประกาศเอกราช (revision 8058565) +รายนามประธานาธิบดีจีน (revision 9839306) +Nebulizer (revision 9488816) +เศรษฐกิจ (revision 8451260) +ฝรั่งเศส (revision 10470508) +OECD (revision 10175968) +ชนชาติปกครอง (revision 9325428) +อักษรจีนตัวย่อ (revision 9702441) +17 พฤษภาคม (revision 9869509) +11 มีนาคม (revision 9984720) +รัฐมหาราชา (revision 10277795) +สภาประชาชนแห่งชาติ (revision 9902737) +2019–20 Hong Kong protests (revision 10347550) +ประเทศตูนิเซีย (revision 10477262) +10 สิงหาคม (revision 10308290) +ฝ่ายอักษะ (revision 10461857) +วันชาติ (revision 10151303) +สิงคโปร์ (revision 10413782) +โรคเหตุอาชีพ (revision 8430376) +พินอิน (revision 10344015) +30 พฤศจิกายน (revision 10453860) +ฮั่นยฺหวี่พินอิน (revision 10344015) +สาธารณรัฐประชาชนจีน (revision 10433062) +โพรงเยื่อหุ้มปอด (revision 10384510) +พ.ศ. 2505 (revision 10360535) +Nasal irrigation (revision 10158613) +การป้องกัน (revision 9815460) +Cantonese (revision 9923689) +9 กรกฎาคม (revision 10030943) +โพรงเยื่อหุ้มปอดมีอากาศ (revision 8171661) +ทวีป (revision 10436320) +โครงการอวกาศโซเวียต (revision 10176372) +ซิฟิลิส (revision 10194983) +พ.ศ. 2545 (revision 10363737) +S2CID (identifier) (revision 9511789) +นีล อาร์มสตรอง (revision 10434839) +สถานีอวกาศสกายแล็บ (revision 10160690) +Central Intelligence Agency (revision 10246925) +แฟรงคลิน ดี. รูสเวลต์ (revision 10007535) +24 พฤษภาคม (revision 10117699) +อะม็อกซีซิลลิน (revision 10251261) +ฝันจีน (revision 9869393) +28 ตุลาคม (revision 10469650) +ภาษาเบงกอล (revision 10291594) +บริการสุขภาพในประเทศจีน (revision 10435037) +ไนจีเรีย (revision 9842110) +วัลเทอร์ อุลบริชท์ (revision 10188987) +ประเทศจีน (revision 10433062) +แอนาฟิแล็กซิส (revision 9745218) +รวันดา (revision 10371439) +สี จิ้นผิง (revision 10459012) +โทชิอากิ โคเซโดะ (revision 9554543) +ศรีลังกา (revision 10388909) +ภาษาจีน (revision 10406158) +ประเทศเอริเทรีย (revision 9990023) +โรคพาร์กินสัน (revision 10467833) +สหภาพโซเวียต (revision 10443093) +เจียง เจ๋อหมิน (revision 10459105) +ปากีสถาน (revision 10434940) +จ้าว จื่อหยาง (revision 10428374) +ไมเกรน (revision 10330244) +ปฏิบัติการโอเวอร์ลอร์ด (revision 10334723) +ประเทศตุรกี (revision 10463471) +รัฐบอลติก (revision 10444373) +31 ตุลาคม (revision 10287966) +6 มีนาคม (revision 9796093) +อาร์เอ็นเอ (revision 9540049) +เชโกสโลวาเกีย (revision 9894259) +ตราแผ่นดินของอินเดีย (revision 9143600) +ภาษาจีนมาตรฐาน (revision 9655673) +พ.ศ. 2541 (revision 10480110) +องค์การสนธิสัญญาป้องกันแอตแลนติกเหนือ (revision 10406526) +มะโรง (revision 10080283) +ทางเดินหายใจส่วนล่าง (revision 10451792) +พรรคกรรมกรชาติสังคมนิยมบัลแกเรีย (revision 10198328) +ดวงจันทร์ (revision 10431798) +ดิสไทเมีย (revision 10398034) +7 กรกฎาคม (revision 10004082) +อักษรเมนเด (revision 10414027) +โปแลนด์ (revision 10001295) +1 ธันวาคม (revision 10454417) +เหงียน ฟู้ จ่อง (revision 10464682) +ลัทธิเหมา (revision 10096342) +ประเทศจาเมกา (revision 10287452) +1 มีนาคม (revision 10469587) +สโลวาเกีย (revision 10047604) +ตุลาคม (revision 10348983) +PMC (identifier) (revision 9484961) +เรอูว์นียง (revision 10275504) +ภาษาอูรดู (revision 10265669) +รายชื่อประเทศและเขตการปกครองเรียงตามร้อยละของพื้นที่แหล่งน้ำ (revision 10002026) +1 มกราคม (revision 10453060) +รายชื่อประเทศเรียงตามความหนาแน่นประชากร (revision 9864334) +จิตตะกอง (revision 9621126) +ประเทศสเปน (revision 10365654) +ลิพิด (revision 9707408) +ต่อหัว (revision 9866390) +พ.ศ. 2478 (revision 10438923) +การก่อการกำเริบในทิเบต พ.ศ. 2502 (revision 8743566) +เครือจักรภพแห่งประชาชาติ (revision 10378796) +โปรตุเกส (revision 10365575) +20 กุมภาพันธ์ (revision 10133368) +ISBN (identifier) (revision 10474803) +ความเสี่ยง (revision 6948077) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-22 17:30:46.516035 +- Wikipedia parsing ended at: 2022-12-14 18:24:15.381929 -75 characters appeared 682926 times. +70 characters appeared 869745 times. Most Frequent characters: -[ 0] Char า: 6.965029886107719 % -[ 1] Char น: 5.966825102573338 % -[ 2] Char ร: 5.703399782699736 % -[ 3] Char ก: 4.551151954970231 % -[ 4] Char อ: 4.256244454011123 % -[ 5] Char เ: 3.801290330138258 % -[ 6] Char ่: 3.7993867563982042 % -[ 7] Char ง: 3.5243935653350436 % -[ 8] Char ั: 3.4997935354635787 % -[ 9] Char ม: 3.392607691023625 % -[10] Char ี: 2.9692821769855007 % -[11] Char ว: 2.904267812325201 % -[12] Char ้: 2.7827319504602257 % -[13] Char ย: 2.766039073047446 % -[14] Char ล: 2.536878080494812 % -[15] Char ิ: 2.5346816492562882 % -[16] Char ด: 2.387960042522909 % -[17] Char ท: 2.2776991943490215 % -[18] Char ส: 2.1232168639061917 % -[19] Char ต: 1.9805952621513898 % -[20] Char ค: 1.9372523523778564 % -[21] Char ะ: 1.8903951526226852 % -[22] Char บ: 1.795216465619994 % -[23] Char ป: 1.7347413921859762 % -[24] Char แ: 1.5820162067339654 % -[25] Char ห: 1.5278375695170487 % -[26] Char พ: 1.5215411332999476 % -[27] Char จ: 1.2964801457258912 % -[28] Char ช: 1.2149193324020464 % -[29] Char ใ: 1.1090513467052068 % -[30] Char ์: 1.0768370218735266 % -[31] Char ไ: 1.0320298246076443 % -[32] Char ข: 0.9678940324427537 % -[33] Char ุ: 0.9629154549687667 % -[34] Char ื: 0.9577904487455449 % -[35] Char ศ: 0.9569118762501355 % -[36] Char ู: 0.9047832415225076 % -[37] Char โ: 0.8954118015714734 % -[38] Char ็: 0.6857258326670825 % -[39] Char ำ: 0.5846899956949948 % -[40] Char ซ: 0.5577471058357714 % -[41] Char ึ: 0.4810184412366786 % -[42] Char ผ: 0.4660827088147178 % -[43] Char ถ: 0.4076576378699888 % -[44] Char ธ: 0.3950647654357866 % -[45] Char ณ: 0.3679754468273283 % -[46] Char ษ: 0.342496844460454 % -[47] Char ภ: 0.33942184072652093 % -[48] Char ญ: 0.24028957749448696 % -[49] Char ฟ: 0.22813599130798945 % -[50] Char ฐ: 0.18347522279134196 % -[51] Char ฤ: 0.10293941071214158 % -[52] Char ฉ: 0.09532511575192627 % -[53] Char ฮ: 0.07643580710062291 % -[54] Char ๆ: 0.07555723460521345 % -[55] Char ฒ: 0.05359292221997698 % -[56] Char ฝ: 0.0509572047337486 % -[57] Char ฏ: 0.0494929172413995 % -[58] Char ฎ: 0.03675361605796235 % -[59] Char ฑ: 0.02533217361763939 % -[60] Char ฆ: 0.015814304917370257 % -[61] Char ๊: 0.015375018669665529 % -[62] Char ฬ: 0.01420358867578625 % -[63] Char ฯ: 0.009810726198738956 % -[64] Char ฺ: 0.0038071474801076545 % -[65] Char ๋: 0.0027821462354632857 % -[66] Char ฌ: 0.0023428599877585565 % -[67] Char ๅ: 0.0005857149969396391 % -[68] Char ๖: 0.00029285749846981956 % -[69] Char ๒: 0.00029285749846981956 % -[70] Char ๔: 0.00029285749846981956 % -[71] Char ๙: 0.00029285749846981956 % -[72] Char ๐: 0.00014642874923490978 % -[73] Char ๕: 0.00014642874923490978 % -[74] Char ๗: 0.00014642874923490978 % +[ 0] Char า: 7.148417064771858 % +[ 1] Char น: 6.096269596260973 % +[ 2] Char ร: 5.78341927806426 % +[ 3] Char ก: 4.61043179322675 % +[ 4] Char อ: 4.214453661705442 % +[ 5] Char เ: 3.842850490661056 % +[ 6] Char ง: 3.8121518376075745 % +[ 7] Char ่: 3.7103978752392943 % +[ 8] Char ั: 3.4182432782022314 % +[ 9] Char ม: 3.324422675611817 % +[10] Char ี: 2.9437363825029172 % +[11] Char ย: 2.853709995458439 % +[12] Char ล: 2.656065858383779 % +[13] Char ว: 2.607775842344595 % +[14] Char ้: 2.560290659906064 % +[15] Char ิ: 2.543044225606356 % +[16] Char ส: 2.260202703091136 % +[17] Char ต: 2.215936855055217 % +[18] Char ท: 2.198115539612185 % +[19] Char ด: 2.1867328929743777 % +[20] Char ป: 1.924472115390143 % +[21] Char ค: 1.8667540486004517 % +[22] Char ะ: 1.8569810691639506 % +[23] Char บ: 1.6992336834359496 % +[24] Char แ: 1.590466171119121 % +[25] Char ห: 1.538496915762666 % +[26] Char จ: 1.27692599555042 % +[27] Char ช: 1.219552857446723 % +[28] Char พ: 1.1831053929600055 % +[29] Char ใ: 1.138034711323434 % +[30] Char ข: 1.0841108600796785 % +[31] Char ์: 1.012365693392891 % +[32] Char ศ: 0.9939694968065352 % +[33] Char ุ: 0.9593616519784534 % +[34] Char โ: 0.9249837596077011 % +[35] Char ื: 0.8639313821867328 % +[36] Char ไ: 0.8308182283312924 % +[37] Char ู: 0.7648218730777412 % +[38] Char ็: 0.7122777365779625 % +[39] Char ำ: 0.5964966743125859 % +[40] Char ึ: 0.5230268641958276 % +[41] Char ซ: 0.5183128388205739 % +[42] Char ภ: 0.4408188607005502 % +[43] Char ษ: 0.42771157063277165 % +[44] Char ธ: 0.4117298748483751 % +[45] Char ถ: 0.40770570684510976 % +[46] Char ผ: 0.3322813008410511 % +[47] Char ณ: 0.3189440583159432 % +[48] Char ญ: 0.31342519934003643 % +[49] Char ฐ: 0.28134683154257856 % +[50] Char ฟ: 0.20074849524860736 % +[51] Char ฮ: 0.1356719498243738 % +[52] Char ฤ: 0.12221973107060115 % +[53] Char ฝ: 0.10957234591748156 % +[54] Char ฏ: 0.09290079276109665 % +[55] Char ๆ: 0.0789886690926651 % +[56] Char ฉ: 0.06979057079948721 % +[57] Char ฎ: 0.05392385124375536 % +[58] Char ฒ: 0.050244611926484196 % +[59] Char ฑ: 0.01736141052837326 % +[60] Char ฬ: 0.01356719498243738 % +[61] Char ๋: 0.013337242525107934 % +[62] Char ฆ: 0.011612599095137082 % +[63] Char ๊: 0.011612599095137082 % +[64] Char ฯ: 0.010462836808489844 % +[65] Char ฌ: 0.005403882747242008 % +[66] Char ฺ: 0.0029893819452828127 % +[67] Char ํ: 0.00022995245732944714 % +[68] Char ฃ: 0.00011497622866472357 % +[69] Char ๅ: 0.00011497622866472357 % -The first 75 characters have an accumulated ratio of 1.0. +The first 70 characters have an accumulated ratio of 1.0. +The first 2 characters have an accumulated ratio of 0.0755324836590035. +All characters whose order is over 43 have an accumulated ratio of 0.030663010422595123. -2422 sequences found. +2476 sequences found. -First 1646 (typical positive ratio): 0.9950175497087397 -Next 355 (2001-1646): 0.003986886339010343 -Rest: 0.0009955639522499782 +First 1642 (typical positive ratio): 0.9950041430825017 +Next 370 (2012-1642): 0.003999342904699388 +Rest: 0.000996514012798877 -- Processing end: 2021-03-22 17:30:47.001559 +- Processing end: 2022-12-14 18:24:15.979522 diff --git a/script/BuildLangModelLogs/LangTurkishModel.log b/script/BuildLangModelLogs/LangTurkishModel.log index b683c86..30a026d 100644 --- a/script/BuildLangModelLogs/LangTurkishModel.log +++ b/script/BuildLangModelLogs/LangTurkishModel.log @@ -1,161 +1,233 @@ = Logs of language model for Turkish (tr) = - Generated by BuildLangModel.py -- Started: 2021-03-16 20:29:57.369383 +- Started: 2022-12-14 18:21:37.357306 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Ana_Sayfa (revision 25131171) -15 Mart (revision 25133274) -16 Mart (revision 25130723) -17 Mart (revision 25101714) -1920 (revision 24886521) -1921 (revision 24934034) -1926 (revision 24937098) -1968 (revision 25060729) -2003 (revision 25043871) -Abdullah Cevdet (revision 25117345) -Afganistan (revision 25053860) -Albanya (revision 25130585) -Anaheim, Kaliforniya (revision 25012994) -Azerbaycan Yahudileri (revision 25132094) -Georg Ohm (revision 24888782) -Haldun Taner (revision 25064462) -Hazar Kağanlığı (revision 25113376) -Interscope Records (revision 24937048) -Kaliforniya (revision 25130601) -Kamil Rıfkı Urga (revision 25105741) -Kuzey Lefkoşa (revision 24753125) -Kâbil (revision 24861920) -Latin Grammy Ödülleri (revision 22281504) -Lefkoşa (revision 24897461) -Moskova Antlaşması (revision 25031021) -Mustafa Kemal Atatürk (revision 25133394) -My Lai Katliamı (revision 25132972) -Nar (revision 25023035) -Natalia Oreiro (revision 25131895) -No Doubt (revision 24925807) -Osmanlı-Venedik Savaşı (1570-1573) (revision 24483832) -Osmanlı İmparatorluğu (revision 25136006) -Rachel Corrie (revision 24929876) -Robert H. Goddard (revision 24930216) -Rock müzik (revision 24864552) -Selimiye, Lefkoşa (revision 24306825) -Selimiye Meydanı (revision 24185756) -Selma Lagerlöf (revision 25097031) -Sovyetler Birliği (revision 25004103) -Sıcak çikolata (revision 24978056) -The Beacon Street Collection (revision 24950711) -Türbe (revision 25041350) -Türkiye Büyük Millet Meclisi (revision 25113834) -Türkçe (revision 25069652) -Vietnam Savaşı (revision 24942314) -Vikipedi (revision 25130148) -Yılın günleri listesi (revision 24802413) -Ziya Gökalp (revision 24942014) -Özgür içerik (revision 24349743) -İstanbul (revision 25106647) -İtilaf Devletleri (revision 25043005) -İttihat ve Terakki (revision 25125484) -İttik Dede Türbesi (revision 25133559) -0 Mart (revision 24329470) -0 Ocak (revision 23186786) -10 Aralık (revision 24772485) -10 Ağustos (revision 24980345) -10 Ekim (revision 24850081) -10 Eylül (revision 25090510) -10 Haziran (revision 25121277) -10 Kasım (revision 24973976) -10 Mart (revision 25105572) -10 Mayıs (revision 25120763) -10 Nisan (revision 25021557) -10 Ocak (revision 25093298) -10 Temmuz (revision 24907247) -10 Şubat (revision 25005286) -11 Aralık (revision 24822783) -11 Ağustos (revision 24750760) -11 Ekim (revision 25021451) -11 Eylül (revision 24878760) -11 Haziran (revision 24946135) -11 Kasım (revision 24751390) -11 Mart (revision 25101669) -11 Mayıs (revision 25123240) -11 Nisan (revision 25114265) -11 Ocak (revision 25121144) -11 Temmuz (revision 25018276) -11 Şubat (revision 25044631) -12 Aralık (revision 25120395) -12 Ağustos (revision 24964866) -12 Ekim (revision 24822300) -12 Eylül (revision 25105547) -12 Haziran (revision 24891411) -12 Kasım (revision 25105520) -12 Mart (revision 25105618) -12 Mayıs (revision 25084509) -12 Nisan (revision 25133262) -12 Ocak (revision 25105557) -12 Temmuz (revision 25132218) -12 Şubat (revision 25121399) -13 Aralık (revision 24801826) -13 Ağustos (revision 25136701) -13 Ekim (revision 25121155) -13 Eylül (revision 24750978) -13 Haziran (revision 24815847) -13 Kasım (revision 25084464) -13 Mart (revision 25125469) -13 Mayıs (revision 24897682) -13 Nisan (revision 25084441) -13 Ocak (revision 24756340) +Ana_Sayfa (revision 28930102) +Türkçe (revision 28917063) +Guinness Dünya Rekorları (revision 28910810) +Konstantinopolis (revision 28883632) +Kahverengi pelikan (revision 26206141) +Mercury Records (revision 27849846) +Kırım (revision 28830891) +Santiago Ezquerro (revision 23892511) +Marilyn Manson (revision 28771157) +Cahide Sonku (revision 28619115) +Yavuz (muharebe kruvazörü) (revision 28063450) +Güney Kutbu (revision 28744663) +Bob Rock (revision 24992545) +Mozaik (revision 24892452) +Beyaz Ruslar (revision 26484311) +Uluslararası Standart Kitap Numarası (revision 28628910) +Bon Jovi (revision 26600669) +21. Altın Portakal Film Festivali (revision 28207119) +Atletizmde Türkiye rekorları listesi (revision 28455400) +Haliç (revision 28866954) +Hayvan (revision 28924988) +Antalya Film Forum Ödülleri (revision 28272421) +II. Dünya Savaşı (revision 28897347) +12. Altın Portakal Film Festivali (revision 27871394) +Motley Crue (revision 26123943) +Müzik yapımcısı (revision 27438371) +İstanbul iline bağlı adalar listesi (revision 27953556) +İstanbul mimarisi (revision 28724315) +Birleşik Krallık (revision 28807606) +MusicBrainz (revision 24863204) +CA Osasuna (revision 28710721) +İncil (revision 28770178) +Uluslararası Standart Ad Tanımlayıcısı (revision 25485263) +Çalıkuşu (film, 1966) (revision 28185368) +Kırım Devlet Üniversitesi (revision 28380818) +Fatih Camii (revision 27920685) +Athletic Bilbao (revision 28238026) +Otorite kontrolü (revision 28828926) +Heybeliada Deniz Lisesi (revision 28575270) +Hazarlar (revision 28815299) +Jack Off Jill (revision 25182531) +I. Theodosius (revision 28764219) +Bibsys (revision 28772523) +Forbes en değerli futbol kulüpleri listesi (revision 28185363) +Meksika (revision 28862166) +Bulgaristan (revision 28487358) +40. kuzey enlemi (revision 26650357) +Soğdca (revision 27844795) +Kilometre (revision 21810752) +Vatan ve Namık Kemal (film, 1951) (revision 28116291) +Mangrov (revision 25873626) +Rus İmparatorluğu (revision 28832942) +Karayipler (revision 28776869) +5. güney enlemi (revision 22753480) +Tepeli pelikan (revision 27890916) +MKE Sanayi ve Teknoloji Müzesi (revision 26815145) +41. Altın Portakal Film Festivali (revision 27853118) +Uluslararası Sanal Otorite Dosyası (revision 28052013) +En kilolu insanlar listesi (revision 26679282) +Kuzey Amerika (revision 28579732) +Formula 1 sürücü rekorları listesi (revision 28853779) +Ankara (revision 28854191) +FC Barcelona (revision 28824389) +İbrani alfabesi (revision 28017994) +Atlantik (revision 28878245) +Özbekler (revision 28488298) +İsrail Ulusal Kütüphanesi (revision 27953386) +Karakurum (revision 27482861) +26. Altın Portakal Film Festivali (revision 22752701) +Şehvet Kurbanı (revision 25076011) +Napoli (revision 28376302) +Şarapnel (revision 28112315) +HMS Warrior (revision 27428893) +70. kuzey enlemi (revision 22753850) +Akdeniz Tümeni (revision 23205239) +The Fight Song (revision 25453153) +23. Altın Portakal Film Festivali (revision 22752570) +ELO (revision 28139061) +İspanya millî futbol takımı (revision 28908457) +45. kuzey enlemi (revision 22738951) +Forvet (futbol) (revision 28838176) +Ermeni Katolik Kilisesi (revision 26918075) +Azeriler (revision 28901363) +Supercopa de España (revision 28556471) +Die Antwoord (revision 27976641) +Chris Vrenna (revision 28637525) +Fetret Devri (revision 28720903) +55. kuzey enlemi (revision 25062205) +Çek Cumhuriyeti Millî Kütüphanesi (revision 27776014) +UEFA Şampiyonlar Ligi (revision 28877448) +WorldCat (revision 28632980) +31. Altın Portakal Film Festivali (revision 25165042) +Türkiye'de radyo (revision 27380661) +Ermeniler (revision 28873709) +Bas gitar (revision 26341445) +İspanya (revision 28842365) +Kongre Kütüphanesi Kontrol Numarası (revision 27257543) +Tabya (revision 28127064) +Nova Roma (revision 27798799) +Calahorra (revision 28933454) +Batı yarımküre (revision 26064934) +Yahudiler (revision 28675862) +Sara Lee Lucas (revision 28205927) +Farslar (revision 28860291) +Osmanlı İmparatorluğu dağılma dönemi (revision 28777401) +Empire (dergi) (revision 27691893) +ISU Puanlama Sistemi (revision 27320008) +Fort Lauderdale (revision 24212078) +Perekop (revision 25737249) +Kerç Boğazı (revision 25901000) +HMS Lord Nelson (1906) (revision 28867865) +Cumhuriyet dönemi Türk edebiyatı (revision 28853274) +Pelecanidae (revision 28223952) +Dieldrin (revision 27357524) +St. Anger (albüm) (revision 24931303) +Peçenekler (revision 28876192) +Amerika (revision 28878250) +Atlético Madrid (revision 28599163) +Termik Yüksek Basınç (revision 24318962) +RCD Mallorca (revision 25727828) +III. Mustafa (revision 28788967) +Alt tür (revision 27699091) +Sığınak (revision 25148004) +Çin (revision 28925157) +Osmanlı Türkleri (revision 27342672) +Marcelo Bielsa (revision 28254674) +2014-15 La Liga (revision 28850527) +Doğu Slavlar (revision 27368736) +Falkland Adaları Muharebesi (revision 24918207) +Granada (revision 28790184) +Davis Boğazı (revision 24957127) +Teknik standart (revision 25081500) +Ekoloji (revision 28816483) +George Stephenson (revision 24861072) +Bathyscaphe Trieste (revision 28096348) +La Liga (revision 28794060) +Beyoğlu (revision 28677982) +Uruk (revision 28906457) +Maisur Krallığı (revision 26243693) +Konya (revision 28928318) +Seramik (revision 27453313) +Zekeriya (Hazar) (revision 19533840) +Astronomik birim (revision 27094122) +Güzeloluk Camii (revision 27427756) +Giugliano in Campania (revision 28420065) +16. Altın Portakal Film Festivali (revision 27871453) +Yeni İspanya Genel Valiliği (revision 27704446) +Tümleşik Otorite Dosyası (revision 28256608) +Dixie Dean (revision 27323100) +Güney Asya (revision 28291928) +Avrupa Yakası (revision 28929758) +SS Lazio (revision 28148647) +Horn Burnu (revision 25039100) +Wayback Machine (revision 28510856) +Miguel Muñoz Ödülü (revision 26183854) +10. batı meridyeni (revision 26650632) +İsrail (revision 28926364) +Tan Zhongyi (revision 24316444) +Şiilik (revision 28859412) +25. batı meridyeni (revision 28852166) +Amerika Birleşik Devletleri (revision 28926820) +Darice (revision 28773404) +Araplar (revision 28922126) +Asist (futbol) (revision 28879411) +İlhanlılar (revision 28825042) +Papua Yeni Gine (revision 28905831) +Bar ve Bat Mitzvah (revision 27053807) +Sorblar (revision 28724857) +Enlem dairesi (revision 25068828) +VI. Mihail (revision 26363072) +1960 Avrupa Uluslar Kupası (revision 28440153) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 20:34:51.082747 +- Wikipedia parsing ended at: 2022-12-14 18:24:38.038299 -54 characters appeared 913820 times. +55 characters appeared 1112427 times. -First 33 characters: -[ 0] Char a: 12.104681447112123 % -[ 1] Char e: 8.960189096320939 % -[ 2] Char i: 8.522575561926857 % -[ 3] Char n: 7.2878685080212735 % -[ 4] Char r: 6.9632969293734 % -[ 5] Char l: 6.837889299862117 % -[ 6] Char ı: 4.501433542710818 % -[ 7] Char k: 4.343196690814383 % -[ 8] Char t: 4.3038016239522 % -[ 9] Char d: 4.30150357838524 % -[10] Char s: 3.781816988028277 % -[11] Char m: 3.4274802477511983 % -[12] Char u: 3.119761003261036 % -[13] Char y: 2.903635289225449 % -[14] Char o: 2.639688341248824 % -[15] Char b: 2.1207677660808475 % -[16] Char ü: 1.8651375544417939 % -[17] Char ş: 1.4568514587117813 % -[18] Char v: 1.4549911361099561 % -[19] Char h: 1.216869843076317 % -[20] Char z: 1.1867763892232606 % -[21] Char g: 1.1811954214177847 % -[22] Char c: 1.125714035586877 % -[23] Char p: 0.8964566325972293 % -[24] Char ç: 0.8571709964763301 % -[25] Char ö: 0.7883390602087939 % -[26] Char ğ: 0.7411744107154582 % -[27] Char f: 0.7040773894202359 % -[28] Char j: 0.13821102624149176 % -[29] Char w: 0.07933728743078505 % -[30] Char â: 0.05865487732813902 % -[31] Char î: 0.03994222056860213 % -[32] Char û: 0.028014269768663416 % +Most Frequent characters: +[ 0] Char a: 12.122143745162605 % +[ 1] Char e: 9.162938332133255 % +[ 2] Char i: 8.517412827987814 % +[ 3] Char n: 7.819569284096844 % +[ 4] Char l: 7.247846375537451 % +[ 5] Char r: 7.244610208130512 % +[ 6] Char ı: 4.532162559880334 % +[ 7] Char k: 4.342037724722611 % +[ 8] Char d: 4.278572886130956 % +[ 9] Char t: 3.751077598799742 % +[10] Char s: 3.451822007196877 % +[11] Char m: 3.19131053093821 % +[12] Char u: 3.0403792788200934 % +[13] Char y: 2.9621718998190443 % +[14] Char o: 2.7713279163486684 % +[15] Char b: 2.1581640862726275 % +[16] Char ü: 1.786184621552695 % +[17] Char ş: 1.4678715996645173 % +[18] Char v: 1.3457062800525337 % +[19] Char g: 1.2175180933220786 % +[20] Char h: 1.1529745322614426 % +[21] Char z: 1.0875320358099902 % +[22] Char p: 1.006717744175573 % +[23] Char ç: 0.9676140546750484 % +[24] Char c: 0.8805971088439961 % +[25] Char ğ: 0.8180312056431568 % +[26] Char ö: 0.6598185768594254 % +[27] Char f: 0.5968930995022594 % +[28] Char j: 0.1306153122856601 % +[29] Char w: 0.0882754553781956 % +[30] Char â: 0.06463345459971756 % +[31] Char î: 0.06130739365369593 % +[32] Char û: 0.019057430285313103 % -The first 33 characters have an accumulated ratio of 0.9993849992339848. +The first 33 characters have an accumulated ratio of 0.9994489526054298. +The first 4 characters have an accumulated ratio of 0.37622064189380516. +All characters whose order is over 23 have an accumulated ratio of 0.0331922903705142. -1097 sequences found. +1109 sequences found. -First 512 (typical positive ratio): 0.9923593121944019 -Next 512 (512-1024): 0.014568514587117814 -Rest: 9.536163614441446e-05 +First 553 (typical positive ratio): 0.995033943100518 +Next 194 (747-553): 0.0039670697911219355 +Rest: 0.0009989871083601054 -- Processing end: 2021-03-16 20:34:51.176659 +- Processing end: 2022-12-14 18:24:38.120854 diff --git a/script/BuildLangModelLogs/LangVietnameseModel.log b/script/BuildLangModelLogs/LangVietnameseModel.log index 1fa07a6..34c340a 100644 --- a/script/BuildLangModelLogs/LangVietnameseModel.log +++ b/script/BuildLangModelLogs/LangVietnameseModel.log @@ -1,220 +1,270 @@ = Logs of language model for Vietnamese (vi) = - Generated by BuildLangModel.py -- Started: 2021-03-21 15:03:00.873505 +- Started: 2022-12-14 18:24:52.192639 - Maximum depth: 4 -- Max number of pages: 100 +- Max number of pages: 200 == Parsed pages == -Chữ_Quốc_ngữ (revision 64521024) -1651 (revision 26251708) -1838 (revision 63252802) -1865 (revision 64100421) -1869 (revision 59848285) -1888 (revision 64474933) -1902 (revision 64405865) -1918 (revision 64446780) -1919 (revision 64400438) -1938 (revision 63147818) -22 tháng 2 (revision 64199177) -26 tháng 11 (revision 60306925) -28 tháng 12 (revision 64197178) -A (revision 64616139) -ASCII (revision 64542934) -Alexandre de Rhodes (revision 64481737) -Antonio Barbosa (revision 28290803) -B (revision 63753684) -BBC (revision 64477721) -Biên khảo (revision 64480018) -Bàn phím máy tính (revision 63261029) -Bá Đa Lộc (revision 64107557) -Bán nguyên âm (revision 64635959) -Bình luận (revision 26758605) -Bảng chữ cái Bồ Đào Nha (revision 64521024) -Bảng chữ cái Hy Lạp (revision 64540140) -Bảng chữ cái Latinh (revision 64566174) -Bảng chữ cái Latinh cơ bản của ISO (revision 64566174) -Bảng chữ cái Phoenicia (revision 64540140) -Bảng mẫu tự ngữ âm quốc tế (revision 64494501) -Bắc Kỳ (revision 64538623) -Bồ Đào Nha (revision 64477762) -Bộ Giáo dục và Đào tạo (Việt Nam) (revision 64439920) -Bộ gõ tiếng Việt (revision 64399872) -C (revision 64341946) -Cao Xuân Dục (revision 64403009) -Chiều cao (revision 63620682) -Christoforo Borri (revision 39684524) -Chính tả (revision 64168374) -Chính tả tiếng Việt (revision 64566759) -Chủ tịch Hồ Chí Minh (revision 64615386) -Chữ Hán (revision 64488663) -Chữ Latinh (revision 64566174) -Chữ Nôm (revision 64497361) -Chữ b đuôi quặp (revision 63724573) -Chữ cái (revision 63906900) -Chữ số La Mã (revision 64606955) -Chữ tượng hình Ai Cập (revision 64545532) -Chữ viết tiếng Việt (revision 64521025) -Các dân tộc Việt Nam (revision 64521289) -Công giáo tại Việt Nam (revision 64479778) -Cư Jút (revision 64446849) -Cư Kuin (revision 64351798) -Cư Ê Wi (revision 64324496) -Cải cách giáo dục của Cộng hòa Xã hội chủ nghĩa Việt Nam (revision 63800666) -Cổ tự học (revision 63417312) -D (revision 64521463) -Danh sách các chữ cái Latinh (revision 64566174) -De facto (revision 64458216) -Di chúc Hồ Chí Minh (revision 64620754) -Du ký (revision 64306751) -Dòng Tên (revision 64563470) -Dấu câu (revision 64430387) -Dấu huyền (revision 64200881) -Dấu hỏi (revision 64314350) -Dấu ngã (revision 64005169) -Dấu nặng (revision 64089094) -Dấu phụ (revision 43648394) -Dấu sắc (revision 64200881) -Dấu âm ngắn (revision 64560651) -E (revision 63474436) -Ea H'leo (revision 64600906) -Ea Wy (revision 64564116) -F (revision 64556895) -Francesco Buzomi (revision 64573844) -Francisco de Pina (revision 64573938) -G (revision 63840275) -Gaspar do Amaral (revision 61771486) -Gemeinsame Normdatei (revision 63835749) -Gen (revision 64577144) -Gia Định báo (revision 64521887) -Giovanni Filippo de Marini (revision 64381034) -Girolamo Maiorica (revision 64500026) -Giáo hội Công giáo Rôma (revision 64587044) -H (revision 63175940) -Hiến pháp nước Cộng hòa Xã hội chủ nghĩa Việt Nam 2013 (revision 64587062) -Hoàng Phê (revision 63792712) -Hán học (revision 64209708) -Hệ chữ viết Latinh (revision 64566174) -Hệ thống chữ nổi tiếng Việt (revision 64158849) -Hồ Chí Minh (revision 64615386) -Hồ Dzếnh (revision 64471051) -Hội Trí Tri (revision 64593204) -I (revision 55105217) -IPA (revision 64494501) -ISBN (revision 64594093) -ISO/IEC 646 (revision 64542934) -J (revision 64280732) +Chữ_Quốc_ngữ (revision 69323365) +Lăng Chủ tịch Hồ Chí Minh (revision 69298748) +Thư pháp Đông Á (revision 69132694) +1651 (revision 66197493) +Đường kách mệnh (revision 68815747) +Viết tắt (revision 69415552) +Tự Lực văn đoàn (revision 69317098) +Jean-Louis Taberd (revision 67514265) +Chữ b đuôi quặp (revision 69392696) +Christoforo Borri (revision 67524125) +Khu di tích Phủ Chủ tịch (revision 68919870) +Tháp Rùa (revision 69105162) +Sơn La (revision 69308697) +Tắc Thiên văn tự (revision 55964733) +Doãn Quốc Sỹ (revision 69403395) +Mao Trạch Đông (revision 69175184) +Nhà thờ Lớn Hà Nội (revision 69237166) +Danh sách thập niên (revision 68854472) +TV (revision 69409669) +Máy ảnh (revision 69387279) +Phỏng Tống thể (revision 64919952) +Hồ Thiền Quang (revision 68703435) +Hoàng Phủ (revision 68019503) +Tổng binh (revision 43993985) +Quần đảo Hoàng Sa (revision 69406680) +Roma (revision 69409828) +Quế Ngọc Hải (revision 69385814) +Lịch Bắc Triều Tiên (revision 69236869) +Chiến dịch Từ Táo (revision 67065161) +Trắng (revision 68736506) +Bến Nhà Rồng (revision 69411845) +Nhà khoa học (revision 68195413) +Bảo vật quốc gia (Việt Nam) (revision 69346155) +Hiragana (revision 69360768) +Hồ Chí Minh toàn tập (revision 66841200) +Việt Nam (revision 69406532) +1820 (revision 68935390) +Lịch Gregory (revision 68970753) +Toán học (revision 69375403) +Ngày Bác Hồ ra đi tìm đường cứu nước (revision 69209816) +21 tháng 10 (revision 69161103) +Giải thưởng Hồ Chí Minh (revision 68103872) +VB (revision 68764429) +Natsume Sōseki (revision 69386133) +Hoạt động của Hồ Chí Minh trong giai đoạn 1911–1941 (revision 69329924) +Hội đồng Bộ trưởng (revision 68132742) +Lăng mộ (revision 68742278) +Tân Mão (revision 67057112) +Từ điển Taberd (revision 68243239) +Ấn Độ (revision 69373566) +Thư (revision 69170969) +Từ điển (revision 68581103) +Đàng Trong (revision 69394789) +Nhà Tấn (revision 69130408) +Phụ âm (revision 69202002) +Lệ Mật (revision 68936715) +Lan Khai (revision 69322673) +Đức (revision 69389228) +Chữ Nôm (revision 69323362) +Chủng viện (revision 67782981) +Linh mục (revision 69144511) +Nguyễn Cát Tường (revision 69170557) +Tiếng Việt (revision 69231550) +Chiến tranh du kích (revision 68759540) +Bỉ vỏ (revision 68937986) +Trần Đức Hòa (revision 68984879) +Hội An (revision 69387398) +Bảo tàng Hồ Chí Minh (revision 69254035) +2015 (revision 69408075) +Hội Thừa sai Paris (revision 65413261) +Chủ nghĩa Marx – Lenin (revision 69399705) +Liên Hợp Quốc (revision 69400525) +Đoàn Phú Tứ (revision 69324194) +Tiếng Pháp (revision 68934914) +Francesco Buzomi (revision 67525170) +Xã hội chủ nghĩa (revision 69127020) +Triều đình Huế (revision 69404625) +Yêu sách của nhân dân An Nam (revision 66967766) +Hàn Mặc Tử (revision 69191956) +DMOZ (revision 69406898) +Chương Thị Kiều (revision 69310375) +Truyền hình (revision 69409669) +Nhà Nguyễn (revision 69404625) +Système universitaire de documentation (revision 65857769) +Phùng Thế Tài (revision 69082366) +Ê (revision 69218515) +Số đỏ (revision 68955897) +Thanh Hà (revision 69213890) +Thư viện Vatican (revision 67842289) +Đền Bạch Mã (revision 68936578) +Tiếng Anh (revision 69388570) +Tiếng Ý (revision 65463286) +Đinh Tỵ (revision 44954925) +Đoàn kết chính là sức mạnh (revision 55875134) +Trụ sở Bộ Ngoại giao Việt Nam (revision 68973711) +Làng Cót (revision 68596819) +Họ kép Trung Hoa (revision 69057604) +Đền Voi Phục (revision 68936589) +Bút hiệu của Hồ Chí Minh (revision 68618443) +Lübeck (revision 69180567) +Thời kỳ cận đại (revision 66970477) +Kinh tế Brasil (revision 68059251) +Văn miếu Mao Điền (revision 68353427) +Thành Thái (revision 69411949) +Henrik Ibsen (revision 67727373) +Hội Thừa sai Việt Nam (revision 69170283) +Nhâm Tuất (revision 69064587) +Than đá (revision 69282824) +Cự thạch (revision 68633635) +Bảo tàng Hậu cần (Việt Nam) (revision 67683059) +Cộng đồng Caribe (revision 69329463) +Thi Hương (revision 69371778) +Bàn thành tứ hữu (revision 69219909) +Tên miền (revision 68454376) +Đồng minh tự trị dân chủ Đài Loan (revision 64832671) +Mario Arqués (revision 69357404) +12 tháng 3 (revision 69116686) +Tổng thống chế (revision 69239864) +NATO (revision 69319692) +Đôi dép Bác Hồ (revision 68316843) +Canh Tuất (revision 65144096) +1891 (revision 69394596) +Mậu Thìn (revision 24033237) +28 tháng 10 (revision 69243460) +Nam Kinh (revision 68941450) +Lăng Lenin (revision 68944591) +Vũ Cao Đàm (revision 69174289) +Thiên nga (revision 68936780) +Vật lý y khoa (revision 68746344) +Nguyễn Tất Thành (revision 69401333) +1983 (revision 69400593) +30 tháng 10 (revision 68619835) +Chiến tranh lấy mạng làm trung tâm (revision 68617056) +Chuyên gia (revision 69322138) +Cộng hòa Nhân Dân Trung Hoa (revision 69335955) +Nguyễn Phúc Bảo Ân (revision 68647696) +Bờ (revision 64487705) +Lào (revision 69374229) +Chương Châu (revision 64358831) +ISBN (revision 68690711) +Cố đô Huế (revision 69119630) +Toàn quyền Đông Dương (revision 69415410) +1838 (revision 66151124) +Bến Bạch Đằng (revision 69399546) +New Jersey (revision 69108160) +Sân vận động Hoa Lư (revision 68942060) +Gia đình Hồ Chí Minh (revision 68656195) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-21 15:20:14.349683 +- Wikipedia parsing ended at: 2022-12-14 18:45:49.353245 -107 characters appeared 963942 times. +107 characters appeared 1638099 times. Most Frequent characters: -[ 0] Char n: 11.726846636000921 % -[ 1] Char h: 8.848250205925252 % -[ 2] Char t: 6.801757782107222 % -[ 3] Char c: 6.609733780663152 % -[ 4] Char i: 6.087710671388943 % -[ 5] Char g: 5.542553390141731 % -[ 6] Char a: 3.4085038311433675 % -[ 7] Char u: 2.916254297457731 % -[ 8] Char m: 2.5645733871954954 % -[ 9] Char o: 2.5096945666855475 % -[10] Char đ: 2.399210740895199 % -[11] Char à: 2.0946281000309144 % -[12] Char v: 2.0538580122040537 % -[13] Char r: 1.9629811752159363 % -[14] Char l: 1.7714758771793324 % -[15] Char á: 1.7447107813540648 % -[16] Char p: 1.5230169450029152 % -[17] Char ư: 1.4758149349234706 % -[18] Char b: 1.4370159200449821 % -[19] Char s: 1.3318228690107912 % -[20] Char y: 1.2889779675540645 % -[21] Char d: 1.1096103292521748 % -[22] Char k: 1.0497519560305495 % -[23] Char ế: 0.980349440111542 % -[24] Char e: 0.9535843442862745 % -[25] Char ộ: 0.8638486547945831 % -[26] Char ệ: 0.8230785669677221 % -[27] Char â: 0.7981808034093337 % -[28] Char ê: 0.7921638438827233 % -[29] Char ô: 0.7864581064005927 % -[30] Char ố: 0.7173668125260648 % -[31] Char ạ: 0.7026356357540184 % -[32] Char q: 0.6626954733791037 % -[33] Char ả: 0.6513877390963356 % -[34] Char ữ: 0.6236889771376286 % -[35] Char ó: 0.5890395895188715 % -[36] Char ủ: 0.5878984420224453 % -[37] Char ớ: 0.5372729894537224 % -[38] Char ề: 0.48395027916617384 % -[39] Char í: 0.47367995169833876 % -[40] Char ờ: 0.47087895329802004 % -[41] Char ợ: 0.46621062263082225 % -[42] Char ấ: 0.44618867110261823 % -[43] Char ể: 0.430108865471159 % -[44] Char ă: 0.4119542462098342 % -[45] Char ị: 0.4072859155426363 % -[46] Char ậ: 0.3685906413456411 % -[47] Char ơ: 0.36506345817486946 % -[48] Char ự: 0.3557267968404738 % -[49] Char ồ: 0.3428629523353065 % -[50] Char ụ: 0.33165895873403173 % -[51] Char ầ: 0.3292729230596862 % -[52] Char ì: 0.3276130721557936 % -[53] Char x: 0.3269906280668339 % -[54] Char ọ: 0.3178614480954248 % -[55] Char ứ: 0.293171165900023 % -[56] Char ã: 0.27926991457992284 % -[57] Char ở: 0.2768838789055773 % -[58] Char ừ: 0.24700656263551127 % -[59] Char ổ: 0.21235717501675413 % -[60] Char ắ: 0.19855966437814723 % -[61] Char ú: 0.19368385234796284 % -[62] Char ù: 0.1839322282875941 % -[63] Char ò: 0.17988634170935594 % -[64] Char ặ: 0.15924194609219228 % -[65] Char ử: 0.14046488274190772 % -[66] Char ý: 0.1310244807260188 % -[67] Char ĩ: 0.1246962991549284 % -[68] Char ằ: 0.12428133642895527 % -[69] Char ũ: 0.11764193281338503 % -[70] Char ỉ: 0.11193619533125436 % -[71] Char f: 0.104881828989711 % -[72] Char ễ: 0.10156212718192589 % -[73] Char é: 0.09398905743291609 % -[74] Char w: 0.08631224700241301 % -[75] Char ỏ: 0.06971373796348743 % -[76] Char ẩ: 0.06867633114855459 % -[77] Char ẫ: 0.06224440889597092 % -[78] Char j: 0.05259652551709542 % -[79] Char ỳ: 0.05093667461320287 % -[80] Char ẽ: 0.0438823082716595 % -[81] Char ỗ: 0.042429938730753514 % -[82] Char ỷ: 0.04004390305640796 % -[83] Char z: 0.034960609663237 % -[84] Char ỹ: 0.034545646937263856 % -[85] Char õ: 0.026453873780787642 % -[86] Char ẳ: 0.0246902821954018 % -[87] Char ỡ: 0.020333173572683834 % -[88] Char ẻ: 0.018777063350284562 % -[89] Char ẹ: 0.017635915853858427 % -[90] Char è: 0.014627436090553168 % -[91] Char ẵ: 0.011203993601274767 % -[92] Char ç: 0.005705737482130668 % -[93] Char ü: 0.00404588657823811 % -[94] Char ỵ: 0.003008479763305261 % +[ 0] Char n: 12.198591171840041 % +[ 1] Char h: 9.07637450483762 % +[ 2] Char t: 6.942498591354979 % +[ 3] Char c: 6.2973605380382995 % +[ 4] Char i: 5.664126527151289 % +[ 5] Char g: 5.387891696411511 % +[ 6] Char a: 3.391125933170095 % +[ 7] Char u: 3.1319840864318946 % +[ 8] Char đ: 2.4678606115991766 % +[ 9] Char m: 2.4540030852836123 % +[10] Char o: 2.3572445865603973 % +[11] Char à: 2.271230249209602 % +[12] Char r: 2.0874196248212105 % +[13] Char v: 1.962946073466866 % +[14] Char l: 1.9324228877497636 % +[15] Char á: 1.58555740526061 % +[16] Char ư: 1.558452816343823 % +[17] Char p: 1.5136447797111163 % +[18] Char s: 1.3848979823563776 % +[19] Char y: 1.319517318550344 % +[20] Char b: 1.2476657393722845 % +[21] Char k: 1.1433985369626622 % +[22] Char ế: 0.9981692193206881 % +[23] Char d: 0.9948116688918068 % +[24] Char ộ: 0.857945704136319 % +[25] Char e: 0.8345038975055843 % +[26] Char ạ: 0.789817953615746 % +[27] Char ố: 0.757951747727091 % +[28] Char ệ: 0.7528848988980519 % +[29] Char ô: 0.725230892638357 % +[30] Char ê: 0.7175390498376472 % +[31] Char q: 0.7004460658360697 % +[32] Char â: 0.6896408580922154 % +[33] Char ả: 0.6828036644915845 % +[34] Char ủ: 0.6210247366001689 % +[35] Char ớ: 0.6092427869133672 % +[36] Char ó: 0.5808562241964619 % +[37] Char ề: 0.5658998631950817 % +[38] Char ấ: 0.5340947036778607 % +[39] Char ă: 0.5208476410766382 % +[40] Char ờ: 0.5145598648189151 % +[41] Char ợ: 0.456504765584986 % +[42] Char ị: 0.4350164428401458 % +[43] Char í: 0.4243333278391599 % +[44] Char ơ: 0.4120630071808846 % +[45] Char ậ: 0.39515316229360986 % +[46] Char ể: 0.39326072477914953 % +[47] Char ự: 0.38532469649270285 % +[48] Char ữ: 0.3852636501212686 % +[49] Char ì: 0.3753130915774932 % +[50] Char ầ: 0.356816041032929 % +[51] Char x: 0.3403335207456937 % +[52] Char ứ: 0.32513297425857657 % +[53] Char ọ: 0.30272895594222327 % +[54] Char ở: 0.29314467562705304 % +[55] Char ã: 0.29204584094123737 % +[56] Char ồ: 0.24656629422275456 % +[57] Char ụ: 0.24552850590837308 % +[58] Char ổ: 0.23698201390758433 % +[59] Char ừ: 0.22257507024911194 % +[60] Char ắ: 0.19308967284639084 % +[61] Char ú: 0.1828949288168786 % +[62] Char ù: 0.16616822304390638 % +[63] Char ò: 0.16439787827231442 % +[64] Char ử: 0.15737754555738084 % +[65] Char ễ: 0.1475490797564738 % +[66] Char ặ: 0.1461450132134871 % +[67] Char ũ: 0.12154332552550243 % +[68] Char ĩ: 0.11592705935355556 % +[69] Char ằ: 0.11293578715327951 % +[70] Char ý: 0.11287474078184528 % +[71] Char ỉ: 0.09712477695182038 % +[72] Char f: 0.09218002086564975 % +[73] Char w: 0.08961607326541314 % +[74] Char é: 0.08485445629354514 % +[75] Char ẩ: 0.07105797634941478 % +[76] Char ỳ: 0.07002018803503329 % +[77] Char ỏ: 0.06873821423491498 % +[78] Char ẫ: 0.06129055691994195 % +[79] Char ỷ: 0.053537667747797904 % +[80] Char ỹ: 0.04938651449027195 % +[81] Char j: 0.04694465963290375 % +[82] Char ỗ: 0.045845824947088054 % +[83] Char ẽ: 0.037543518432036155 % +[84] Char z: 0.031072603060010414 % +[85] Char è: 0.019717977973248257 % +[86] Char ẳ: 0.019595885230379848 % +[87] Char õ: 0.018741236030300975 % +[88] Char ẹ: 0.017092984001577438 % +[89] Char ẻ: 0.016665659401538 % +[90] Char ỡ: 0.014834268258511848 % +[91] Char ẵ: 0.0062877762577231286 % +[92] Char ü: 0.005860451657683693 % +[93] Char ỵ: 0.002624993971670821 % -The first 95 characters have an accumulated ratio of 0.9999159700479902. +The first 94 characters have an accumulated ratio of 0.9999011048782763. +The first 4 characters have an accumulated ratio of 0.3451482480607094. +All characters whose order is over 57 have an accumulated ratio of 0.03101094622486187. -1892 sequences found. +1993 sequences found. -First 1119 (typical positive ratio): 0.9950141222722985 -Next 364 (1483-1119): 0.003989870519062855 -Rest: 0.0009960072086386829 +First 1119 (typical positive ratio): 0.9950155124227584 +Next 340 (1459-1119): 0.003985367896549574 +Rest: 0.0009991196806919955 -- Processing end: 2021-03-21 15:20:15.167861 +- Processing end: 2022-12-14 18:45:50.022872 diff --git a/script/langs/de.py b/script/langs/de.py index e004901..28a1f43 100644 --- a/script/langs/de.py +++ b/script/langs/de.py @@ -60,7 +60,7 @@ charsets = ['ISO-8859-1', 'WINDOWS-1252'] # character (provided Python algorithms know the right cases). alphabet = ['ä', 'ö', 'ü', 'ß'] # The start page. Though optional, it is advised to choose one yourself. -start_pages = ['Wikipedia:Hauptseite'] +start_pages = ['Deutschland'] # give possibility to select another code for the Wikipedia URL. wikipedia_code = code # 'a' and 'A' will be considered the same character, and so on. diff --git a/script/langs/es.py b/script/langs/es.py index 5219296..a0f0ae9 100644 --- a/script/langs/es.py +++ b/script/langs/es.py @@ -60,7 +60,7 @@ charsets = ['ISO-8859-15', 'ISO-8859-1', 'WINDOWS-1252'] # character (provided Python algorithms know the right cases). alphabet = 'ñáéíóúü' # The start page. Though optional, it is advised to choose one yourself. -start_pages = ['Wikipedia:Portada'] +start_pages = ['España'] # give possibility to select another code for the Wikipedia URL. wikipedia_code = code # 'a' and 'A' will be considered the same character, and so on. diff --git a/src/LangModels/LangArabicModel.cpp b/src/LangModels/LangArabicModel.cpp index 89157f6..7e0acf0 100644 --- a/src/LangModels/LangArabicModel.cpp +++ b/src/LangModels/LangArabicModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 11:42:48.952857 + * On: 2022-12-14 17:55:19.583556 **/ /* Character Mapping Table: @@ -68,17 +68,17 @@ static const unsigned char Iso_8859_6_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 62, 63, 54, 67, 59, 70, 78, 76, 60, 83, 81, 77, 65, 71, 75, /* 4X */ - 66, 90, 68, 58, 61, 73, 69, 79, 84, 87, 88,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 36, 55, 46, 48, 37, 53, 56, 50, 38, 91, 64, 44, 47, 43, 45, /* 6X */ - 51, 80, 41, 42, 39, 52, 57, 72, 85, 49, 86,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 54, 57, 53, 60, 68, 75, 70, 72, 62, 84, 79, 74, 52, 64, 71, /* 4X */ + 65, 88, 73, 56, 63, 77, 82, 78, 91, 86, 90,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 37, 58, 46, 48, 36, 66, 51, 49, 38, 83, 61, 43, 50, 39, 40, /* 6X */ + 55, 89, 41, 44, 42, 47, 69, 67, 80, 59, 76,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,ILL,ILL,ILL,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,SYM,ILL,ILL, /* AX */ ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,ILL,ILL,ILL,SYM, /* BX */ - ILL, 32, 34, 16, 35, 23, 31, 0, 8, 9, 7, 26, 19, 18, 25, 11, /* CX */ - 28, 6, 27, 12, 22, 21, 29,SYM, 33, 10, 30,ILL,ILL,ILL,ILL,ILL, /* DX */ - 40, 13, 15, 17, 1, 3, 5, 14, 4, 24, 2,SYM,SYM,SYM,SYM,SYM, /* EX */ + ILL, 32, 35, 17, 34, 24, 31, 0, 8, 9, 7, 27, 19, 18, 25, 11, /* CX */ + 29, 6, 26, 12, 21, 23, 28,SYM, 33, 10, 30,ILL,ILL,ILL,ILL,ILL, /* DX */ + 45, 13, 16, 14, 1, 3, 5, 15, 4, 22, 2,SYM,SYM,SYM,SYM,SYM, /* EX */ SYM,SYM,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,172 +89,173 @@ static const unsigned char Windows_1256_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 62, 63, 54, 67, 59, 70, 78, 76, 60, 83, 81, 77, 65, 71, 75, /* 4X */ - 66, 90, 68, 58, 61, 73, 69, 79, 84, 87, 88,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 36, 55, 46, 48, 37, 53, 56, 50, 38, 91, 64, 44, 47, 43, 45, /* 6X */ - 51, 80, 41, 42, 39, 52, 57, 72, 85, 49, 86,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM, 74,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 97,SYM,101, 93, 98,102, /* 8X */ - 82,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 94,SYM,103,SYM,104,SYM,SYM,105, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 99,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM, 54, 57, 53, 60, 68, 75, 70, 72, 62, 84, 79, 74, 52, 64, 71, /* 4X */ + 65, 88, 73, 56, 63, 77, 82, 78, 91, 86, 90,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 37, 58, 46, 48, 36, 66, 51, 49, 38, 83, 61, 43, 50, 39, 40, /* 6X */ + 55, 89, 41, 44, 42, 47, 69, 67, 80, 59, 76,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,100,SYM,101, 95,102,103, /* 8X */ + 85,SYM,SYM,SYM,SYM,SYM,SYM,SYM,104,SYM,105,SYM,106,SYM,SYM,107, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,108,SYM,SYM,SYM,SYM,SYM, /* AX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 106, 32, 34, 16, 35, 23, 31, 0, 8, 9, 7, 26, 19, 18, 25, 11, /* CX */ - 28, 6, 27, 12, 22, 21, 29,SYM, 20, 33, 10, 30, 40, 13, 15, 17, /* DX */ - 107, 1, 96, 3, 5, 14, 4,108,109, 89,110,111, 24, 2,100,112, /* EX */ - SYM,SYM,SYM,SYM, 95,SYM,SYM,SYM,SYM,113,SYM,114, 92,SYM,SYM,115, /* FX */ + 109, 32, 35, 17, 34, 24, 31, 0, 8, 9, 7, 27, 19, 18, 25, 11, /* CX */ + 29, 6, 26, 12, 21, 23, 28,SYM, 20, 33, 10, 30, 45, 13, 16, 14, /* DX */ + 96, 1, 99, 3, 5, 15, 4, 94, 92, 87,110,111, 22, 2,112, 98, /* EX */ + SYM,SYM,SYM,SYM, 93,SYM,SYM,SYM,SYM,113,SYM,114, 97,SYM,SYM,115, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 64; static const unsigned int Unicode_CharOrder[] = { - 65, 62, 66, 63, 67, 54, 69, 59, 73, 60, 83, 58, 84, 61, 97, 36, - 98, 55, 99, 46, 100, 48, 101, 37, 102, 53, 103, 56, 104, 50, 105, 38, - 108, 44, 109, 47, 110, 43, 111, 45, 112, 51, 114, 41, 115, 42, 116, 39, - 117, 52, 118, 57, 121, 49, 1569, 32, 1570, 34, 1571, 16, 1572, 35,1573, 23, - 1574, 31, 1575, 0, 1576, 8, 1577, 9, 1578, 7, 1579, 26, 1580, 19,1581, 18, - 1582, 25, 1583, 11, 1584, 28, 1585, 6, 1586, 27, 1587, 12, 1588, 22,1589, 21, - 1590, 29, 1591, 20, 1592, 33, 1593, 10, 1594, 30, 1600, 40, 1601, 13,1602, 15, - 1603, 17, 1604, 1, 1605, 3, 1606, 5, 1607, 14, 1608, 4, 1609, 24,1610, 2, + 65, 54, 66, 57, 67, 53, 68, 60, 73, 62, 77, 52, 83, 56, 84, 63, + 97, 37, 98, 58, 99, 46, 100, 48, 101, 36, 103, 51, 104, 49, 105, 38, + 107, 61, 108, 43, 109, 50, 110, 39, 111, 40, 112, 55, 114, 41, 115, 44, + 116, 42, 117, 47, 121, 59, 1569, 32, 1570, 35, 1571, 17, 1572, 34,1573, 24, + 1574, 31, 1575, 0, 1576, 8, 1577, 9, 1578, 7, 1579, 27, 1580, 19,1581, 18, + 1582, 25, 1583, 11, 1584, 29, 1585, 6, 1586, 26, 1587, 12, 1588, 21,1589, 23, + 1590, 28, 1591, 20, 1592, 33, 1593, 10, 1594, 30, 1600, 45, 1601, 13,1602, 16, + 1603, 14, 1604, 1, 1605, 3, 1606, 5, 1607, 15, 1608, 4, 1609, 22,1610, 2, }; /* Model Table: - * Total sequences: 1820 - * First 512 sequences: 0.9644868613755061 - * Next 512 sequences (512-1024): 0.03359397057105059 - * Rest: 0.0019191680534433112 + * Total considered sequences: 1932 / 4096 + * - Positive sequences: first 902 (0.9950136374489401) + * - Probable sequences: next 424 (1326-902) (0.003987703013712651) + * - Neutral sequences: last 2770 (0.0009986595373472351) + * - Negative sequences: 2164 (off-ratio) * Negative sequences: TODO */ static const PRUint8 ArabicLangModel[] = { - 1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,1,3,3,3,3,3,3,3, - 3,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, + 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,3,1,3,3,3,3,3,3,3, + 3,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 1,3,2,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,3,3,3,3,3, + 3,3,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3, + 0,3,3,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 0,2,3,2,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,2,3,2,2, - 2,2,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,2,3,3,2,3,2,3,2, - 0,2,2,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 2,2,2,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,1, - 3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,2,2,2,3,3,2,3,2, - 0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, - 3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,1,3,2,2,3,2,3,3,3, - 2,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,3,2,3,3,2, - 0,2,2,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,2,2, - 1,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0, + 3,3,2,3,0,1,0,1,1,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,3,3,3,3,3,3,3, + 0,3,1,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,3,3, + 2,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3, + 0,3,3,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3, + 2,1,2,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,2,2,2,1,0,2,2,0,1,1,1,1,2,2,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,0,2,0,2,2,3,3,0,2,0,3,3,2,3,0,0, - 0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,2,2,0,0,2,1,3,3,3,2,0,0,2,2, - 2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,2,2,3,0,0,0,2,2,1,1,0,0,1,1, - 0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, - 3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,2,2,3,2,3,2,2,1,2,2,3,2,2, - 1,3,1,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,0,3,2,2,2,2,0,2,0,3,2,0,2,0,2,0,0,2,3,2,0,0, - 0,1,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,0,1,2,1,3,3,2,0,2,0,1,2,2,2,0,0, - 0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,2,3,3,2,2,2,3,2, - 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,2,2,2,1,1,2,1,1,2,3,3,3,1,2,1, - 0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,0,3,3,3,2,3,0,3,1,3,2,3,2,0,2,0,2,2,2,3,0,0, + 3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,0,3,3,3,3,3,0,0,3,3,3,3,0,0, + 0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,1,0,3,3,3,0,0,2,3, + 3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,0,0,3,0,0,0,0,1,3, + 0,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,3, + 1,3,2,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,1,2,2,3,3,2,3,2,0, + 0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,2,0,0,3,2,3,3,1,0,0,3,0,3,3,0,0, + 0,2,3,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,1,2,1,3,3,3,3,0,0,2,0,3,3,0,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,3,3,3,2, + 0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,1,3,3,3,3,3,0,0,3,3,3,3,0,0, + 0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,1,0,1,2,1,0,0,3,1,0,3,3,3, + 0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,3,0,3,2,3,0,0,1,0,0,0,0,2,3, + 2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,2,0,0,3,1,0,0,2,3,3, + 0,1,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,0,3,1,3,3,0,3,0,3,0,3,3,0,3,0,0,0,0,3,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,3,0,3,3,3,3,0,3,3,3,3,3,2,3,0,3,3,3,3,0,3,0,3,3,3,3,3,3,0, 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,1,2,1,0,0,1,0,1,0,1,3,2,0,2,2, - 0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,2,3,3,3,2,2,2,2,3,2,0,2,0,2,0,1,0,2,1,0,1,0,0,2,2, - 1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,0,1,3,0,2,2,0,0,2,2,0,0,0,0,2,0, + 3,3,3,3,3,3,3,3,3,3,0,3,3,3,1,3,0,0,0,1,3,3,1,3,0,1,3,0,3,3,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,3,2,3,3,3,2,3,2,1,3,3,2,2,3,2,2,2,0,1,0,2,3,0,0,2,0,2,2, - 0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,1,3,2,2,2,0,3,2,3,2,2,3,0,2,2,2,2,2,2,0,0,2,2,2,3,3,2,0, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,2,1,3,2,2,2,0,0,0,0,1,3,3,2,0,0,1,0,2,3,2,0,0, - 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,2,3,2,2,1,2,2,3,2,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0, - 0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,2,3,2,2,1,2,2,2,1,2,2,2,0,0,0,0,2,2,0,1,0,0,1,2, - 2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,2,2,2,2,2,2,2,0,0,2,3,1,0,3,1,2,0,0,0,0,2,1,0,0,0,0,0,1, - 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,3,1,3,2,2,2,3,2,0,2,2,0,0,0,2,2,2,0,0,0,2,2,0,0,0,0,2,1, - 0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,2,3,0,2,2,2,2,0,0,1,0,1,2,1,2,0,1,1,0,2,2,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,0,1,2,0,0,3,2,0,0,0,1,3, + 3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,2,0,2,1,3,3,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,1,3,3,3,3,3,3,0,3,0,3,0,2,3,2,3,0,3,0,0,3,0,0,1,0,3,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,3,3,0,0,2,3,3,0,0,0,3,0,0,1,0,0,2,0,0,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,2,0,0,0,1,3,2,2,1,0,1,3,0,3,3,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,2,2,2,3,2,2,3,2,3,2,2,2,2,0,2,2,2,2,2,2,0,1,0,0,2,2,1,1,0, - 0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,0,1,1,0,0,2,0,2,0,0,1,0,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,1,2,0,0,3,1,1,3,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,1,1,1,2,0,0,3,0,3,0,0,0,0,1,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,3,2,2,3,2,1,2,1,0,0,2,3,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,2,2,3,0,0,0,3,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,2,2,2,3,2,2,2,1,0,2,2,2,1,0,0,2,2,1,0,0,2,0,0,3,2,2,2,0,1,1, - 1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,2,2,2,2,0,1,0,2,2,0,2,2,0,2,0,1,0,0,2,0,1,2,2,0,1,0,0,0, + 3,3,3,3,3,2,3,3,1,0,0,3,3,0,3,3,3,0,2,2,0,2,1,0,0,2,0,2,0,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,3,2,3,3,3,3,1,0,3,3,2,2,1,1,0,1,2,0,2,0,0,0,3,1,3,0,3,2,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,2,2,2,0,2,2,2,2,1,2,2,2,2,1,1,1,1,1,2,2,1,0,0,0,1,0,0, + 0,0,0,0,3,3,2,3,2,3,3,3,3,0,2,2,3,0,2,2,0,0,0,2,0,1,2,2,0,2,1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,1,1,2,0,2,2,2,2,2,2,2,2,1,1,2,2,2,0,2,2,2,0,0,1,0,0,0, + 0,0,0,0,2,2,3,3,1,3,3,3,3,0,3,3,2,2,3,2,1,0,0,2,0,0,3,2,0,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,1,2,0,2,2,2,2,2,2,2,2,1,0,1,1,1,0,2,2,2,0,0,0,0,0,0, + 0,0,0,0,3,3,2,3,3,3,3,3,3,0,3,2,2,1,3,2,0,1,1,2,0,0,2,1,0,2,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,2,1,0,2,2,1,1,2,1,1,0,2,2,1,2,1,1,0,0,1,0,0,1,0,0,0, - 1,1,0,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1, - 0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,3,3,3,3,2,2,3,3,3,0,3,2,3,1,1,3,0,0,0,0,1,1,2,2,0,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,2,2,0,1,2,2,1,2,1,2,2,1,2,1,2,0,1,1,1,1,0,0,0,0,0,0, + 0,0,0,0,2,2,2,3,2,3,3,3,3,0,2,3,2,2,3,2,0,1,1,3,0,0,2,1,0,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,2,2,0,0,2,1,1,2,1,1,0,1,1,2,1,0,0,0,1,1,0,0,0,0,0,0, + 0,0,0,0,3,3,3,2,3,2,2,2,3,0,2,2,2,1,2,2,0,0,0,2,0,0,2,2,0,2,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,2,2,0,1,2,2,1,2,2,1,2,1,0,0,2,1,1,0,2,1,1,0,0,0,0,0, + 0,0,0,0,3,3,3,0,3,2,2,1,3,0,2,3,0,3,1,0,0,0,0,1,0,1,1,2,0,1,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,3,3,3,0,3,0,2,3,2,0,1,2,2,2,2,2,2,0,1,0,0,0,0,2,0,2,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,3,2,3,1,3,1,3,1,2,0,2,2,1,3,2,0,0,0,0,2,0,0,1,1,0,2,0,1, + 2,1,1,2,2,1,1,2,1,1,0,1,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,3,3,3,0,3,2,2,3,2,0,1,2,0,3,0,0,0,0,0,0,0,2,0,3,0,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,2,1,0,0,1,0,2,2,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0, + 0,0,0,0,2,2,2,3,1,3,3,3,3,0,2,0,2,0,2,2,0,0,0,2,0,0,2,2,0,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,1,1,2,0,2,2,2,2,1,1,2,2,1,1,2,2,2,0,1,2,2,0,0,0,0,0,0, + 0,0,0,0,3,3,3,1,2,1,1,1,2,0,0,2,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,2,2,0,1,0,0,1,2,1,0,2,1,2,0,2,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,3,3,3,1,3,2,2,2,1,0,0,2,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,2,0,0,0,1,1,0,2,1,2,0,1,1,1,2,0,0,2,0,1,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,2,0,0,1,1,0,0,2,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,1,0,0,0,1,2,1,2,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,3,3,3,1,2,1,1,0,1,0,0,2,0,0,2,1,0,0,1,2,1,0,3,1,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,2,2,0,2,1,1,0,2,1,0,0,1,0,0,1,0,0,2,0,0,0,0,0,0,0,0, + 0,0,0,0,3,3,2,0,2,2,0,2,2,0,1,2,1,2,1,1,0,0,0,0,0,0,1,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,1,1,0,2,2,1,2,2,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,3,2,0,2,0,0,0,0,0,1,3,1,1,0,0,0,1,2,1,2,1,0,2,2,0,2,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,1,2,2,0,2,2,2,2,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0, + 0,0,0,0,2,2,2,0,3,2,0,1,1,0,0,1,0,2,0,0,2,0,1,0,2,1,0,0,1,0,1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,1,1,2,0,2,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,1,0,1,2,0,2,2,2,1,0,1,1,1,0,1,1,1,2,1,1,1,1,1,0,1,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,1,1,0,0,1,0,0,1,2,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0, + 0,0,0,0,3,3,2,0,2,2,2,3,1,0,0,1,0,2,1,0,0,0,0,2,0,0,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,1,1,0,0,1,1,1,2,1,1,0,1,1,0,0,1,2,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,1,2,0,2,1,0,0,2,2,0,2,1,0,0,2,2,2,2,1,0,1,1,0,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,1,0,0,1,1,1,1,1,0,0,0,1,2,0,1,0,0,1,1,0,1,0,0,0,0,0, + 0,0,0,0,2,2,2,0,2,3,0,2,0,0,0,2,0,0,0,0,0,2,2,0,1,2,0,0,1,0,2,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,2,2,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,3,2,0,2,2,0,2,1,0,0,2,0,0,1,0,0,0,0,1,0,0,1,2,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,2,0,0,0,0,1,1,1,1,0,1,1, + 0,0,0,0,1,2,1,1,2,1,1,1,2,0,3,1,1,1,1,0,0,1,0,1,1,0,1,0,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,0, + 0,0,0,0,2,2,2,0,2,1,0,1,0,0,0,2,0,0,0,0,2,2,1,0,1,2,2,1,1,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,1,0,1,1,2,0,0,1,1,0,0,0,0,0,0,1,1,0,1,2,1,1,1,1,0, - 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,2,0,1,0,1,0,0,0,0,0,0,0,1,0, + 0,0,0,0,2,2,2,1,2,0,0,1,2,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0, + 0,0,0,0,0,2,0,2,1,2,0,0,1,0,0,1,0,0,0,0,2,1,2,0,2,2,0,0,1,0,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, + 0,0,0,0,2,1,2,0,2,2,0,0,0,0,0,2,0,3,0,0,1,1,1,0,1,1,0,1,0,0,1,1, }; @@ -263,7 +264,7 @@ const SequenceModel Iso_8859_6ArabicModel = Iso_8859_6_CharToOrderMap, ArabicLangModel, 64, - (float)0.9644868613755061, + (float)0.9990013404626528, PR_FALSE, "ISO-8859-6", "ar" @@ -274,7 +275,7 @@ const SequenceModel Windows_1256ArabicModel = Windows_1256_CharToOrderMap, ArabicLangModel, 64, - (float)0.9644868613755061, + (float)0.9990013404626528, PR_FALSE, "WINDOWS-1256", "ar" @@ -287,5 +288,8 @@ const LanguageModel ArabicModel = 64, ArabicLangModel, 64, - (float)0.9644868613755061, + 4, + (float)0.4061396689475064, + 27, + (float)0.034834360917409636, }; diff --git a/src/LangModels/LangCroatianModel.cpp b/src/LangModels/LangCroatianModel.cpp index e1410b8..dc77fc9 100644 --- a/src/LangModels/LangCroatianModel.cpp +++ b/src/LangModels/LangCroatianModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:18:55.486472 + * On: 2022-12-14 18:02:00.881155 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 4X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 6X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 49,SYM, 38,SYM, 50, 51,SYM,SYM, 23, 52, 53, 54,SYM, 24, 55, /* AX */ - SYM, 56,SYM, 38,SYM, 57, 58,SYM,SYM, 23, 59, 60, 61,SYM, 24, 62, /* BX */ - 63, 39, 46, 64, 36, 65, 25, 43, 18, 31, 66, 45, 67, 68, 69, 70, /* CX */ - 26, 71, 72, 47, 73, 74, 32,SYM, 75, 76, 48, 77, 33, 78, 79, 80, /* DX */ - 81, 39, 46, 82, 36, 83, 25, 43, 18, 31, 84, 45, 85, 86, 87, 88, /* EX */ - 26, 89, 90, 47, 91, 92, 32,SYM, 93, 94, 48, 95, 33, 96, 97,SYM, /* FX */ + SYM, 50,SYM, 44,SYM, 51, 52,SYM,SYM, 22, 53, 54, 55,SYM, 24, 56, /* AX */ + SYM, 57,SYM, 44,SYM, 58, 59,SYM,SYM, 22, 60, 61, 62,SYM, 24, 63, /* BX */ + 64, 36, 39, 65, 34, 66, 25, 67, 19, 32, 68, 69, 70, 43, 71, 72, /* CX */ + 26, 47, 73, 42, 74, 75, 31,SYM, 76, 77, 78, 79, 33, 80, 81, 82, /* DX */ + 83, 36, 39, 84, 34, 85, 25, 86, 19, 32, 87, 88, 89, 43, 90, 91, /* EX */ + 26, 47, 92, 42, 93, 94, 31,SYM, 95, 96, 97, 98, 33, 99,100,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 4X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 6X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 98,SYM,SYM,SYM,SYM, 99, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,100,SYM,SYM,SYM,SYM,101, /* BX */ - 102,103,104, 25, 36,105,106,107, 18, 31,108,109,110,111,112,113, /* CX */ - 23,114,115, 47, 40, 37, 32,SYM,116, 38,117,118, 33,119, 24,120, /* DX */ - 121,122,123, 25, 36,124,125,126, 18, 31,127,128,129,130,131,132, /* EX */ - 23,133,134, 47, 40, 37, 32,SYM,135, 38,136,137, 33,138, 24,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,101,SYM,SYM,SYM,SYM,102, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,103,SYM,SYM,SYM,SYM,104, /* BX */ + 105,106, 38, 25, 34, 40,107, 48, 19, 32,108,109,110,111, 49,112, /* CX */ + 22, 47,113, 42, 45,114, 31,SYM,115, 44,116,117, 33,118, 24,119, /* DX */ + 120,121, 38, 25, 34, 40,122, 48, 19, 32,123,124,125,126, 49,127, /* EX */ + 22, 47,128, 42, 45,129, 31,SYM,130, 44,131,132, 33,133, 24,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Iso_8859_16_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 4X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 6X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,139,140, 38,SYM,SYM, 23,SYM, 23,SYM,141,SYM,142,SYM,143,144, /* AX */ - SYM,SYM, 18, 38, 24,SYM,SYM,SYM, 24, 18,145,SYM, 44, 44,146,147, /* BX */ - 42, 39, 46,148, 36, 25,149, 43, 34, 31, 41, 45,150,151,152,153, /* CX */ - 26,154,155, 47,156,157, 32,158,159,160, 48,161, 33,162,163,164, /* DX */ - 42, 39, 46,165, 36, 25,166, 43, 34, 31, 41, 45,167,168,169,170, /* EX */ - 26,171,172, 47,173,174, 32,175,176,177, 48,178, 33,179,180,181, /* FX */ + SYM,134,135, 44,SYM,SYM, 22,SYM, 22,SYM,136,SYM,137,SYM,138,139, /* AX */ + SYM,SYM, 19, 44, 24,SYM,SYM,SYM, 24, 19,140,SYM,141,142,143,144, /* BX */ + 41, 36, 39,145, 34, 25,146,147, 37, 32, 46,148,149, 43,150,151, /* CX */ + 26, 47,152, 42,153,154, 31,155,156,157,158,159, 33,160,161,162, /* DX */ + 41, 36, 39,163, 34, 25,164,165, 37, 32, 46,166,167, 43,168,169, /* EX */ + 26, 47,170, 42,171,172, 31,173,174,175,176,177, 33,178,179,180, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,18 +131,18 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM,182,183, 24,184, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 23,SYM,185,186, 24,187, /* 9X */ - SYM,SYM,SYM, 38,SYM,188,SYM,SYM,SYM,SYM,189,SYM,SYM,SYM,SYM,190, /* AX */ - SYM,SYM,SYM, 38,SYM,SYM,SYM,SYM,SYM,191,192,SYM,193,SYM,194,195, /* BX */ - 196, 39, 46,197, 36,198, 25, 43, 18, 31,199, 45,200,201,202,203, /* CX */ - 26,204,205, 47,206,207, 32,SYM,208,209, 48,210, 33,211,212,213, /* DX */ - 214, 39, 46,215, 36,216, 25, 43, 18, 31,217, 45,218,219,220,221, /* EX */ - 26,222,223, 47,224,225, 32,SYM,226,227, 48,228, 33,229,230,SYM, /* FX */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 4X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 6X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,181,182, 24,183, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,184,185, 24,186, /* 9X */ + SYM,SYM,SYM, 44,SYM,187,SYM,SYM,SYM,SYM,188,SYM,SYM,SYM,SYM,189, /* AX */ + SYM,SYM,SYM, 44,SYM,SYM,SYM,SYM,SYM,190,191,SYM,192,SYM,193,194, /* BX */ + 195, 36, 39,196, 34,197, 25,198, 19, 32,199,200,201, 43,202,203, /* CX */ + 26, 47,204, 42,205,206, 31,SYM,207,208,209,210, 33,211,212,213, /* DX */ + 214, 36, 39,215, 34,216, 25,217, 19, 32,218,219,220, 43,221,222, /* EX */ + 26, 47,223, 42,224,225, 31,SYM,226,227,228,229, 33,230,231,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -152,17 +152,17 @@ static const unsigned char Ibm852_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 43, 33, 31, 46, 36,231, 25, 43, 38, 45,232,233,234,235, 36, 25, /* 8X */ - 31,236,237,238, 32,239,240,241,242, 32, 33,243,244, 38,SYM, 18, /* 9X */ - 39,245, 47, 48,246,247, 24, 24,248,249,SYM,249, 18,249,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 39, 46,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* BX */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 4X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 6X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 232, 33, 32, 39, 34,233, 25,234, 44,235,236,237,238,239, 34, 25, /* 8X */ + 32,240,241,242, 31,243,244,245,246, 31, 33,247,248, 44,SYM, 19, /* 9X */ + 36, 43, 42,249,249,249, 24, 24,249,249,SYM,249, 19,249,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 36, 39,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* BX */ SYM,SYM,SYM,SYM,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 26, 26,249, 45,249,249,249,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */ - 47,249,249,249,249,249, 23, 23,249, 48,249,249,249,249,249,SYM, /* EX */ + 26, 26,249,249,249,249, 43,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */ + 42,249,249, 47, 47,249, 22, 22,249,249,249,249,249,249,249,SYM, /* EX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -173,75 +173,76 @@ static const unsigned char Mac_Centraleurope_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 4X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 20, 15, 2, 22, 17, 21, 1, 7, 9, 10, 11, 4, 3, /* 6X */ - 14, 30, 6, 8, 5, 12, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 36,249,249, 31,249, 32, 33, 39,249, 18, 36, 18, 25, 25, 31,249, /* 8X */ - 249,249,249,249,249,249,249, 47,249,249, 32, 37, 48,249,249, 33, /* 9X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 4X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 20, 14, 2, 23, 17, 21, 1, 7, 11, 10, 12, 4, 3, /* 6X */ + 15, 30, 5, 8, 6, 9, 13, 28, 29, 27, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 34, 38, 38, 32,249, 31, 33, 36,249, 19, 34, 19, 25, 25, 32,249, /* 8X */ + 249,249, 43,249, 48, 48,249, 42,249,249, 31,249,249,249,249, 33, /* 9X */ SYM,SYM,249,SYM,SYM,SYM,SYM,249,SYM,SYM,SYM,249,SYM,SYM,249,249, /* AX */ - 249,249,SYM,SYM,249,249,SYM,SYM, 38,249,249,249,249,249,249,249, /* BX */ - 249,249,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,249,249, 37,249, 40, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40,249,249,249,SYM,SYM,249,249, /* DX */ - 249, 23,SYM,SYM, 23,249,249, 39,249,249,249, 24, 24,249, 47,249, /* EX */ - 249,249, 48,249,249,249,249,249,249,249,249,249, 38,249,249,SYM, /* FX */ + 249, 49,SYM,SYM, 49,249,SYM,SYM, 44,249,249,249,249,249,249,249, /* BX */ + 249, 47,SYM,SYM, 47,249,SYM,SYM,SYM,SYM,SYM,249,249,249,249, 45, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 45,249,249,249,SYM,SYM,249,249, /* DX */ + 249, 22,SYM,SYM, 22,249,249, 36,249,249, 43, 24, 24,249, 42,249, /* EX */ + 249,249,249,249,249,249,249,249,249,249,249,249, 44,249,249,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 62; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 19, 67, 20, 68, 15, 69, 2, 70, 22, 71, 17, 72, 21, - 73, 1, 74, 7, 75, 9, 76, 10, 77, 11, 78, 4, 79, 3, 80, 14, - 81, 30, 82, 6, 83, 8, 84, 5, 85, 12, 86, 13, 87, 28, 88, 29, - 89, 27, 90, 16, 97, 0, 98, 19, 99, 20, 100, 15, 101, 2,102, 22, - 103, 17, 104, 21, 105, 1, 106, 7, 107, 9, 108, 10, 109, 11,110, 4, - 111, 3, 112, 14, 113, 30, 114, 6, 115, 8, 116, 5, 117, 12,118, 13, - 119, 28, 120, 29, 121, 27, 122, 16, 262, 25, 263, 25, 268, 18,269, 18, - 272, 26, 273, 26, 352, 23, 353, 23, 381, 24, 382, 24, + 65, 0, 66, 18, 67, 20, 68, 14, 69, 2, 70, 23, 71, 17, 72, 21, + 73, 1, 74, 7, 75, 11, 76, 10, 77, 12, 78, 4, 79, 3, 80, 15, + 81, 30, 82, 5, 83, 8, 84, 6, 85, 9, 86, 13, 87, 28, 88, 29, + 89, 27, 90, 16, 97, 0, 98, 18, 99, 20, 100, 14, 101, 2,102, 23, + 103, 17, 104, 21, 105, 1, 106, 7, 107, 11, 108, 10, 109, 12,110, 4, + 111, 3, 112, 15, 113, 30, 114, 5, 115, 8, 116, 6, 117, 9,118, 13, + 119, 28, 120, 29, 121, 27, 122, 16, 262, 25, 263, 25, 268, 19,269, 19, + 272, 26, 273, 26, 352, 22, 353, 22, 381, 24, 382, 24, }; /* Model Table: - * Total sequences: 725 - * First 512 sequences: 0.9990568119867879 - * Next 512 sequences (512-1024): 0.0009431880132121777 - * Rest: -4.0440741033709315e-17 + * Total considered sequences: 809 / 961 + * - Positive sequences: first 410 (0.9950584932401488) + * - Probable sequences: next 135 (545-410) (0.0039469726326971655) + * - Neutral sequences: last 416 (0.000994534127154001) + * - Negative sequences: 152 (off-ratio) * Negative sequences: TODO */ static const PRUint8 CroatianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,3,3,0,0,0,0,3,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,3,2,2,3,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,3,0,0,0,0,3,2,0,3, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,3,0,3,2,2,3,0,2,0,2,3,0,0, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,0,0,0,3,2,2,0, - 3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,3,2,3,3,2,3,0,0,0,0,2,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,2,0, - 3,3,3,3,3,0,3,3,3,3,3,2,3,0,2,3,0,0,2,2,3,2,2,3,0,0,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,2,3,3,0,2,0,3,0,2,0,2,0, - 3,3,3,3,3,2,3,3,3,0,3,3,3,3,0,2,3,3,0,3,2,2,3,0,3,0,0,2,3,2,0, - 3,3,3,3,3,0,3,3,2,0,3,3,3,3,0,3,2,3,0,3,0,3,0,0,0,0,0,2,2,0,0, - 3,3,3,3,3,2,3,2,2,0,3,3,3,2,2,3,3,2,0,0,0,3,2,0,0,0,0,2,2,0,0, - 3,3,3,3,3,0,2,3,0,3,3,0,3,3,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,2,0,3,2,3,2,0,3,3,0,0,2,3,0,3,0,0,0,0,3,0,0,2, - 3,3,3,3,2,3,3,3,3,3,3,3,3,2,0,3,0,2,0,0,2,3,0,0,0,0,0,2,0,0,3, - 3,3,3,3,3,3,3,0,3,2,3,3,3,3,0,2,3,2,0,3,3,2,2,0,0,0,0,3,2,2,0, - 3,3,3,3,3,3,3,2,3,2,3,2,3,0,2,2,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,2,0,0,3,3,2,3,3,3,0,0,0,3,0,2,0,0,0,0,3,0,0,0,0,0, - 3,3,3,3,3,0,2,2,0,0,2,0,3,0,0,3,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0, - 3,3,3,3,3,0,0,0,0,2,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 3,3,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,2,2,2,2,3,2,3,2,3,0,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,2,2,3,0,0,0,3,0,0,0,0,0,0,2,2,3,2,0,0,0,0,2,2,0,2, - 3,3,2,0,0,2,2,0,0,0,0,0,2,2,2,0,0,2,0,0,0,0,2,0,0,0,0,0,3,0,0, - 0,3,0,0,0,0,3,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,0,2,1, + 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0, + 3,3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,3,3,2,3,3,2,1,3,2,0,1,2,1,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,0,2,3,0,1,0,0,0,3,2,0,0, + 3,3,3,3,3,2,2,2,3,3,2,3,3,3,2,2,2,1,2,3,2,1,2,1,1,0,0,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,1,1,2,1,3,3,0,3,1,0,0,2,1,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,1,0,1,0, + 3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,2,2,2,3,0,3,2,0,3,1,0,0,2,1,2,0, + 3,3,3,3,3,3,3,2,3,3,3,1,2,3,0,2,1,2,0,2,3,2,2,1,0,1,0,2,1,0,0, + 3,3,3,3,3,3,2,3,3,3,3,2,3,1,2,3,0,2,3,1,2,2,0,2,0,1,0,1,1,0,0, + 3,3,3,3,3,3,1,3,3,3,3,2,0,0,2,1,0,1,1,2,2,0,2,1,0,0,0,2,0,1,0, + 3,3,3,3,3,3,2,3,3,3,3,0,3,3,2,2,2,3,3,1,1,2,0,1,3,0,0,2,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,2,1,1,2,1,1,1,2,3,3,1,1,0,3,0,1,0,1,0, + 3,3,3,3,3,3,1,2,1,3,3,0,3,3,3,0,1,3,3,0,1,2,0,0,0,0,0,1,1,0,0, + 3,3,3,3,3,3,2,1,2,3,3,1,2,1,3,1,2,2,1,0,1,2,0,1,0,0,0,2,1,0,0, + 3,3,3,3,3,3,1,3,1,3,3,0,1,1,2,0,3,0,1,0,1,1,0,2,0,0,0,2,0,0,1, + 3,3,3,3,3,0,0,3,0,3,3,3,1,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,1,1,0,1,0,2,3,0,1,0,0,0,1,0,0,2, + 3,3,3,3,3,3,3,1,2,3,3,1,2,3,1,0,2,2,3,0,2,2,0,1,0,0,0,2,1,1,0, + 3,3,3,2,3,1,3,0,0,3,3,3,1,3,0,2,0,0,0,2,1,0,0,0,0,3,0,0,0,0,0, + 3,3,3,3,2,3,2,1,2,3,3,1,1,0,0,1,0,1,1,0,1,0,0,2,0,0,0,0,0,0,1, + 3,3,3,2,3,0,0,2,0,3,1,0,0,1,2,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0, + 3,3,3,3,3,0,0,1,0,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, + 3,3,3,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,2,1,2,1,2,3,2,3,1,1,0,1,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0,1, + 3,3,3,2,2,1,2,1,2,2,1,1,1,1,0,0,0,0,1,0,1,1,0,1,0,0,0,1,1,0,1, + 2,2,1,0,1,1,1,0,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,3,1,0, + 0,2,0,0,0,1,0,0,0,2,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, }; @@ -250,7 +251,7 @@ const SequenceModel Iso_8859_2CroatianModel = Iso_8859_2_CharToOrderMap, CroatianLangModel, 31, - (float)0.9990568119867879, + (float)0.999005465872846, PR_TRUE, "ISO-8859-2", "hr" @@ -261,7 +262,7 @@ const SequenceModel Iso_8859_13CroatianModel = Iso_8859_13_CharToOrderMap, CroatianLangModel, 31, - (float)0.9990568119867879, + (float)0.999005465872846, PR_TRUE, "ISO-8859-13", "hr" @@ -272,7 +273,7 @@ const SequenceModel Iso_8859_16CroatianModel = Iso_8859_16_CharToOrderMap, CroatianLangModel, 31, - (float)0.9990568119867879, + (float)0.999005465872846, PR_TRUE, "ISO-8859-16", "hr" @@ -283,7 +284,7 @@ const SequenceModel Windows_1250CroatianModel = Windows_1250_CharToOrderMap, CroatianLangModel, 31, - (float)0.9990568119867879, + (float)0.999005465872846, PR_TRUE, "WINDOWS-1250", "hr" @@ -294,7 +295,7 @@ const SequenceModel Ibm852CroatianModel = Ibm852_CharToOrderMap, CroatianLangModel, 31, - (float)0.9990568119867879, + (float)0.999005465872846, PR_TRUE, "IBM852", "hr" @@ -305,7 +306,7 @@ const SequenceModel Mac_CentraleuropeCroatianModel = Mac_Centraleurope_CharToOrderMap, CroatianLangModel, 31, - (float)0.9990568119867879, + (float)0.999005465872846, PR_TRUE, "MAC-CENTRALEUROPE", "hr" @@ -318,5 +319,8 @@ const LanguageModel CroatianModel = 62, CroatianLangModel, 31, - (float)0.9990568119867879, + 4, + (float)0.3895378997873738, + 19, + (float)0.039852386200447516, }; diff --git a/src/LangModels/LangCzechModel.cpp b/src/LangModels/LangCzechModel.cpp index 75d9dea..c2f49ff 100644 --- a/src/LangModels/LangCzechModel.cpp +++ b/src/LangModels/LangCzechModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 18:50:25.564246 + * On: 2022-12-14 18:07:45.113195 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */ - 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */ - 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 22, 16, 14, 1, 31, 30, 17, 5, 21, 10, 9, 11, 3, 0, /* 4X */ + 13, 40, 7, 6, 4, 12, 8, 35, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 22, 16, 14, 1, 31, 30, 17, 5, 21, 10, 9, 11, 3, 0, /* 6X */ + 13, 40, 7, 6, 4, 12, 8, 35, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 48,SYM, 49,SYM, 44, 45,SYM,SYM, 29, 50, 38, 51,SYM, 26, 52, /* AX */ - SYM, 53,SYM, 54,SYM, 44, 45,SYM,SYM, 29, 55, 38, 56,SYM, 26, 57, /* BX */ - 58, 19, 59, 60, 41, 61, 62, 63, 25, 24, 64, 65, 23, 12, 66, 40, /* CX */ - 67, 68, 36, 37, 69, 70, 42,SYM, 27, 32, 33, 71, 43, 28, 72, 46, /* DX */ - 73, 19, 74, 75, 41, 76, 77, 78, 25, 24, 79, 80, 23, 12, 81, 40, /* EX */ - 82, 83, 36, 37, 84, 85, 42,SYM, 27, 32, 33, 86, 43, 28, 87,SYM, /* FX */ + SYM, 65,SYM, 46,SYM, 51, 54,SYM,SYM, 29, 61, 38, 70,SYM, 28, 58, /* AX */ + SYM, 65,SYM, 46,SYM, 51, 54,SYM,SYM, 29, 61, 38, 71,SYM, 28, 58, /* BX */ + 72, 18, 73, 50, 43, 74, 47, 45, 27, 24, 60, 57, 23, 15, 56, 39, /* CX */ + 59, 55, 36, 37, 48, 66, 41,SYM, 25, 32, 33, 64, 42, 26, 69, 52, /* DX */ + 75, 18, 76, 50, 43, 77, 47, 45, 27, 24, 60, 57, 23, 15, 56, 39, /* EX */ + 59, 55, 36, 37, 48, 66, 41,SYM, 25, 32, 33, 64, 42, 26, 69,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */ - 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */ - 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 45, 38, 26, 88, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 45, 38, 26, 89, /* 9X */ - SYM,SYM,SYM, 90,SYM, 91,SYM,SYM,SYM,SYM, 92,SYM,SYM,SYM,SYM, 93, /* AX */ - SYM,SYM,SYM, 94,SYM,SYM,SYM,SYM,SYM, 95, 96,SYM, 44,SYM, 44, 97, /* BX */ - 98, 19, 99,100, 41,101,102,103, 25, 24,104,105, 23, 12,106, 40, /* CX */ - 107,108, 36, 37,109,110, 42,SYM, 27, 32, 33,111, 43, 28,112, 46, /* DX */ - 113, 19,114,115, 41,116,117,118, 25, 24,119,120, 23, 12,121, 40, /* EX */ - 122,123, 36, 37,124,125, 42,SYM, 27, 32, 33,126, 43, 28,127,SYM, /* FX */ + SYM, 2, 22, 16, 14, 1, 31, 30, 17, 5, 21, 10, 9, 11, 3, 0, /* 4X */ + 13, 40, 7, 6, 4, 12, 8, 35, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 22, 16, 14, 1, 31, 30, 17, 5, 21, 10, 9, 11, 3, 0, /* 6X */ + 13, 40, 7, 6, 4, 12, 8, 35, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 54, 38, 28, 78, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 54, 38, 28, 79, /* 9X */ + SYM,SYM,SYM, 46,SYM, 65,SYM,SYM,SYM,SYM, 61,SYM,SYM,SYM,SYM, 58, /* AX */ + SYM,SYM,SYM, 46,SYM,SYM,SYM,SYM,SYM, 65, 61,SYM, 51,SYM, 51, 58, /* BX */ + 80, 18, 81, 50, 43, 82, 47, 45, 27, 24, 60, 57, 23, 15, 56, 39, /* CX */ + 59, 55, 36, 37, 48, 66, 41,SYM, 25, 32, 33, 64, 42, 26, 69, 52, /* DX */ + 83, 18, 84, 50, 43, 85, 47, 45, 27, 24, 60, 57, 23, 15, 56, 39, /* EX */ + 59, 55, 36, 37, 48, 66, 41,SYM, 25, 32, 33, 64, 42, 26, 69,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Ibm852_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */ - 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */ - 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 128, 43, 24,129, 41, 32,130,131,132,133,134,135,136,137, 41,138, /* 8X */ - 24,139,140,141, 42, 44, 44, 45, 45, 42, 43, 38, 38,142,SYM, 25, /* 9X */ - 19, 12, 37, 33,143,144, 26, 26,145,146,SYM,147, 25,148,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 19,149, 23,150,SYM,SYM,SYM,SYM,151,152,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM,153,154,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 155,156, 40,157, 40, 36, 12,158, 23,SYM,SYM,SYM,SYM,159, 32,SYM, /* DX */ - 37, 46,160,161,162, 36, 29, 29,163, 33,164,165, 28, 28,166,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,167, 27, 27,SYM,SYM, /* FX */ + SYM, 2, 22, 16, 14, 1, 31, 30, 17, 5, 21, 10, 9, 11, 3, 0, /* 4X */ + 13, 40, 7, 6, 4, 12, 8, 35, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 22, 16, 14, 1, 31, 30, 17, 5, 21, 10, 9, 11, 3, 0, /* 6X */ + 13, 40, 7, 6, 4, 12, 8, 35, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 45, 42, 24, 86, 43, 32, 47, 45, 46, 57, 66, 66, 56, 87, 43, 47, /* 8X */ + 24, 88, 89, 48, 41, 51, 51, 54, 54, 41, 42, 38, 38, 46,SYM, 27, /* 9X */ + 18, 15, 37, 33, 65, 65, 28, 28, 60, 60,SYM, 90, 27, 61,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 18, 91, 23, 61,SYM,SYM,SYM,SYM, 58, 58,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM, 50, 50,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 59, 59, 39, 57, 39, 36, 15, 56, 23,SYM,SYM,SYM,SYM, 69, 32,SYM, /* DX */ + 37, 52, 48, 55, 55, 36, 29, 29, 92, 33, 93, 64, 26, 26, 69,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 64, 25, 25,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,129 +131,130 @@ static const unsigned char Mac_Centraleurope_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 4X */ - 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 3, 22, 14, 13, 1, 31, 30, 17, 4, 21, 11, 10, 16, 2, 0, /* 6X */ - 9, 39, 8, 6, 5, 15, 7, 35, 34, 20, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 41,168,169, 24,170, 42, 43, 19,171, 25, 41, 25,172,173, 24,174, /* 8X */ - 175, 40, 12, 40, 47, 47,176, 37,177,178, 42,179, 33, 23, 23, 43, /* 9X */ - SYM,SYM,180,SYM,SYM,SYM,SYM, 46,SYM,SYM,SYM,181,SYM,SYM,182,183, /* AX */ - 184,185,SYM,SYM,186,187,SYM,SYM,188,189,190, 44, 44,191,192,193, /* BX */ - 194,195,SYM,SYM,196, 36,SYM,SYM,SYM,SYM,SYM, 36,197,198,199,200, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,201,202,203, 27,SYM,SYM, 27,204, /* DX */ - 205, 29,SYM,SYM, 29, 45, 45, 19, 38, 38, 12, 26, 26,206, 37,207, /* EX */ - 208, 32, 33, 32,209,210,211,212, 28, 28,213,214,215,216,217,SYM, /* FX */ + SYM, 2, 22, 16, 14, 1, 31, 30, 17, 5, 21, 10, 9, 11, 3, 0, /* 4X */ + 13, 40, 7, 6, 4, 12, 8, 35, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 22, 16, 14, 1, 31, 30, 17, 5, 21, 10, 9, 11, 3, 0, /* 6X */ + 13, 40, 7, 6, 4, 12, 8, 35, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 43, 44, 44, 24, 65, 41, 42, 18, 65, 27, 43, 27, 47, 47, 24, 94, /* 8X */ + 95, 39, 15, 39, 63, 63, 53, 37, 53, 48, 41, 67, 33, 23, 23, 42, /* 9X */ + SYM,SYM, 60,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM, 60,SYM,SYM, 96, 97, /* AX */ + 98, 49,SYM,SYM, 49, 99,SYM,SYM, 46,100,101, 51, 51,102,103,104, /* BX */ + 105, 55,SYM,SYM, 55, 36,SYM,SYM,SYM,SYM,SYM, 36, 66, 67, 66, 62, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 62,106,107, 25,SYM,SYM, 25,108, /* DX */ + 109, 29,SYM,SYM, 29, 54, 54, 18, 38, 38, 15, 28, 28, 68, 37, 48, /* EX */ + 68, 32, 33, 32, 64, 64,110,111, 26, 26,112, 58, 46, 58,113,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 82; static const unsigned int Unicode_CharOrder[] = { - 65, 3, 66, 22, 67, 14, 68, 13, 69, 1, 70, 31, 71, 30, 72, 17, - 73, 4, 74, 21, 75, 11, 76, 10, 77, 16, 78, 2, 79, 0, 80, 9, - 81, 39, 82, 8, 83, 6, 84, 5, 85, 15, 86, 7, 87, 35, 88, 34, - 89, 20, 90, 18, 97, 3, 98, 22, 99, 14, 100, 13, 101, 1,102, 31, - 103, 30, 104, 17, 105, 4, 106, 21, 107, 11, 108, 10, 109, 16,110, 2, - 111, 0, 112, 9, 113, 39, 114, 8, 115, 6, 116, 5, 117, 15,118, 7, - 119, 35, 120, 34, 121, 20, 122, 18, 193, 19, 201, 24, 205, 12,211, 37, - 218, 33, 221, 28, 225, 19, 233, 24, 237, 12, 243, 37, 250, 33,253, 28, - 268, 25, 269, 25, 270, 40, 271, 40, 282, 23, 283, 23, 327, 36,328, 36, - 344, 27, 345, 27, 352, 29, 353, 29, 356, 38, 357, 38, 366, 32,367, 32, - 381, 26, 382, 26, + 65, 2, 66, 22, 67, 16, 68, 14, 69, 1, 70, 31, 71, 30, 72, 17, + 73, 5, 74, 21, 75, 10, 76, 9, 77, 11, 78, 3, 79, 0, 80, 13, + 81, 40, 82, 7, 83, 6, 84, 4, 85, 12, 86, 8, 87, 35, 88, 34, + 89, 20, 90, 19, 97, 2, 98, 22, 99, 16, 100, 14, 101, 1,102, 31, + 103, 30, 104, 17, 105, 5, 106, 21, 107, 10, 108, 9, 109, 11,110, 3, + 111, 0, 112, 13, 113, 40, 114, 7, 115, 6, 116, 4, 117, 12,118, 8, + 119, 35, 120, 34, 121, 20, 122, 19, 193, 18, 201, 24, 205, 15,211, 37, + 218, 33, 221, 26, 225, 18, 233, 24, 237, 15, 243, 37, 250, 33,253, 26, + 268, 27, 269, 27, 270, 39, 271, 39, 282, 23, 283, 23, 327, 36,328, 36, + 344, 25, 345, 25, 352, 29, 353, 29, 356, 38, 357, 38, 366, 32,367, 32, + 381, 28, 382, 28, }; /* Model Table: - * Total sequences: 1037 - * First 512 sequences: 0.9751874547460189 - * Next 512 sequences (512-1024): 0.024780958582584566 - * Rest: 3.158667139656693e-05 + * Total considered sequences: 1359 / 1681 + * - Positive sequences: first 747 (0.995024712172107) + * - Probable sequences: next 199 (946-747) (0.003977666094002408) + * - Neutral sequences: last 735 (0.000997621733890619) + * - Negative sequences: 322 (off-ratio) * Negative sequences: TODO */ static const PRUint8 CzechLangModel[] = { - 2,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2, - 2,3,3,0,0,3,3,3,0,2,3,3,0,0,3,2,2,0,2,0,0, - 3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,2, - 2,3,3,0,0,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,2, - 3,3,3,3,3,3,3,2,2,2,2,3,3,3,3,3,2,2,3,3, - 3,2,2,3,3,3,2,0,3,2,3,3,3,0,2,1,0,2,0,2,0, - 2,2,3,2,2,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0, - 3,3,3,0,0,3,3,3,0,3,3,3,0,0,2,2,2,0,2,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,1,3, + 2,3,3,0,0,3,0,3,3,3,3,3,0,2,3,3,2,0,3,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3, + 3,3,3,0,0,3,0,3,3,3,3,3,0,2,3,3,3,0,1,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3, + 3,3,3,0,0,3,0,3,3,3,3,3,0,2,3,3,3,0,2,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3, + 3,2,3,3,3,0,3,3,3,3,3,3,3,1,1,2,0,3,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,2,0,3,2,2,3,1,1,3,2,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 1,3,3,0,3,3,0,3,3,3,3,3,1,2,3,2,2,2,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,2,3,0,3,1,2,2,0,1,2,3,3,1,0,2,2,2,2,0,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3, - 2,2,3,0,2,3,3,2,0,3,3,3,0,0,2,2,2,2,2,2,0, - 3,3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,2,3,3,3, - 3,2,2,3,3,1,0,3,3,3,2,2,3,0,0,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3, - 3,2,3,0,2,2,0,0,1,0,2,2,2,2,0,2,0,0,2,0,0, - 3,3,3,3,3,2,3,0,3,2,3,3,3,3,2,3,0,2,3,3, - 3,2,2,3,3,2,2,2,3,3,0,0,3,0,0,0,2,2,0,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,2,3,3, - 3,0,3,0,3,3,2,0,3,3,3,2,3,0,2,2,2,2,0,2,0, - 3,3,3,3,3,3,3,2,3,2,3,2,3,2,2,3,2,2,0,3, - 2,2,2,3,2,2,0,3,2,2,0,0,3,0,0,0,2,0,0,0,0, - 3,3,3,3,3,3,3,2,2,2,3,3,3,2,2,3,3,2,3,3, - 3,0,2,0,3,2,2,0,3,3,2,2,3,0,0,0,2,2,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,2,0,3,3,3,2,2,2,3, - 3,2,2,0,3,2,2,2,3,0,2,2,3,0,0,0,0,2,0,0,0, - 0,2,3,2,2,3,3,3,3,3,3,3,0,3,3,0,3,3,3,0, - 0,3,2,0,0,3,3,2,0,2,2,0,0,0,0,0,0,0,2,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,2,3, - 3,2,2,3,3,2,2,2,2,2,2,2,3,0,2,2,2,0,0,0,0, - 3,3,3,3,3,3,2,2,2,2,2,3,3,3,2,2,2,3,2,2, - 2,0,2,0,2,0,0,0,0,0,0,2,2,0,0,0,2,0,0,2,0, - 2,2,3,3,2,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3, - 2,3,3,0,0,3,3,2,0,3,2,2,0,0,2,2,2,0,2,0,2, - 3,3,3,3,3,2,2,0,3,3,3,2,3,2,2,3,2,2,2,3, - 3,1,3,3,3,2,3,2,2,2,2,2,3,0,0,0,0,2,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,3,2,0,0,3, - 3,2,2,0,2,2,2,2,3,0,0,2,2,2,0,2,0,2,2,0,0, - 3,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,3,2,3, - 3,2,2,0,2,2,0,2,2,2,0,0,2,2,0,2,2,2,0,0,0, - 0,0,3,2,0,3,3,3,3,3,3,3,0,3,3,0,3,3,3,0, - 0,2,2,0,0,2,3,2,0,3,0,2,0,0,0,0,2,0,0,0,0, - 2,2,3,2,2,3,3,3,2,3,3,3,2,2,3,2,3,3,3,0, - 0,2,3,0,0,2,3,2,0,3,2,1,0,2,0,0,0,0,2,0,0, - 2,3,3,3,3,2,3,2,2,2,2,2,3,2,2,2,3,2,2,3, - 0,2,2,0,0,2,0,0,0,3,0,2,2,2,0,0,2,2,0,0,2, - 3,3,3,3,3,3,3,2,3,0,3,2,3,2,2,3,2,2,2,3, - 3,3,2,3,2,2,0,2,3,2,0,0,2,2,0,0,2,2,0,0,0, - 0,0,3,2,0,3,3,2,3,3,3,3,0,3,3,0,3,3,2,0, - 0,3,2,0,0,2,3,3,0,2,0,0,0,0,0,0,2,0,2,0,2, - 2,0,3,0,0,3,2,2,2,2,2,3,0,2,0,0,3,3,2,0, - 0,0,0,0,0,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,0,2,0,3,3,3,0,2,3,0,0,0,3, - 0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0, - 3,3,3,2,3,2,2,0,2,0,2,2,3,3,2,2,2,0,0,3, - 0,0,2,0,0,0,0,0,0,2,0,0,2,0,0,0,2,0,0,0,2, - 2,3,2,3,3,0,2,0,0,0,0,2,3,0,0,2,0,0,2,2, - 0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 0,0,2,0,0,3,3,3,2,2,2,2,0,2,3,2,3,2,3,0, - 0,2,3,0,0,2,2,2,0,3,0,1,0,0,0,0,0,0,0,0,0, - 2,3,3,3,3,3,2,2,0,2,3,3,3,0,0,3,0,0,0,2, - 0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0, - 3,3,3,3,3,2,2,2,3,2,3,1,0,2,2,3,2,2,0,2, - 2,0,0,0,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0, - 3,3,2,3,3,2,2,0,3,2,3,0,1,2,2,3,2,0,2,2, - 3,2,2,0,2,0,0,0,2,0,2,2,2,2,0,0,0,2,0,0,0, - 0,0,2,0,0,0,3,3,2,0,2,2,0,2,2,0,3,2,3,0, - 0,2,2,0,0,2,3,2,0,0,0,0,0,0,0,0,1,0,0,0,0, - 0,0,2,0,0,2,3,2,2,2,2,2,0,2,2,0,2,2,3,0, - 0,2,2,0,0,3,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0, - 2,2,2,2,3,3,0,0,2,2,2,2,2,2,2,2,2,0,0,2, - 2,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,0,0,0,0,0, - 2,2,2,2,3,0,2,0,2,0,0,2,0,0,2,2,0,2,0,0, - 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 2,0,0,0,0,0,2,0,0,0,0,2,0,0,0,3,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 0,0,2,0,0,2,2,0,0,0,2,0,0,2,0,0,2,0,3,0, - 0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,0,2,0,0,0,0,0,0,0,0,0,2,2,0,0,0, + 3,1,3,0,3,0,3,3,3,3,3,3,3,1,2,2,2,2,2,0,1, + 3,3,3,3,2,3,3,3,1,3,3,1,3,2,3,3,3,3,3,3, + 3,1,3,3,3,3,3,3,3,3,1,1,3,1,0,0,3,1,0,0,0, + 3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3, + 3,2,3,0,3,0,3,2,2,3,3,3,3,0,1,1,3,2,0,0,1, + 3,3,3,3,3,3,3,3,3,3,2,3,3,1,3,0,3,2,3,2, + 3,1,2,0,3,3,3,3,2,1,3,1,3,1,0,1,0,3,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,1,3,1, + 3,2,3,3,3,2,3,2,3,1,2,2,3,1,1,1,1,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,1,3,3,3,3, + 1,3,3,0,1,3,0,3,3,3,3,2,0,1,2,1,3,1,2,2,1, + 3,3,3,3,3,3,3,3,1,3,3,2,3,3,2,3,3,3,3,0, + 3,2,1,3,3,3,1,2,0,3,1,1,3,0,1,0,2,3,1,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,2,3,3,3,3,3,3,2,2,2,2,3,1,1,3,2,1,0,0,0, + 1,0,1,3,3,1,3,3,3,3,3,3,0,3,3,0,3,3,0,3, + 0,3,3,0,0,3,0,3,3,3,1,0,0,0,0,0,2,0,2,0,1, + 3,3,3,3,3,3,2,3,2,3,3,3,3,2,2,3,3,3,2,3, + 3,1,1,0,3,0,0,1,0,0,2,0,3,0,1,1,1,1,0,0,2, + 3,3,3,3,3,3,2,3,3,3,3,3,3,1,3,1,3,1,3,1, + 3,1,1,0,3,3,3,2,3,1,1,1,3,1,0,2,1,2,1,0,0, + 1,1,1,3,3,1,3,3,3,3,3,3,1,3,3,0,3,3,0,3, + 0,3,3,0,0,3,0,3,3,3,3,0,0,0,0,0,2,0,1,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,3,3,0,2,3,3,2,0,3,1,1,3,2,0,1,2,2,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,1,3, + 0,3,3,0,0,3,0,3,3,3,2,2,1,2,1,1,2,0,2,0,0, + 3,3,3,3,2,3,3,2,3,3,2,3,3,3,3,3,2,2,3,2, + 1,2,2,0,1,0,0,3,2,3,1,1,3,2,0,1,1,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,1,3,3,3,2,3,2, + 3,3,2,3,3,3,3,3,1,2,0,0,2,1,0,1,1,2,0,0,0, + 0,0,0,3,3,0,3,3,3,3,3,3,0,3,3,1,3,3,0,3, + 0,3,1,0,0,3,0,3,3,3,0,0,0,0,0,0,2,0,3,2,0, + 1,1,0,3,3,0,3,3,3,3,3,3,0,2,3,0,2,3,0,3, + 0,0,2,0,0,3,0,3,3,1,1,2,0,0,1,0,0,0,0,0,1, + 3,3,3,3,1,3,3,0,1,1,3,1,3,1,0,3,1,1,3,1, + 0,0,2,0,0,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,0, + 0,0,0,2,3,0,3,3,3,2,3,3,2,2,3,0,3,3,0,3, + 0,3,3,0,0,1,0,2,2,3,0,1,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,1,3,3,1,3,1,0,3,2,0,3,1, + 0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,1,1,0,0,0, + 3,3,3,3,2,3,3,2,1,3,3,2,3,0,3,3,2,1,3,0, + 1,0,2,0,1,0,0,0,0,3,0,0,2,0,0,0,3,1,0,2,0, + 3,3,3,3,3,3,3,2,3,3,3,1,3,3,0,3,2,0,2,0, + 1,0,1,0,1,0,0,1,0,3,0,0,1,1,0,0,1,1,3,0,0, + 3,3,3,3,3,3,2,3,2,3,2,3,3,1,2,1,2,3,3,0, + 3,1,1,1,1,1,0,0,0,1,2,1,2,1,0,1,0,1,0,0,0, + 3,3,3,2,3,3,2,3,1,3,2,2,3,1,1,1,1,0,3,0, + 3,1,1,1,3,0,2,0,0,0,2,3,1,2,0,0,0,2,0,0,0, + 0,0,0,2,2,1,3,3,3,3,3,3,0,1,3,0,2,1,0,3, + 0,3,3,0,0,2,0,2,3,1,0,1,0,1,0,0,1,0,0,0,0, + 0,0,0,3,3,1,3,3,2,2,2,3,0,3,3,0,2,2,0,3, + 0,1,2,0,0,3,0,3,2,1,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,0,1,1,1,1,1,3,3,1,1,2,1,1,0, + 3,0,0,0,1,0,0,0,0,0,0,1,2,0,2,1,0,1,0,0,0, + 3,3,3,2,2,3,2,2,1,2,1,1,2,0,1,1,1,2,1,0, + 2,1,1,1,1,0,0,0,0,0,1,1,0,0,0,3,0,0,0,0,0, + 3,0,2,0,0,0,2,0,0,0,3,1,3,0,0,0,2,1,1,0, + 0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, + 0,1,0,3,1,0,1,3,0,2,1,1,0,0,3,0,1,1,0,3, + 0,2,1,0,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,0,0, + 3,0,2,0,0,1,1,0,0,0,1,0,3,0,0,0,0,0,2,0, + 0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 2,0,3,0,1,0,1,0,0,0,1,1,2,0,0,0,1,0,1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,0,1,0,1,1,0,1,0,0,3,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, }; @@ -262,7 +263,7 @@ const SequenceModel Iso_8859_2CzechModel = Iso_8859_2_CharToOrderMap, CzechLangModel, 41, - (float)0.9751874547460189, + (float)0.9990023782661094, PR_TRUE, "ISO-8859-2", "cs" @@ -273,7 +274,7 @@ const SequenceModel Windows_1250CzechModel = Windows_1250_CharToOrderMap, CzechLangModel, 41, - (float)0.9751874547460189, + (float)0.9990023782661094, PR_TRUE, "WINDOWS-1250", "cs" @@ -284,7 +285,7 @@ const SequenceModel Ibm852CzechModel = Ibm852_CharToOrderMap, CzechLangModel, 41, - (float)0.9751874547460189, + (float)0.9990023782661094, PR_TRUE, "IBM852", "cs" @@ -295,7 +296,7 @@ const SequenceModel Mac_CentraleuropeCzechModel = Mac_Centraleurope_CharToOrderMap, CzechLangModel, 41, - (float)0.9751874547460189, + (float)0.9990023782661094, PR_TRUE, "MAC-CENTRALEUROPE", "cs" @@ -308,5 +309,8 @@ const LanguageModel CzechModel = 82, CzechLangModel, 41, - (float)0.9751874547460189, + 6, + (float)0.39747705231553015, + 27, + (float)0.03495288022220359, }; diff --git a/src/LangModels/LangDanishModel.cpp b/src/LangModels/LangDanishModel.cpp index 0abdd97..0bf9967 100644 --- a/src/LangModels/LangDanishModel.cpp +++ b/src/LangModels/LangDanishModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2022-11-30 20:52:37.003457 + * On: 2022-12-14 17:54:15.598693 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 8, 11, 2, 9, /* 4X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 8, 11, 2, 9, /* 6X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 4, 16, 22, 7, 0, 13, 10, 18, 5, 24, 11, 8, 12, 2, 9, /* 4X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 4, 16, 22, 7, 0, 13, 10, 18, 5, 24, 11, 8, 12, 2, 9, /* 6X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 44,SYM, 44,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 55, 56,SYM,SYM, 55,SYM,SYM,SYM, 63, 64, 65,SYM, /* BX */ - 46, 34, 52, 53, 38, 21, 20, 40, 39, 28, 43, 36, 66, 35, 67, 62, /* CX */ - 32, 49, 60, 29, 48, 68, 33,SYM, 19, 61, 37, 59, 31, 42, 41, 45, /* DX */ - 46, 34, 52, 53, 38, 21, 20, 40, 39, 28, 43, 36, 69, 35, 70, 62, /* EX */ - 32, 49, 60, 29, 48, 71, 33,SYM, 19, 61, 37, 59, 31, 42, 41, 72, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 40,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 58, 59,SYM,SYM, 60,SYM,SYM,SYM, 61, 62, 63,SYM, /* BX */ + 42, 33, 37, 57, 31, 21, 23, 38, 36, 28, 41, 49, 64, 39, 50, 44, /* CX */ + 35, 43, 65, 34, 46, 66, 32,SYM, 19, 67, 45, 47, 30, 54, 68, 55, /* DX */ + 42, 33, 37, 57, 31, 21, 23, 38, 36, 28, 41, 49, 69, 39, 50, 44, /* EX */ + 35, 43, 70, 34, 46, 71, 32,SYM, 19, 72, 45, 47, 30, 54, 73, 74, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 8, 11, 2, 9, /* 4X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 8, 11, 2, 9, /* 6X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 4, 16, 22, 7, 0, 13, 10, 18, 5, 24, 11, 8, 12, 2, 9, /* 4X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 4, 16, 22, 7, 0, 13, 10, 18, 5, 24, 11, 8, 12, 2, 9, /* 6X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 56,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 46, 34, 52, 53, 38, 21, 20, 40, 39, 28, 43, 36, 73, 35, 74, 62, /* CX */ - 32, 49, 60, 29, 48, 75, 33,SYM, 19, 61, 37, 59, 31, 42, 41, 45, /* DX */ - 46, 34, 52, 53, 38, 21, 20, 40, 39, 28, 43, 36, 76, 35, 77, 62, /* EX */ - 32, 49, 60, 29, 48, 78, 33,SYM, 19, 61, 37, 59, 31, 42, 41, 79, /* FX */ + SYM,SYM,SYM,SYM,SYM, 75,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 42, 33, 37, 57, 31, 21, 23, 38, 36, 28, 41, 49, 76, 39, 50, 44, /* CX */ + 35, 43, 77, 34, 46, 78, 32,SYM, 19, 79, 45, 47, 30, 54, 80, 55, /* DX */ + 42, 33, 37, 57, 31, 21, 23, 38, 36, 28, 41, 49, 81, 39, 50, 44, /* EX */ + 35, 43, 82, 34, 46, 83, 32,SYM, 19, 84, 45, 47, 30, 54, 85, 86, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 8, 11, 2, 9, /* 4X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 8, 11, 2, 9, /* 6X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 80,SYM,SYM,SYM,SYM,SYM,SYM, 44,SYM, 81,ILL, 55,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 44,SYM, 82,ILL, 55, 83, /* 9X */ + SYM, 4, 16, 22, 7, 0, 13, 10, 18, 5, 24, 11, 8, 12, 2, 9, /* 4X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 4, 16, 22, 7, 0, 13, 10, 18, 5, 24, 11, 8, 12, 2, 9, /* 6X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 87,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 88,ILL, 89,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 90,ILL, 91, 92, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 56,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 46, 34, 52, 53, 38, 21, 20, 40, 39, 28, 43, 36, 84, 35, 85, 62, /* CX */ - 32, 49, 60, 29, 48, 86, 33,SYM, 19, 61, 37, 59, 31, 42, 41, 45, /* DX */ - 46, 34, 52, 53, 38, 21, 20, 40, 39, 28, 43, 36, 87, 35, 88, 62, /* EX */ - 32, 49, 60, 29, 48, 89, 33,SYM, 19, 61, 37, 59, 31, 42, 41, 90, /* FX */ + SYM,SYM,SYM,SYM,SYM, 93,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 42, 33, 37, 57, 31, 21, 23, 38, 36, 28, 41, 49, 94, 39, 50, 44, /* CX */ + 35, 43, 95, 34, 46, 96, 32,SYM, 19, 97, 45, 47, 30, 54, 98, 55, /* DX */ + 42, 33, 37, 57, 31, 21, 23, 38, 36, 28, 41, 49, 99, 39, 50, 44, /* EX */ + 35, 43,100, 34, 46,101, 32,SYM, 19,102, 45, 47, 30, 54,103,104, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,76 +131,75 @@ static const unsigned char Ibm865_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 8, 11, 2, 9, /* 4X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 8, 11, 2, 9, /* 6X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 40, 31, 28, 52, 38, 46, 21, 40, 43, 36, 39, 62, 91, 92, 38, 21, /* 8X */ - 28, 20, 20, 48, 33, 60, 59, 61, 93, 33, 31, 19,SYM, 19,SYM,SYM, /* 9X */ - 34, 35, 29, 37, 49, 49,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM, 4, 16, 22, 7, 0, 13, 10, 18, 5, 24, 11, 8, 12, 2, 9, /* 4X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 4, 16, 22, 7, 0, 13, 10, 18, 5, 24, 11, 8, 12, 2, 9, /* 6X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 38, 30, 28, 37, 31, 42, 21, 38, 41, 49, 36, 44, 50,105, 31, 21, /* 8X */ + 28, 23, 23, 46, 32,106, 47,107,108, 32, 30, 19,SYM, 19,SYM,SYM, /* 9X */ + 33, 39, 34, 45, 43, 43,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* DX */ - 47, 45, 94, 54, 57, 57, 56, 58, 50, 95, 96, 97, 98, 50, 51,SYM, /* EX */ + 52, 55,109, 51, 48, 48,110,111, 53,112,113,114,115, 53, 56,SYM, /* EX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 62; +static const int Unicode_Char_size = 60; static const unsigned int Unicode_CharOrder[] = { - 65, 4, 66, 16, 67, 23, 68, 7, 69, 0, 70, 13, 71, 10, 72, 18, - 73, 5, 74, 24, 75, 12, 76, 8, 77, 11, 78, 2, 79, 9, 80, 17, - 81, 30, 82, 1, 83, 6, 84, 3, 85, 15, 86, 14, 87, 25, 88, 27, - 89, 22, 90, 26, 97, 4, 98, 16, 99, 23, 100, 7, 101, 0,102, 13, - 103, 10, 104, 18, 105, 5, 106, 24, 107, 12, 108, 8, 109, 11,110, 2, - 111, 9, 112, 17, 113, 30, 114, 1, 115, 6, 116, 3, 117, 15,118, 14, - 119, 25, 120, 27, 121, 22, 122, 26, 197, 21, 198, 20, 201, 28,211, 29, - 216, 19, 229, 21, 230, 20, 233, 28, 243, 29, 248, 19, + 65, 4, 66, 16, 67, 22, 68, 7, 69, 0, 70, 13, 71, 10, 72, 18, + 73, 5, 74, 24, 75, 11, 76, 8, 77, 12, 78, 2, 79, 9, 80, 17, + 81, 29, 82, 1, 83, 6, 84, 3, 85, 15, 86, 14, 87, 25, 88, 27, + 89, 20, 90, 26, 97, 4, 98, 16, 99, 22, 100, 7, 101, 0,102, 13, + 103, 10, 104, 18, 105, 5, 106, 24, 107, 11, 108, 8, 109, 12,110, 2, + 111, 9, 112, 17, 113, 29, 114, 1, 115, 6, 116, 3, 117, 15,118, 14, + 119, 25, 120, 27, 121, 20, 122, 26, 197, 21, 198, 23, 201, 28,216, 19, + 229, 21, 230, 23, 233, 28, 248, 19, }; /* Model Table: - * Total considered sequences: 1079 / 961 - * - Positive sequences: first 508 (0.995012453333286) - * - Probable sequences: next 198 (706-508) (0.003993410296057376) - * - Neutral sequences: last 255 (0.0009941363706565953) - * - Negative sequences: -118 (off-ratio) + * Total considered sequences: 1014 / 900 + * - Positive sequences: first 503 (0.9950476836368553) + * - Probable sequences: next 175 (678-503) (0.003954982568821541) + * - Neutral sequences: last 222 (0.0009973337943232075) + * - Negative sequences: -114 (off-ratio) * Negative sequences: TODO */ static const PRUint8 DanishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,2,3,1,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,2,1, - 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,3,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,1,0,3,3,3,3,3,3,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,1,3,3,1,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,1,1,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,1,0,2,2,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,1,0,2,1,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,3,3,3,3,2,2,1,0,1, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,3,3,3,2,3,2,0,0,1,1,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,1,1,1,2,1,0, - 3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,2,2,3,3,3,3,3,1,3,1,1,1,1,1,0, - 3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,3,2,2,3,3,3,3,3,2,3,1,1,2,1,1,0, - 3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,2,2,2,3,3,3,2,1,3,0,0,0,1,0,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,1,1,0,2,3,2,2,2,3,1,0,0, - 3,3,3,3,3,3,3,2,3,3,0,2,1,1,1,3,3,1,2,3,3,3,3,2,3,1,1,0,1,2,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,2,3,3,3,1,1,0,1,0,2,1,0, - 3,3,3,3,3,3,3,2,3,3,1,3,2,2,3,3,1,1,2,3,3,3,3,1,3,3,0,1,1,1,2, - 3,3,3,3,1,2,3,3,3,1,3,3,3,2,3,1,3,2,1,0,0,0,2,0,3,0,0,0,0,0,0, - 2,3,3,3,1,3,3,3,3,3,3,3,3,3,3,2,3,2,1,0,0,0,0,2,0,0,0,0,0,0,0, - 3,3,3,3,0,0,3,3,3,1,2,1,3,2,3,0,3,1,1,1,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,3,2,1,0,0,0,2,2,1,2,1,0,0,0, - 3,3,1,3,3,3,2,3,3,3,1,2,3,1,1,3,1,1,3,0,1,0,3,3,1,1,1,0,1,0,1, - 3,3,3,3,3,3,3,3,3,3,2,1,2,1,1,3,2,1,2,3,3,1,3,0,0,0,0,0,1,2,0, - 3,2,2,2,3,3,2,1,2,3,0,1,1,1,0,2,2,0,2,0,1,0,1,1,1,2,1,0,0,0,0, - 3,1,1,1,3,3,1,1,1,3,1,2,1,1,0,2,1,1,1,0,0,0,2,1,2,1,3,0,0,0,0, - 2,1,1,1,2,3,1,1,2,2,0,0,1,1,1,1,1,2,2,1,0,0,1,2,0,1,0,1,0,0,0, - 2,2,3,2,1,0,2,2,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0, - 0,3,2,2,1,0,1,0,2,0,1,0,2,0,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0, - 2,0,0,0,2,2,0,0,0,0,0,0,0,1,1,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,2,3,3,2,3,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2, + 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,1,3,3,3,3,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,3,2,3,2,3,3,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,2,1, + 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,1,2,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,1,3,0,3,3,3,2,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,3,3,2,1,0,1,0, + 3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,2,2,3,3,3,3,2,3,3,2,2,1,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,2,0,2,3,1, + 3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,3,2,2,3,3,3,3,2,3,3,0,0,2,2,1, + 3,3,3,3,3,3,3,3,3,3,2,2,2,2,1,3,2,1,2,3,2,3,1,3,3,0,0,0,1,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,0,3,1,2,1,2,2,2,1, + 3,3,3,3,3,3,3,2,3,3,1,1,2,1,2,3,3,1,2,3,3,3,2,3,3,2,0,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,2,2,3,2,3,1,1,1,0,1,0, + 3,3,3,3,3,3,3,2,3,3,1,1,3,1,3,3,2,1,2,3,3,3,1,3,3,3,1,1,1,0, + 3,3,3,3,1,2,3,3,3,2,3,3,3,2,3,0,3,1,1,0,2,0,1,0,3,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,3,2,0,1,0,2,0,1,2,2,0,0,1, + 3,3,3,3,0,0,3,3,3,0,2,3,1,2,3,0,3,1,1,1,0,1,2,0,0,0,0,0,0,0, + 3,3,1,3,3,3,2,2,3,3,0,3,2,1,2,3,1,1,3,1,3,0,3,2,1,0,1,0,1,2, + 2,3,3,3,1,3,3,3,3,2,3,3,3,3,3,2,3,2,1,0,0,0,2,1,0,0,0,0,0,0, + 3,3,2,3,3,2,3,3,3,3,1,2,2,2,1,3,2,1,2,3,3,1,0,3,1,0,0,0,1,0, + 3,2,2,2,3,3,2,1,2,3,0,1,1,1,0,1,1,1,2,0,2,0,1,0,0,2,1,0,0,0, + 3,1,1,2,3,3,1,1,1,3,1,1,2,1,1,2,2,1,2,0,3,0,1,0,1,1,3,0,0,1, + 2,0,1,2,2,3,1,1,1,2,0,0,0,1,2,0,0,1,1,1,1,0,2,0,0,1,0,1,0,0, + 2,2,2,2,2,0,2,2,1,1,1,2,2,1,1,1,2,2,1,0,0,0,1,0,1,0,1,0,0,0, + 0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -208,8 +207,8 @@ const SequenceModel Iso_8859_15DanishModel = { Iso_8859_15_CharToOrderMap, DanishLangModel, - 31, - (float)0.9990058636293434, + 30, + (float)0.9990026662056768, PR_TRUE, "ISO-8859-15", "da" @@ -219,8 +218,8 @@ const SequenceModel Iso_8859_1DanishModel = { Iso_8859_1_CharToOrderMap, DanishLangModel, - 31, - (float)0.9990058636293434, + 30, + (float)0.9990026662056768, PR_TRUE, "ISO-8859-1", "da" @@ -230,8 +229,8 @@ const SequenceModel Windows_1252DanishModel = { Windows_1252_CharToOrderMap, DanishLangModel, - 31, - (float)0.9990058636293434, + 30, + (float)0.9990026662056768, PR_TRUE, "WINDOWS-1252", "da" @@ -241,8 +240,8 @@ const SequenceModel Ibm865DanishModel = { Ibm865_CharToOrderMap, DanishLangModel, - 31, - (float)0.9990058636293434, + 30, + (float)0.9990026662056768, PR_TRUE, "IBM865", "da" @@ -252,8 +251,11 @@ const LanguageModel DanishModel = { "da", Unicode_CharOrder, - 62, + 60, DanishLangModel, - 31, - (float)0.9992516135038306, + 30, + 4, + (float)0.38104323327928524, + 20, + (float)0.030308128394864135, }; diff --git a/src/LangModels/LangEnglishModel.cpp b/src/LangModels/LangEnglishModel.cpp index e06d15c..63b70ba 100644 --- a/src/LangModels/LangEnglishModel.cpp +++ b/src/LangModels/LangEnglishModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2022-12-03 20:32:27.947524 + * On: 2022-12-14 17:58:33.140818 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 4X */ - 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 6X */ - 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 18, 11, 10, 0, 15, 16, 8, 3, 23, 21, 9, 13, 4, 5, /* 4X */ + 14, 25, 7, 6, 2, 12, 20, 19, 22, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 11, 10, 0, 15, 16, 8, 3, 23, 21, 9, 13, 4, 5, /* 6X */ + 14, 25, 7, 6, 2, 12, 20, 19, 22, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 39, 36, 30, 47, 46, 58, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* CX */ - 48, 29, 53, 37, 32, 59, 28,SYM, 49, 34, 55, 45, 33, 51, 60, 57, /* DX */ - 39, 36, 30, 47, 46, 61, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* EX */ - 48, 29, 53, 37, 32, 62, 28,SYM, 49, 34, 55, 45, 33, 51, 63, 64, /* FX */ + SYM,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 42, 32, 38, 37, 39, 55, 29, 30, 31, 26, 43, 46, 58, 33, 41, 49, /* CX */ + 35, 36, 53, 34, 40, 56, 27,SYM, 48, 60, 44, 52, 28, 54, 57, 47, /* DX */ + 42, 32, 38, 37, 39, 55, 29, 30, 31, 26, 43, 46, 58, 33, 41, 49, /* EX */ + 35, 36, 53, 34, 40, 56, 27,SYM, 48, 61, 44, 52, 28, 54, 57, 62, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,70 +89,70 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 4X */ - 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 17, 11, 10, 0, 15, 16, 9, 3, 24, 21, 8, 13, 4, 5, /* 6X */ - 14, 25, 7, 6, 2, 12, 20, 19, 22, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM, 54,SYM, 41,ILL, 56,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 54,SYM, 41,ILL, 56, 66, /* 9X */ + SYM, 1, 18, 11, 10, 0, 15, 16, 8, 3, 23, 21, 9, 13, 4, 5, /* 4X */ + 14, 25, 7, 6, 2, 12, 20, 19, 22, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 11, 10, 0, 15, 16, 8, 3, 23, 21, 9, 13, 4, 5, /* 6X */ + 14, 25, 7, 6, 2, 12, 20, 19, 22, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 63,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 45,ILL, 50,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 45,ILL, 50, 64, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 39, 36, 30, 47, 46, 67, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* CX */ - 48, 29, 53, 37, 32, 68, 28,SYM, 49, 34, 55, 45, 33, 51, 69, 57, /* DX */ - 39, 36, 30, 47, 46, 70, 43, 31, 27, 26, 38, 44, 50, 35, 40, 42, /* EX */ - 48, 29, 53, 37, 32, 71, 28,SYM, 49, 34, 55, 45, 33, 51, 72, 73, /* FX */ + SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 42, 32, 38, 37, 39, 55, 29, 30, 31, 26, 43, 46, 58, 33, 41, 49, /* CX */ + 35, 36, 53, 34, 40, 56, 27,SYM, 48, 66, 44, 52, 28, 54, 57, 47, /* DX */ + 42, 32, 38, 37, 39, 55, 29, 30, 31, 26, 43, 46, 58, 33, 41, 49, /* EX */ + 35, 36, 53, 34, 40, 56, 27,SYM, 48, 67, 44, 52, 28, 54, 57, 68, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 52; static const unsigned int Unicode_CharOrder[] = { - 65, 1, 66, 17, 67, 11, 68, 10, 69, 0, 70, 15, 71, 16, 72, 9, - 73, 3, 74, 24, 75, 21, 76, 8, 77, 13, 78, 4, 79, 5, 80, 14, + 65, 1, 66, 18, 67, 11, 68, 10, 69, 0, 70, 15, 71, 16, 72, 8, + 73, 3, 74, 23, 75, 21, 76, 9, 77, 13, 78, 4, 79, 5, 80, 14, 81, 25, 82, 7, 83, 6, 84, 2, 85, 12, 86, 20, 87, 19, 88, 22, - 89, 18, 90, 23, 97, 1, 98, 17, 99, 11, 100, 10, 101, 0,102, 15, - 103, 16, 104, 9, 105, 3, 106, 24, 107, 21, 108, 8, 109, 13,110, 4, + 89, 17, 90, 24, 97, 1, 98, 18, 99, 11, 100, 10, 101, 0,102, 15, + 103, 16, 104, 8, 105, 3, 106, 23, 107, 21, 108, 9, 109, 13,110, 4, 111, 5, 112, 14, 113, 25, 114, 7, 115, 6, 116, 2, 117, 12,118, 20, - 119, 19, 120, 22, 121, 18, 122, 23, + 119, 19, 120, 22, 121, 17, 122, 24, }; /* Model Table: - * Total considered sequences: 863 / 676 - * - Positive sequences: first 369 (0.9950424985513596) - * - Probable sequences: next 125 (494-369) (0.003963798368833871) - * - Neutral sequences: last 182 (0.0009937030798065072) - * - Negative sequences: -187 (off-ratio) + * Total considered sequences: 1047 / 676 + * - Positive sequences: first 377 (0.9950075198967843) + * - Probable sequences: next 160 (537-377) (0.003999516176216855) + * - Neutral sequences: last 139 (0.00099296392699888) + * - Negative sequences: -371 (off-ratio) * Negative sequences: TODO */ static const PRUint8 EnglishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3, - 3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,3,2,2,1,2,1,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3, - 3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,3,2,3,3,2,2,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,3,2,3,2,2,0,1,2,1, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,1,3,3,3,2,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,3,2,3,1,1,1,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,1,1, - 3,3,3,3,2,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1, - 3,3,3,3,3,3,3,3,3,1,2,2,3,3,2,1,2,2,3,3,2,1,0,2,1,2, - 3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,1,1,1,2,2, - 3,3,3,3,2,3,3,3,3,3,2,3,3,2,1,2,1,3,3,1,1,3,1,1,0,2, - 3,3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,3,3,2,2,3,3,2,2,2,1, - 3,3,2,3,3,3,3,2,2,1,1,2,3,3,3,2,2,3,3,1,1,1,0,1,1,1, - 3,3,3,3,2,3,3,3,3,3,2,2,3,3,3,1,2,2,3,1,1,2,1,0,1,1, - 3,3,3,3,1,3,3,3,3,0,1,1,3,1,1,3,1,1,2,1,1,1,0,0,1,1, - 3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,3,2,3,2,1,1,1,1,1,1, - 3,3,2,3,3,3,3,3,3,3,2,3,3,2,1,2,2,3,3,1,1,1,0,1,3,0, - 3,3,3,3,3,3,3,2,3,1,2,3,2,3,3,1,3,3,1,2,1,1,1,2,1,0, - 3,3,3,3,3,3,3,3,3,3,2,1,1,1,1,2,1,2,2,1,0,2,0,0,0,0, - 3,3,1,3,1,3,1,2,1,1,1,1,2,1,1,1,1,0,2,0,1,1,0,0,0,1, - 3,3,2,3,3,3,3,2,3,3,2,1,3,3,2,1,2,2,3,2,1,2,0,0,1,1, - 3,3,3,3,1,3,1,0,0,2,0,3,3,1,3,2,0,0,2,1,1,0,1,0,0,1, - 3,3,1,3,1,3,1,1,2,2,1,1,2,1,0,1,0,2,2,1,1,1,0,2,1,0, - 3,3,0,2,1,3,2,2,0,1,0,0,3,1,1,0,0,1,1,1,1,1,1,0,1,0, - 1,1,1,2,0,1,0,1,1,0,0,1,3,0,0,1,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,0,1,2,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,2, + 3,3,3,3,3,3,3,3,1,3,2,2,3,3,2,2,1,3,2,3,1,2,0,0,1,1, + 3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,1, + 3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,1,0,2,2,2, + 3,3,3,3,2,3,3,3,3,3,2,3,3,2,2,2,1,3,2,1,0,3,0,0,2,2, + 3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,3,1,2,3,2,1,2,2, + 3,3,2,3,3,3,3,2,2,2,2,2,3,3,3,2,1,3,3,2,1,1,0,1,1,0, + 3,3,3,3,1,3,3,3,3,3,2,2,3,3,3,2,2,3,2,1,1,2,1,1,1,1, + 3,3,3,3,1,3,3,3,1,3,1,2,3,1,1,3,2,3,1,1,0,1,1,1,0,0, + 3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,3,3,2,2,1,1,1,1,1,0, + 3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,2,2,2,2,2,1,2,1,1,2,0, + 3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,1,2,3,3,1,1,1,1,3,0,1, + 3,3,3,3,3,3,3,3,3,3,2,2,1,1,2,2,1,2,2,2,0,2,0,0,0,0, + 3,3,1,3,1,3,2,2,1,2,1,0,2,1,1,1,1,2,1,0,1,1,0,0,1,0, + 3,3,2,3,3,3,3,3,3,3,1,2,3,3,2,1,2,3,1,2,1,2,0,1,1,0, + 3,3,3,3,1,3,1,0,2,1,0,3,2,1,3,3,0,2,1,1,2,0,1,0,0,0, + 3,3,0,2,1,3,1,2,1,1,1,1,3,0,0,0,0,1,0,0,1,1,1,1,0,0, + 3,3,1,3,2,3,2,0,2,2,1,1,2,2,1,0,1,2,2,1,1,1,0,1,2,1, + 1,2,1,2,0,0,0,0,1,1,1,1,3,0,0,0,0,0,0,0,0,0,1,0,0,0, }; @@ -161,7 +161,7 @@ const SequenceModel Iso_8859_1EnglishModel = Iso_8859_1_CharToOrderMap, EnglishLangModel, 26, - (float)0.9990062969201935, + (float)0.9990070360730011, PR_TRUE, "ISO-8859-1", "en" @@ -172,7 +172,7 @@ const SequenceModel Windows_1252EnglishModel = Windows_1252_CharToOrderMap, EnglishLangModel, 26, - (float)0.9990062969201935, + (float)0.9990070360730011, PR_TRUE, "WINDOWS-1252", "en" @@ -185,5 +185,8 @@ const LanguageModel EnglishModel = 52, EnglishLangModel, 26, - (float)0.9996665801879581, + 4, + (float)0.3710736871659752, + 18, + (float)0.037105859213141135, }; diff --git a/src/LangModels/LangEsperantoModel.cpp b/src/LangModels/LangEsperantoModel.cpp index e0b8fed..8d9ea7c 100644 --- a/src/LangModels/LangEsperantoModel.cpp +++ b/src/LangModels/LangEsperantoModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 18:54:42.163514 + * On: 2022-12-14 17:59:23.880137 **/ /* Character Mapping Table: @@ -68,76 +68,75 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 17, 10, 2, 19, 15, 20, 3, 11, 9, 7, 13, 4, 1, /* 4X */ - 14, 34, 5, 8, 6, 12, 16, 25, 33, 26, 21,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 17, 10, 2, 19, 15, 20, 3, 11, 9, 7, 13, 4, 1, /* 6X */ - 14, 34, 5, 8, 6, 12, 16, 25, 33, 26, 21,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 6, 13, 4, 1, /* 4X */ + 14, 33, 5, 8, 7, 12, 16, 27, 32, 26, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 6, 13, 4, 1, /* 6X */ + 14, 33, 5, 8, 7, 12, 16, 27, 32, 26, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 55,SYM,SYM,SYM,ILL, 31,SYM,SYM, 56, 51, 57, 28,SYM,ILL, 41, /* AX */ - SYM, 58,SYM,SYM,SYM,SYM, 31,SYM,SYM, 53, 51, 59, 28,SYM,ILL, 41, /* BX */ - 46, 29, 50,ILL, 39, 60, 24, 40, 38, 30, 48, 49, 61, 36, 47, 54, /* CX */ - ILL, 42, 52, 32, 45, 62, 35,SYM, 22, 63, 44, 64, 37, 23, 27, 43, /* DX */ - 46, 29, 50,ILL, 39, 65, 24, 40, 38, 30, 48, 49, 66, 36, 47, 54, /* EX */ - ILL, 42, 52, 32, 45, 67, 35,SYM, 22, 68, 44, 69, 37, 23, 27,SYM, /* FX */ + SYM, 61,SYM,SYM,SYM,ILL, 29,SYM,SYM, 62, 59, 57, 28,SYM,ILL, 48, /* AX */ + SYM, 63,SYM,SYM,SYM,SYM, 29,SYM,SYM, 46, 59, 57, 28,SYM,ILL, 48, /* BX */ + 49, 31, 41,ILL, 37, 54, 24, 44, 42, 30, 60, 55, 51, 35, 47, 53, /* CX */ + ILL, 43, 52, 34, 45, 50, 36,SYM, 22, 58, 39, 56, 38, 23, 25, 40, /* DX */ + 49, 31, 41,ILL, 37, 54, 24, 44, 42, 30, 60, 55, 51, 35, 47, 53, /* EX */ + ILL, 43, 52, 34, 45, 50, 36,SYM, 22, 58, 39, 56, 38, 23, 25,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 64; +static const int Unicode_Char_size = 60; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 18, 67, 17, 68, 10, 69, 2, 70, 19, 71, 15, 72, 20, - 73, 3, 74, 11, 75, 9, 76, 7, 77, 13, 78, 4, 79, 1, 80, 14, - 82, 5, 83, 8, 84, 6, 85, 12, 86, 16, 87, 25, 89, 26, 90, 21, - 97, 0, 98, 18, 99, 17, 100, 10, 101, 2, 102, 19, 103, 15,104, 20, - 105, 3, 106, 11, 107, 9, 108, 7, 109, 13, 110, 4, 111, 1,112, 14, - 114, 5, 115, 8, 116, 6, 117, 12, 118, 16, 119, 25, 121, 26,122, 21, - 193, 29, 201, 30, 225, 29, 233, 30, 264, 24, 265, 24, 284, 22,285, 22, - 292, 31, 293, 31, 308, 28, 309, 28, 348, 27, 349, 27, 364, 23,365, 23, + 65, 0, 66, 18, 67, 17, 68, 10, 69, 2, 70, 19, 71, 15, 72, 21, + 73, 3, 74, 11, 75, 9, 76, 6, 77, 13, 78, 4, 79, 1, 80, 14, + 82, 5, 83, 8, 84, 7, 85, 12, 86, 16, 87, 27, 89, 26, 90, 20, + 97, 0, 98, 18, 99, 17, 100, 10, 101, 2, 102, 19, 103, 15,104, 21, + 105, 3, 106, 11, 107, 9, 108, 6, 109, 13, 110, 4, 111, 1,112, 14, + 114, 5, 115, 8, 116, 7, 117, 12, 118, 16, 119, 27, 121, 26,122, 20, + 264, 24, 265, 24, 284, 22, 285, 22, 292, 29, 293, 29, 308, 28,309, 28, + 348, 25, 349, 25, 364, 23, 365, 23, }; /* Model Table: - * Total sequences: 1066 - * First 512 sequences: 0.995442680189542 - * Next 512 sequences (512-1024): 0.0044874885692908805 - * Rest: 6.983124116715766e-05 + * Total considered sequences: 1198 / 900 + * - Positive sequences: first 496 (0.9950012527506046) + * - Probable sequences: next 287 (783-496) (0.004000631822044021) + * - Neutral sequences: last 117 (0.0009981154273513981) + * - Negative sequences: -298 (off-ratio) * Negative sequences: TODO */ static const PRUint8 EsperantoLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,1,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,3,2,3,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,2,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,2,3,3,2,2,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,0,2,2,3,2,0,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,0,3,2,2,0,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,0,0,2,3,3,0,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,2,1,3,3,3,2,2,2,2,2,2,2,2,2,2,0,0,0,0,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,3,3,2,2,0,0,2,2,3,2,0,3,3,0, - 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,0,0,2,2,2,3,0,0,1,0,0,2,0,0,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,2,3,2,0,2,0,2,3,2,2,2,2,2, - 3,3,3,3,2,3,2,3,3,2,2,3,3,2,2,2,2,2,2,2,1,2,0,0,2,0,2,2,0,3,1,0, - 3,3,3,3,2,3,3,3,3,3,2,2,3,2,2,2,2,3,2,2,3,3,0,0,2,1,3,0,0,2,2,2, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,2,3,0,2,2,2,0,0,2,3,2,0,2,2,0, - 3,3,3,3,2,3,3,3,2,2,2,2,3,3,2,2,2,0,2,3,2,0,0,0,0,0,2,0,0,2,2,0, - 3,3,3,3,3,3,3,3,2,2,2,2,3,3,2,2,0,2,2,2,2,0,0,2,0,3,2,0,0,2,2,0, - 3,3,3,3,3,1,2,3,3,3,2,2,3,2,2,2,2,2,2,2,2,3,0,0,0,2,3,1,0,2,2,0, - 3,3,3,3,0,2,2,3,2,2,2,2,3,2,2,2,1,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0, - 3,2,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,0,2,2,0,3,0,0,2,0,0,3,0,0,0,0, - 3,3,3,3,0,2,0,2,0,2,3,0,3,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,2,3,2,2,0,2,2,2,0,0,2,0,2,2,2,0,0,0,2,2,0,0,0,0,0, - 3,3,3,3,3,2,2,2,3,2,2,2,2,2,2,2,2,3,2,2,2,2,0,0,0,2,0,0,0,2,1,0, - 3,3,3,3,2,2,3,2,0,2,2,2,3,0,2,0,3,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0, - 3,3,2,2,2,2,0,2,0,0,0,0,3,2,0,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,2,3,3,3,3,3,2,2,0,0,2,1,2,2,2,2,0,0,2,0,0,0,0,2,0,0,0,0,0, - 2,2,2,2,3,3,3,2,2,2,2,1,0,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,0,0,2,0, - 3,3,3,3,2,0,2,2,0,0,1,0,2,2,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,1,1,1, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,1,0,2,2,3,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,0,1,2,3,2,1,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,1,3,3,3,0,1,1,2,3,2,1,0, + 3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,1,2,2,3,3,1,0,1,1,3,2,1,0, + 3,3,3,3,3,3,3,3,3,2,3,1,3,3,3,2,2,1,3,2,2,1,3,0,1,1,0,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,1,2, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,3,3,1,2,0,1,2,2,3,2,0,0, + 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,1,2,2,2,2,1,3,0,0,0,1,2,1,0,0, + 3,3,3,3,3,3,3,3,3,2,2,1,3,3,1,2,3,0,2,2,2,3,0,1,1,1,3,1,1,1, + 3,3,3,3,3,3,3,2,3,2,3,3,3,1,1,2,1,1,0,1,1,1,0,0,2,1,1,0,1,0, + 3,3,3,3,2,3,3,3,3,3,2,1,3,3,3,1,2,3,1,2,3,3,0,0,1,0,3,1,0,1, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,2,2,2,2,0,2,1,1,0,3,2,1,1,2, + 3,3,3,3,1,3,3,3,2,2,2,1,3,3,2,3,2,1,1,3,0,1,0,1,0,1,2,2,0,0, + 3,3,3,3,2,2,2,2,3,2,1,1,3,2,2,2,2,1,3,1,1,2,0,0,0,1,3,2,0,0, + 3,3,3,3,3,3,3,3,3,2,2,1,3,3,1,1,0,1,2,1,0,1,0,1,0,0,2,2,0,0, + 3,3,3,3,0,1,3,1,0,1,2,2,3,0,2,0,1,0,2,1,0,0,0,1,0,0,0,0,0,0, + 3,2,3,3,2,3,3,3,3,3,3,2,3,2,2,3,3,1,2,2,3,1,1,0,1,2,0,0,0,0, + 3,3,3,3,0,1,0,1,0,2,3,1,3,2,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,2,2,3,1,3,2,2,3,0,2,0,2,0,1,0,0,1,0,0,1,0,0,0,0,0, + 3,3,3,2,3,2,3,2,3,2,2,1,2,2,3,2,2,3,3,2,2,1,0,0,0,0,2,3,0,0, + 3,3,3,3,2,2,2,1,2,1,1,0,2,1,0,0,0,0,0,1,0,2,0,0,0,0,2,3,0,0, + 3,3,3,2,1,0,1,0,0,0,0,1,3,1,1,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0, + 3,3,3,3,2,1,2,2,0,0,0,0,2,2,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0, }; @@ -145,8 +144,8 @@ const SequenceModel Iso_8859_3EsperantoModel = { Iso_8859_3_CharToOrderMap, EsperantoLangModel, - 32, - (float)0.995442680189542, + 30, + (float)0.9990018845726486, PR_FALSE, "ISO-8859-3", "eo" @@ -156,8 +155,11 @@ const LanguageModel EsperantoModel = { "eo", Unicode_CharOrder, - 64, + 60, EsperantoLangModel, - 32, - (float)0.995442680189542, + 30, + 3, + (float)0.31985724199464655, + 19, + (float)0.032423471354769064, }; diff --git a/src/LangModels/LangEstonianModel.cpp b/src/LangModels/LangEstonianModel.cpp index f1ed29c..40184a0 100644 --- a/src/LangModels/LangEstonianModel.cpp +++ b/src/LangModels/LangEstonianModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:01:52.571827 + * On: 2022-12-14 18:02:05.522096 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_4_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 4X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 6X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 55, 56, 57,SYM, 58, 59,SYM,SYM, 30, 47, 60, 61,SYM, 33,SYM, /* AX */ - SYM, 62,SYM, 63,SYM, 64, 65,SYM,SYM, 30, 47, 66, 67, 68, 33, 69, /* BX */ - 37, 44, 70, 71, 18, 43, 45, 72, 73, 31, 74, 75, 76, 36, 77, 41, /* CX */ - 78, 79, 32, 80, 81, 19, 24,SYM, 39, 82, 53, 83, 21, 84, 34, 85, /* DX */ - 37, 44, 86, 87, 18, 43, 45, 88, 89, 31, 90, 91, 92, 36, 93, 41, /* EX */ - 94, 95, 32, 96, 97, 19, 24,SYM, 39, 98, 53, 99, 21,100, 34,SYM, /* FX */ + SYM, 60, 61, 62,SYM, 63, 64,SYM,SYM, 28, 41, 65, 66,SYM, 30,SYM, /* AX */ + SYM, 67,SYM, 68,SYM, 69, 70,SYM,SYM, 28, 41, 71, 72, 73, 30, 74, /* BX */ + 33, 35, 46, 75, 18, 32, 47, 76, 43, 34, 77, 78, 79, 54, 80, 38, /* CX */ + 53, 81, 52, 82, 51, 19, 24,SYM, 36, 83, 84, 85, 21, 86, 39, 87, /* DX */ + 33, 35, 46, 88, 18, 32, 47, 89, 43, 34, 90, 91, 92, 54, 93, 38, /* EX */ + 53, 94, 52, 95, 51, 19, 24,SYM, 36, 96, 97, 98, 21, 99, 39,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 4X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 6X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM,101,SYM,SYM,SYM,SYM, 45, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM,102,SYM,SYM,SYM,SYM, 45, /* BX */ - 103,104, 37,105, 18, 43,106, 47,107, 31,108,109,110,111, 41,112, /* CX */ - 30,113,114, 42, 32, 19, 24,SYM,115, 54,116, 34, 21, 51, 33,117, /* DX */ - 118,119, 37,120, 18, 43,121, 47,122, 31,123,124,125,126, 41,127, /* EX */ - 30,128,129, 42, 32, 19, 24,SYM,130, 54,131, 34, 21, 51, 33,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM,100,SYM,SYM,SYM,SYM, 47, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM,101,SYM,SYM,SYM,SYM, 47, /* BX */ + 102,103, 33, 48, 18, 32,104, 41, 43, 34,105,106,107,108, 38,109, /* CX */ + 28,110,111, 40, 52, 19, 24,SYM,112, 37, 58, 39, 21, 59, 30,113, /* DX */ + 114,115, 33, 48, 18, 32,116, 41, 43, 34,117,118,119,120, 38,121, /* EX */ + 28,122,123, 40, 52, 19, 24,SYM,124, 37, 58, 39, 21, 59, 30,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 4X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 6X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 30,SYM, 30,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 33, 52,SYM,SYM, 33,SYM,SYM,SYM,132,133,134,SYM, /* BX */ - 38, 44,135,136, 18, 43, 45, 50, 40, 31,137,138, 35, 36,139,140, /* CX */ - 46,141, 49, 42,142, 19, 24,SYM, 39, 48, 53,143, 21,144,145,146, /* DX */ - 38, 44,147,148, 18, 43, 45, 50, 40, 31,149,150, 35, 36,151,152, /* EX */ - 46,153, 49, 42,154, 19, 24,SYM, 39, 48, 53,155, 21,156,157,158, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM, 28,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 30,125,SYM,SYM, 30,SYM,SYM,SYM,126,127,128,SYM, /* BX */ + 45, 35, 46,129, 18, 32, 47, 42, 49, 34,130,131, 56, 54,132,133, /* CX */ + 44, 55, 50, 40, 51, 19, 24,SYM, 36, 57,134,135, 21,136,137,138, /* DX */ + 45, 35, 46,139, 18, 32, 47, 42, 49, 34,140,141, 56, 54,142,143, /* EX */ + 44, 55, 50, 40, 51, 19, 24,SYM, 36, 57,144,145, 21,146,147,148, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,18 +131,18 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,159,SYM,SYM,SYM,SYM,SYM,SYM, 30,SYM,160,ILL, 33,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 30,SYM,161,ILL, 33,162, /* 9X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 4X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 6X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,149,SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM,150,ILL, 30,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM,151,ILL, 30,152, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 38, 44,163,164, 18, 43, 45, 50, 40, 31,165,166, 35, 36,167,168, /* CX */ - 46,169, 49, 42,170, 19, 24,SYM, 39, 48, 53,171, 21,172,173,174, /* DX */ - 38, 44,175,176, 18, 43, 45, 50, 40, 31,177,178, 35, 36,179,180, /* EX */ - 46,181, 49, 42,182, 19, 24,SYM, 39, 48, 53,183, 21,184,185,186, /* FX */ + SYM,SYM,SYM,SYM,SYM,153,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 45, 35, 46,154, 18, 32, 47, 42, 49, 34,155,156, 56, 54,157,158, /* CX */ + 44, 55, 50, 40, 51, 19, 24,SYM, 36, 57,159,160, 21,161,162,163, /* DX */ + 45, 35, 46,164, 18, 32, 47, 42, 49, 34,165,166, 56, 54,167,168, /* EX */ + 44, 55, 50, 40, 51, 19, 24,SYM, 36, 57,169,170, 21,171,172,173, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -152,79 +152,77 @@ static const unsigned char Windows_1257_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 4X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 20, 23, 10, 2, 22, 15, 16, 1, 17, 8, 5, 12, 7, 9, /* 6X */ - 14, 29, 11, 3, 4, 6, 13, 27, 26, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 4X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 23, 11, 2, 22, 14, 17, 1, 16, 8, 5, 12, 7, 9, /* 6X */ + 15, 31, 10, 3, 4, 6, 13, 26, 29, 25, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,SYM,SYM,SYM, /* 8X */ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,SYM,SYM,ILL, /* 9X */ - SYM,ILL,SYM,SYM,SYM,ILL,SYM,SYM, 39,SYM,187,SYM,SYM,SYM,SYM, 45, /* AX */ - SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM, 39,SYM,188,SYM,SYM,SYM,SYM, 45, /* BX */ - 189,190, 37,191, 18, 43,192, 47,193, 31,194,195,196,197, 41,198, /* CX */ - 30,199,200, 42, 32, 19, 24,SYM,201, 54,202, 34, 21, 51, 33,203, /* DX */ - 204,205, 37,206, 18, 43,207, 47,208, 31,209,210,211,212, 41,213, /* EX */ - 30,214,215, 42, 32, 19, 24,SYM,216, 54,217, 34, 21, 51, 33,SYM, /* FX */ + SYM,ILL,SYM,SYM,SYM,ILL,SYM,SYM, 36,SYM,174,SYM,SYM,SYM,SYM, 47, /* AX */ + SYM,SYM,SYM,SYM,SYM,175,SYM,SYM, 36,SYM,176,SYM,SYM,SYM,SYM, 47, /* BX */ + 177,178, 33, 48, 18, 32,179, 41, 43, 34,180,181,182,183, 38,184, /* CX */ + 28,185,186, 40, 52, 19, 24,SYM,187, 37, 58, 39, 21, 59, 30,188, /* DX */ + 189,190, 33, 48, 18, 32,191, 41, 43, 34,192,193,194,195, 38,196, /* EX */ + 28,197,198, 40, 52, 19, 24,SYM,199, 37, 58, 39, 21, 59, 30,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 68; +static const int Unicode_Char_size = 64; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 20, 67, 23, 68, 10, 69, 2, 70, 22, 71, 15, 72, 16, - 73, 1, 74, 17, 75, 8, 76, 5, 77, 12, 78, 7, 79, 9, 80, 14, - 81, 29, 82, 11, 83, 3, 84, 4, 85, 6, 86, 13, 87, 27, 88, 26, - 89, 25, 90, 28, 97, 0, 98, 20, 99, 23, 100, 10, 101, 2,102, 22, - 103, 15, 104, 16, 105, 1, 106, 17, 107, 8, 108, 5, 109, 12,110, 7, - 111, 9, 112, 14, 113, 29, 114, 11, 115, 3, 116, 4, 117, 6,118, 13, - 119, 27, 120, 26, 121, 25, 122, 28, 196, 18, 201, 31, 213, 19,214, 24, - 220, 21, 228, 18, 233, 31, 245, 19, 246, 24, 252, 21, 332, 32,333, 32, - 352, 30, 353, 30, 381, 33, 382, 33, + 65, 0, 66, 20, 67, 23, 68, 11, 69, 2, 70, 22, 71, 14, 72, 17, + 73, 1, 74, 16, 75, 8, 76, 5, 77, 12, 78, 7, 79, 9, 80, 15, + 81, 31, 82, 10, 83, 3, 84, 4, 85, 6, 86, 13, 87, 26, 88, 29, + 89, 25, 90, 27, 97, 0, 98, 20, 99, 23, 100, 11, 101, 2,102, 22, + 103, 14, 104, 17, 105, 1, 106, 16, 107, 8, 108, 5, 109, 12,110, 7, + 111, 9, 112, 15, 113, 31, 114, 10, 115, 3, 116, 4, 117, 6,118, 13, + 119, 26, 120, 29, 121, 25, 122, 27, 196, 18, 213, 19, 214, 24,220, 21, + 228, 18, 245, 19, 246, 24, 252, 21, 352, 28, 353, 28, 381, 30,382, 30, }; /* Model Table: - * Total sequences: 869 - * First 512 sequences: 0.9973685549586747 - * Next 512 sequences (512-1024): 0.002631445041325318 - * Rest: -3.122502256758253e-17 + * Total considered sequences: 915 / 1024 + * - Positive sequences: first 453 (0.9950155799281164) + * - Probable sequences: next 161 (614-453) (0.003991392475357514) + * - Neutral sequences: last 410 (0.000993027596526086) + * - Negative sequences: 109 (off-ratio) * Negative sequences: TODO */ static const PRUint8 EstonianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,3,3,3,3,3,2,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,3,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,2,3,3,2,2,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,0,2,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,3,2,3,3,3,2,2,2,0,3,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,2,0,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,2,2,3,0,0,2,2,0,2,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,3,2,2,3,0,3,2,2,3,0,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,0,2,0,0,0,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,2,3,3,0,3,3,3,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,0,2,0,3,0,0,0,2,2,2,0,0,0,0,2,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,3,3,3,3,3,2,3,3,0,2,2,0,2,2,0,0, - 3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,2,2,2,3,3,0,2,0,2,3,2,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,2,3,3,0,3,0,2,3,2,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,2,2,2,2,2,0,2,2,0,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,0,3,3,3,3,0,3,3,2,0,3,0,2,0,0,0,2,2,0, - 3,3,3,2,3,0,3,3,2,3,2,0,3,0,2,0,0,0,3,3,0,3,0,0,2,0,0,0,0,0,0,0,0,0, - 2,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,0,0,0,0,2,0,0,0,0,0,0,0, - 0,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,3,3,0,3,3,0,0,0,0,0,0,0,2,0,0,0,0,0, - 3,3,3,3,2,3,3,3,3,3,0,3,2,0,0,0,2,3,0,2,2,2,0,2,0,2,2,0,0,0,0,0,0,0, - 3,0,2,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,2,0,3,2,3,0,0,0,2,0,2,2,0,0,3,3,3,2,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,0,0,0,2,0,2,3,0,3,0,0,2,2,0,0,0,0, - 2,2,3,3,3,3,0,3,3,2,3,3,3,3,3,3,2,2,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,2,3,2,3,2,0,3,2,0,0,0,0,2,0,0,3,2,0,2,0,2,0,0,0,2,0, - 2,3,2,2,2,0,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0, - 3,3,3,2,2,2,2,2,2,3,0,2,0,0,0,2,3,0,0,0,0,0,2,0,0,2,0,2,0,0,0,0,0,0, - 2,3,3,2,0,2,2,2,2,3,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0, - 3,3,2,0,0,0,3,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, - 2,3,3,0,0,2,3,2,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0, - 0,0,0,2,0,2,0,2,2,2,2,2,2,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,2,2,2,2,2,0,0,0,2,0,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,2,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,2,3,3,2,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,3,2,3,3,1,1,2,2,2,2,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,1,3,3,2,2,3,2,1,3,2,1, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,0,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,1,3,3,1,3,3,3,3,3,1,3,1,2,3,3,2,2,3,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,1,1,2,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,2,2,3,0,1,1,2,2,1,1,1, + 3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,2,3,3,3,2,3,3,3,1,2,0,3,1,1,1,1, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,0,2,2,3,2,0,1,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,1,1,2,2,0,2,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,0,0,1,1, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,2,0,1,2,3,2,0,0,2,2,2,1,0,3,1, + 3,3,3,3,3,3,3,3,3,3,2,1,3,2,3,3,2,2,3,3,3,3,2,2,3,2,1,1,0,0,0,0, + 3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,2,2,2,3,3,0,3,0,1,3,0,0,0,1,0,0,0, + 3,3,3,3,2,3,3,3,3,3,3,1,3,2,2,3,1,3,1,0,1,2,1,0,2,3,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,2,3,1,2,3,1,3,3,3,0,3,1,1,3,2,1,1,1,1,0,0, + 3,2,3,0,3,1,3,2,2,3,1,1,2,0,0,2,0,0,3,3,0,2,0,1,1,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,3,3,1,3,3,0,1,3,3,3,3,2,3,0,1,1,3,1,0,0,0,0,0, + 2,3,3,3,3,3,1,3,3,2,3,3,3,3,3,3,0,3,3,0,3,0,0,0,0,1,0,0,0,0,0,0, + 1,3,3,3,3,3,3,3,2,1,3,3,3,2,3,3,3,3,0,3,2,0,0,0,0,0,0,0,1,0,0,0, + 3,3,3,3,1,3,3,3,2,3,3,1,1,0,1,1,3,2,0,1,2,3,1,1,0,2,0,1,0,0,0,0, + 3,0,2,3,3,3,0,3,3,3,3,3,3,2,3,3,1,3,1,0,2,3,1,0,0,0,0,1,0,0,0,0, + 3,3,3,2,3,2,3,2,0,3,3,1,1,0,1,0,2,0,3,0,0,3,2,0,3,1,0,1,0,0,0,1, + 3,3,3,2,3,3,3,2,3,3,2,1,3,1,1,1,0,3,0,0,1,1,0,3,0,2,0,2,0,1,0,1, + 1,2,3,3,3,3,0,3,3,1,3,3,2,2,2,3,1,1,0,0,2,1,1,0,3,0,0,0,0,1,0,0, + 2,1,3,3,2,3,1,2,1,2,3,2,2,1,2,2,0,1,0,0,1,0,0,2,1,0,0,0,0,1,0,0, + 3,3,3,2,1,0,1,1,1,3,1,0,1,0,0,0,1,2,0,0,0,0,1,1,1,1,2,0,0,0,0,0, + 3,3,3,0,1,1,2,2,1,3,0,2,1,1,1,1,1,0,0,1,1,0,0,0,0,0,1,2,0,0,0,0, + 3,3,3,0,2,1,2,0,2,2,1,0,0,2,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, + 2,2,2,1,2,0,1,0,1,2,0,0,1,1,0,2,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,0, + 3,2,2,1,0,0,2,2,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0, + 2,1,0,0,0,0,2,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -232,8 +230,8 @@ const SequenceModel Iso_8859_4EstonianModel = { Iso_8859_4_CharToOrderMap, EstonianLangModel, - 34, - (float)0.9973685549586747, + 32, + (float)0.9990069724034739, PR_TRUE, "ISO-8859-4", "et" @@ -243,8 +241,8 @@ const SequenceModel Iso_8859_13EstonianModel = { Iso_8859_13_CharToOrderMap, EstonianLangModel, - 34, - (float)0.9973685549586747, + 32, + (float)0.9990069724034739, PR_TRUE, "ISO-8859-13", "et" @@ -254,8 +252,8 @@ const SequenceModel Iso_8859_15EstonianModel = { Iso_8859_15_CharToOrderMap, EstonianLangModel, - 34, - (float)0.9973685549586747, + 32, + (float)0.9990069724034739, PR_TRUE, "ISO-8859-15", "et" @@ -265,8 +263,8 @@ const SequenceModel Windows_1252EstonianModel = { Windows_1252_CharToOrderMap, EstonianLangModel, - 34, - (float)0.9973685549586747, + 32, + (float)0.9990069724034739, PR_TRUE, "WINDOWS-1252", "et" @@ -276,8 +274,8 @@ const SequenceModel Windows_1257EstonianModel = { Windows_1257_CharToOrderMap, EstonianLangModel, - 34, - (float)0.9973685549586747, + 32, + (float)0.9990069724034739, PR_TRUE, "WINDOWS-1257", "et" @@ -287,8 +285,11 @@ const LanguageModel EstonianModel = { "et", Unicode_CharOrder, - 68, + 64, EstonianLangModel, - 34, - (float)0.9973685549586747, + 32, + 3, + (float)0.33312644846916206, + 18, + (float)0.03563976943342159, }; diff --git a/src/LangModels/LangFinnishModel.cpp b/src/LangModels/LangFinnishModel.cpp index 23f7c58..20d0321 100644 --- a/src/LangModels/LangFinnishModel.cpp +++ b/src/LangModels/LangFinnishModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:06:31.129345 + * On: 2022-12-14 18:01:43.323316 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 4X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 6X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32, 66, 59, /* CX */ - 67, 58, 50, 33, 35, 53, 22,SYM, 68, 69, 41, 70, 34, 71, 72, 56, /* DX */ - 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32, 73, 59, /* EX */ - 74, 58, 50, 33, 35, 53, 22,SYM, 75, 76, 41, 77, 34, 78, 79, 80, /* FX */ + SYM,SYM,SYM,SYM,SYM, 77,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 46, 30, 59, 37, 11, 42, 40, 35, 36, 29, 60, 52, 68, 31, 51, 50, /* CX */ + 63, 53, 69, 34, 44, 71, 20,SYM, 38, 78, 48, 79, 32, 74, 67, 56, /* DX */ + 46, 30, 59, 37, 11, 42, 40, 35, 36, 29, 60, 52, 68, 31, 51, 50, /* EX */ + 63, 53, 69, 34, 44, 71, 20,SYM, 38, 80, 48, 81, 32, 74, 67, 82, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_4_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 4X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 6X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 81, 82, 48,SYM, 83, 84,SYM,SYM, 28, 85, 86, 87,SYM, 27,SYM, /* AX */ - SYM, 88,SYM, 48,SYM, 89, 90,SYM,SYM, 28, 91, 92, 93, 42, 27, 42, /* BX */ - 63, 36, 62, 47, 12, 54, 37, 94, 46, 30, 95, 31, 96, 32, 97, 98, /* CX */ - 99, 64,100,101, 35, 53, 22,SYM,102,103, 41,104, 34,105, 55, 56, /* DX */ - 63, 36, 62, 47, 12, 54, 37,106, 46, 30,107, 31,108, 32,109,110, /* EX */ - 111, 64,112,113, 35, 53, 22,SYM,114,115, 41,116, 34,117, 55,SYM, /* FX */ + SYM, 61, 83, 84,SYM, 85, 73,SYM,SYM, 27, 66, 86, 87,SYM, 33,SYM, /* AX */ + SYM, 61,SYM, 88,SYM, 89, 73,SYM,SYM, 27, 66, 90, 91, 47, 33, 47, /* BX */ + 41, 30, 59, 37, 11, 42, 40, 92, 49, 29, 57, 52, 39, 31, 51, 58, /* CX */ + 54, 93, 70, 94, 44, 71, 20,SYM, 38, 72, 48, 95, 32, 96, 45, 56, /* DX */ + 41, 30, 59, 37, 11, 42, 40, 97, 49, 29, 57, 52, 39, 31, 51, 58, /* EX */ + 54, 98, 70, 99, 44, 71, 20,SYM, 38, 72, 48,100, 32,101, 45,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 4X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 6X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,118,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,119, 59, /* CX */ - 51, 58, 50, 33, 35, 53, 22,SYM,120,121, 41,122, 34, 49, 43, 56, /* DX */ - 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,123, 59, /* EX */ - 51, 58, 50, 33, 35, 53, 22,SYM,124,125, 41,126, 34, 45, 43,127, /* FX */ + SYM,SYM,SYM,SYM,SYM,102,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 46, 30, 59, 37, 11, 42, 40, 35, 36, 29, 60, 52, 68, 31, 51, 50, /* CX */ + 75, 53, 69, 34, 44, 71, 20,SYM, 38,103, 48,104, 32,105,106, 56, /* DX */ + 46, 30, 59, 37, 11, 42, 40, 35, 36, 29, 60, 52, 68, 31, 51, 50, /* EX */ + 75, 53, 69, 34, 44, 71, 20,SYM, 38,107, 48,108, 32, 55,109,110, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,18 +131,18 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 4X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 6X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,128,SYM, 48,SYM,SYM,SYM,SYM, 37, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,129,SYM, 48,SYM,SYM,SYM,SYM, 37, /* BX */ - 130,131, 63, 40, 12, 54,132,133, 46, 30, 61,134,135,136,137,138, /* CX */ - 28,139, 64, 33,140, 53, 22,SYM,141,142,143, 55, 34, 57, 27, 56, /* DX */ - 144,145, 63, 40, 12, 54,146,147, 46, 30, 61,148,149,150,151,152, /* EX */ - 28,153, 64, 33,154, 53, 22,SYM,155,156,157, 55, 34, 57, 27,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM,111,SYM,SYM,SYM,SYM, 40, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM,112,SYM,SYM,SYM,SYM, 40, /* BX */ + 61,113, 41, 43, 11, 42, 57, 66, 49, 29,114, 39,115,116, 58, 73, /* CX */ + 27, 64,117, 34, 70, 71, 20,SYM, 72, 76, 65, 45, 32, 62, 33, 56, /* DX */ + 61,118, 41, 43, 11, 42, 57, 66, 49, 29,119, 39,120,121, 58, 73, /* EX */ + 27, 64,122, 34, 70, 71, 20,SYM, 72, 76, 65, 45, 32, 62, 33,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -152,18 +152,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 4X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 6X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM, 28,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 27,158,SYM,SYM, 27,SYM,SYM,SYM,159,160,161,SYM, /* BX */ - 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,162, 59, /* CX */ - 163, 58, 50, 33, 35, 53, 22,SYM,164,165, 41,166, 34,167,168, 56, /* DX */ - 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,169, 59, /* EX */ - 170, 58, 50, 33, 35, 53, 22,SYM,171,172, 41,173, 34,174,175,176, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 27,SYM, 27,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 33,123,SYM,SYM, 33,SYM,SYM,SYM,124,125,126,SYM, /* BX */ + 46, 30, 59, 37, 11, 42, 40, 35, 36, 29, 60, 52, 68, 31, 51, 50, /* CX */ + 63, 53, 69, 34, 44, 71, 20,SYM, 38,127, 48,128, 32, 74, 67, 56, /* DX */ + 46, 30, 59, 37, 11, 42, 40, 35, 36, 29, 60, 52, 68, 31, 51, 50, /* EX */ + 63, 53, 69, 34, 44, 71, 20,SYM, 38,129, 48,130, 32, 74, 67,131, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -173,74 +173,74 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 4X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 21, 18, 4, 23, 20, 15, 1, 14, 9, 6, 11, 2, 7, /* 6X */ - 16, 29, 10, 5, 3, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,177,SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM,178,ILL, 27,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 28,SYM,179,ILL, 27,180, /* 9X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 4X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 22, 18, 5, 23, 19, 16, 1, 14, 9, 6, 10, 3, 7, /* 6X */ + 15, 28, 12, 4, 2, 8, 13, 24, 26, 17, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,132,SYM,SYM,SYM,SYM,SYM,SYM, 27,SYM,133,ILL, 33,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 27,SYM,134,ILL, 33,135, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,181,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,182, 59, /* CX */ - 183, 58, 50, 33, 35, 53, 22,SYM,184,185, 41,186, 34,187,188, 56, /* DX */ - 44, 36, 62, 47, 12, 54, 37, 39, 38, 30, 52, 31, 60, 32,189, 59, /* EX */ - 190, 58, 50, 33, 35, 53, 22,SYM,191,192, 41,193, 34,194,195,196, /* FX */ + SYM,SYM,SYM,SYM,SYM,136,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 46, 30, 59, 37, 11, 42, 40, 35, 36, 29, 60, 52, 68, 31, 51, 50, /* CX */ + 63, 53, 69, 34, 44, 71, 20,SYM, 38,137, 48,138, 32, 74, 67, 56, /* DX */ + 46, 30, 59, 37, 11, 42, 40, 35, 36, 29, 60, 52, 68, 31, 51, 50, /* EX */ + 63, 53, 69, 34, 44, 71, 20,SYM, 38,139, 48,140, 32, 74, 67,141, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 60; +static const int Unicode_Char_size = 58; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 19, 67, 21, 68, 18, 69, 4, 70, 23, 71, 20, 72, 15, - 73, 1, 74, 14, 75, 9, 76, 6, 77, 11, 78, 2, 79, 7, 80, 16, - 81, 29, 82, 10, 83, 5, 84, 3, 85, 8, 86, 13, 87, 24, 88, 26, - 89, 17, 90, 25, 97, 0, 98, 19, 99, 21, 100, 18, 101, 4,102, 23, - 103, 20, 104, 15, 105, 1, 106, 14, 107, 9, 108, 6, 109, 11,110, 2, - 111, 7, 112, 16, 113, 29, 114, 10, 115, 5, 116, 3, 117, 8,118, 13, - 119, 24, 120, 26, 121, 17, 122, 25, 196, 12, 214, 22, 228, 12,246, 22, - 352, 28, 353, 28, 381, 27, 382, 27, + 65, 0, 66, 21, 67, 22, 68, 18, 69, 5, 70, 23, 71, 19, 72, 16, + 73, 1, 74, 14, 75, 9, 76, 6, 77, 10, 78, 3, 79, 7, 80, 15, + 81, 28, 82, 12, 83, 4, 84, 2, 85, 8, 86, 13, 87, 24, 88, 26, + 89, 17, 90, 25, 97, 0, 98, 21, 99, 22, 100, 18, 101, 5,102, 23, + 103, 19, 104, 16, 105, 1, 106, 14, 107, 9, 108, 6, 109, 10,110, 3, + 111, 7, 112, 15, 113, 28, 114, 12, 115, 4, 116, 2, 117, 8,118, 13, + 119, 24, 120, 26, 121, 17, 122, 25, 196, 11, 214, 20, 228, 11,246, 20, + 352, 27, 353, 27, }; /* Model Table: - * Total sequences: 940 - * First 512 sequences: 0.9985812031154878 - * Next 512 sequences (512-1024): 0.0014187968845121583 - * Rest: 2.7321894746634712e-17 + * Total considered sequences: 1166 / 841 + * - Positive sequences: first 398 (0.9950171899546914) + * - Probable sequences: next 196 (594-398) (0.003983288176498734) + * - Neutral sequences: last 247 (0.0009995218688099161) + * - Negative sequences: -325 (off-ratio) * Negative sequences: TODO */ static const PRUint8 FinnishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,3,3,2,3,3,2,0,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,2,3,2,3,2,2,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2, - 3,3,2,2,3,3,2,3,3,2,3,3,3,2,2,2,3,3,2,3,3,3,3,2,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,0,0,0,0,0,0, - 3,3,2,2,3,2,2,3,3,2,2,0,3,2,2,2,2,3,2,0,0,0,2,0,0,0,0,0,0,0, - 3,3,2,0,3,2,2,3,3,2,2,2,3,2,0,2,2,3,0,2,0,2,3,2,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,0,3,3,2,2,0,0,0,0,2, - 3,3,2,3,3,3,3,3,3,2,3,3,3,2,0,3,3,3,3,2,2,2,3,2,3,0,0,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,0,2,0, - 3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,0,3,3,3,2,3,2,0,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,3,3,3,3,3,3,2,3,2,0,0,0,2, - 3,3,2,3,3,3,3,3,3,3,3,2,0,0,0,3,2,3,3,2,3,3,0,0,2,2,0,0,0,2, - 3,3,3,3,2,3,3,2,0,3,3,3,3,3,3,3,3,3,3,0,2,0,3,2,0,0,0,0,0,0, - 3,3,2,3,3,3,3,3,3,2,3,2,2,2,2,0,2,3,0,2,2,3,0,3,2,2,0,0,0,0, - 3,3,3,2,3,3,2,3,2,2,3,2,0,0,0,3,0,2,2,2,0,2,0,2,0,0,0,0,0,0, - 3,3,3,2,3,0,2,3,2,2,2,2,2,2,0,2,2,3,2,2,2,0,0,2,2,3,0,0,0,0, - 3,3,0,2,2,2,3,2,2,0,0,2,0,2,0,2,2,0,0,2,0,2,0,3,2,0,2,0,0,0, - 3,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,2,3,0,0,2,2,2,2,2,0,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,0,0,0,2,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,3,3,1,1,3,2,3,2,2,2,0,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,1,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,3,3,3,2,3,1,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,2,1,2, + 3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,2,3,1,2,0,1,1, + 3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,1,3,3,3,3,2,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,3,1,3,3,2,2,2,2,2,1, + 3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,3,1,2,3,1,1,1,1,1,0,1,0, + 3,3,2,2,2,3,2,3,3,1,3,3,2,1,1,3,1,3,2,1,2,3,2,2,2,1,1,0,1, + 3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,1,2, + 3,3,2,1,2,3,2,3,3,1,1,3,2,1,1,2,0,3,1,1,1,1,0,1,0,0,1,0,0, + 3,3,0,2,1,3,2,3,3,0,1,3,2,1,1,2,0,3,1,0,3,0,1,1,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,2,2,3,3,1,1,3,3,3,2,1,3,1,1,2,1,1,0,0,0, + 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,1,1,3,3,1,2,1,1,1,2,1,0,0,1, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,1,1,1,0,0, + 3,3,2,2,3,3,2,3,3,1,2,3,3,2,2,2,2,3,2,2,2,2,2,2,2,1,0,0,1, + 3,3,2,3,2,3,3,3,3,2,2,1,3,2,1,1,3,3,2,2,2,2,1,1,2,0,0,0,0, + 2,3,3,3,3,2,3,2,1,3,3,3,3,3,3,2,3,3,2,1,3,0,0,0,0,1,0,0,0, + 3,3,2,3,2,3,3,3,3,2,2,1,3,1,2,1,2,2,2,0,1,3,3,1,2,1,0,0,0, + 3,3,3,2,2,3,3,3,3,3,2,0,2,1,0,1,3,2,2,1,0,1,2,1,1,1,1,0,2, + 3,3,2,1,2,3,3,3,2,1,1,1,3,1,1,2,1,2,1,2,1,1,2,2,0,0,0,0,0, + 3,3,1,2,2,3,2,3,1,1,1,1,2,1,0,1,2,1,1,2,1,0,1,1,1,0,0,0,0, + 3,3,1,1,0,3,1,2,2,1,1,0,1,1,1,1,1,2,1,1,1,2,1,1,1,2,0,0,0, + 2,3,2,0,1,2,1,2,1,1,1,0,0,1,0,1,1,1,0,0,0,1,1,2,0,0,1,0,0, + 2,2,2,1,0,2,0,2,2,2,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, + 2,1,0,0,1,0,0,1,3,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, }; @@ -248,8 +248,8 @@ const SequenceModel Iso_8859_1FinnishModel = { Iso_8859_1_CharToOrderMap, FinnishLangModel, - 30, - (float)0.9985812031154878, + 29, + (float)0.9990004781311901, PR_TRUE, "ISO-8859-1", "fi" @@ -259,8 +259,8 @@ const SequenceModel Iso_8859_4FinnishModel = { Iso_8859_4_CharToOrderMap, FinnishLangModel, - 30, - (float)0.9985812031154878, + 29, + (float)0.9990004781311901, PR_TRUE, "ISO-8859-4", "fi" @@ -270,8 +270,8 @@ const SequenceModel Iso_8859_9FinnishModel = { Iso_8859_9_CharToOrderMap, FinnishLangModel, - 30, - (float)0.9985812031154878, + 29, + (float)0.9990004781311901, PR_TRUE, "ISO-8859-9", "fi" @@ -281,8 +281,8 @@ const SequenceModel Iso_8859_13FinnishModel = { Iso_8859_13_CharToOrderMap, FinnishLangModel, - 30, - (float)0.9985812031154878, + 29, + (float)0.9990004781311901, PR_TRUE, "ISO-8859-13", "fi" @@ -292,8 +292,8 @@ const SequenceModel Iso_8859_15FinnishModel = { Iso_8859_15_CharToOrderMap, FinnishLangModel, - 30, - (float)0.9985812031154878, + 29, + (float)0.9990004781311901, PR_TRUE, "ISO-8859-15", "fi" @@ -303,8 +303,8 @@ const SequenceModel Windows_1252FinnishModel = { Windows_1252_CharToOrderMap, FinnishLangModel, - 30, - (float)0.9985812031154878, + 29, + (float)0.9990004781311901, PR_TRUE, "WINDOWS-1252", "fi" @@ -314,8 +314,11 @@ const LanguageModel FinnishModel = { "fi", Unicode_CharOrder, - 60, + 58, FinnishLangModel, - 30, - (float)0.9985812031154878, + 29, + 3, + (float)0.3300643936651628, + 16, + (float)0.04489754832004237, }; diff --git a/src/LangModels/LangFrenchModel.cpp b/src/LangModels/LangFrenchModel.cpp index ffea661..4ec59ac 100644 --- a/src/LangModels/LangFrenchModel.cpp +++ b/src/LangModels/LangFrenchModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 01:24:27.092782 + * On: 2022-12-14 17:27:44.115466 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 4X */ - 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 6X */ - 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 13, 4, 8, /* 4X */ + 12, 20, 6, 1, 5, 9, 15, 30, 21, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 13, 4, 8, /* 6X */ + 12, 20, 6, 1, 5, 9, 15, 30, 21, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 49,SYM, 49,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 57, 58,SYM,SYM, 59,SYM,SYM,SYM, 37, 37, 53,SYM, /* BX */ - 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 60, 41, 32, 35, /* CX */ - 54, 45, 51, 38, 30, 61, 44,SYM, 50, 36, 46, 33, 42, 52, 62, 56, /* DX */ - 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 63, 41, 32, 35, /* EX */ - 54, 45, 51, 38, 30, 64, 44,SYM, 50, 36, 46, 33, 42, 52, 65, 53, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 44,SYM, 44,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 49, 51,SYM,SYM, 49,SYM,SYM,SYM, 34, 34, 59,SYM, /* BX */ + 22, 39, 32, 46, 47, 58, 48, 29, 23, 14, 27, 38, 53, 42, 33, 35, /* CX */ + 57, 45, 54, 40, 31, 55, 43,SYM, 52, 37, 50, 36, 41, 60, 61, 56, /* DX */ + 22, 39, 32, 46, 47, 58, 48, 29, 23, 14, 27, 38, 53, 42, 33, 35, /* EX */ + 57, 45, 54, 40, 31, 55, 43,SYM, 52, 37, 50, 36, 41, 62, 63, 64, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 4X */ - 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 6X */ - 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 13, 4, 8, /* 4X */ + 12, 20, 6, 1, 5, 9, 15, 30, 21, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 13, 4, 8, /* 6X */ + 12, 20, 6, 1, 5, 9, 15, 30, 21, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 66,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 67, 41, 32, 35, /* CX */ - 54, 45, 51, 38, 30, 68, 44,SYM, 50, 36, 46, 33, 42, 52, 69, 56, /* DX */ - 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 70, 41, 32, 35, /* EX */ - 54, 45, 51, 38, 30, 71, 44,SYM, 50, 36, 46, 33, 42, 52, 72, 53, /* FX */ + SYM,SYM,SYM,SYM,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 22, 39, 32, 46, 47, 58, 48, 29, 23, 14, 27, 38, 53, 42, 33, 35, /* CX */ + 57, 45, 54, 40, 31, 55, 43,SYM, 52, 37, 50, 36, 41, 65, 66, 56, /* DX */ + 22, 39, 32, 46, 47, 58, 48, 29, 23, 14, 27, 38, 53, 42, 33, 35, /* EX */ + 57, 45, 54, 40, 31, 55, 43,SYM, 52, 37, 50, 36, 41, 67, 68, 69, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,84 +110,85 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 4X */ - 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 6X */ - 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 73,SYM,SYM,SYM,SYM,SYM,SYM, 49,SYM, 37,ILL, 74,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 49,SYM, 37,ILL, 75, 53, /* 9X */ + SYM, 2, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 13, 4, 8, /* 4X */ + 12, 20, 6, 1, 5, 9, 15, 30, 21, 24, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 13, 4, 8, /* 6X */ + 12, 20, 6, 1, 5, 9, 15, 30, 21, 24, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 70,SYM,SYM,SYM,SYM,SYM,SYM, 44,SYM, 34,ILL, 49,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 44,SYM, 34,ILL, 49, 71, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 76,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 77, 41, 32, 35, /* CX */ - 54, 45, 51, 38, 30, 78, 44,SYM, 50, 36, 46, 33, 42, 52, 79, 56, /* DX */ - 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 80, 41, 32, 35, /* EX */ - 54, 45, 51, 38, 30, 81, 44,SYM, 50, 36, 46, 33, 42, 52, 82, 53, /* FX */ + SYM,SYM,SYM,SYM,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 22, 39, 32, 46, 47, 58, 48, 29, 23, 14, 27, 38, 53, 42, 33, 35, /* CX */ + 57, 45, 54, 40, 31, 55, 43,SYM, 52, 37, 50, 36, 41, 72, 73, 56, /* DX */ + 22, 39, 32, 46, 47, 58, 48, 29, 23, 14, 27, 38, 53, 42, 33, 35, /* EX */ + 57, 45, 54, 40, 31, 55, 43,SYM, 52, 37, 50, 36, 41, 74, 75, 76, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 76; static const unsigned int Unicode_CharOrder[] = { - 65, 1, 66, 18, 67, 11, 68, 10, 69, 0, 70, 17, 71, 16, 72, 19, - 73, 3, 74, 25, 75, 26, 76, 7, 77, 12, 78, 4, 79, 8, 80, 13, - 81, 20, 82, 5, 83, 2, 84, 6, 85, 9, 86, 15, 87, 31, 88, 22, - 89, 21, 90, 27, 97, 1, 98, 18, 99, 11, 100, 10, 101, 0,102, 17, - 103, 16, 104, 19, 105, 3, 106, 25, 107, 26, 108, 7, 109, 12,110, 4, - 111, 8, 112, 13, 113, 20, 114, 5, 115, 2, 116, 6, 117, 9,118, 15, - 119, 31, 120, 22, 121, 21, 122, 27, 192, 23, 194, 34, 199, 29,200, 24, - 201, 14, 202, 28, 206, 32, 207, 35, 212, 30, 217, 36, 219, 33,224, 23, - 226, 34, 231, 29, 232, 24, 233, 14, 234, 28, 238, 32, 239, 35,244, 30, - 249, 36, 251, 33, 338, 37, 339, 37, + 65, 2, 66, 18, 67, 11, 68, 10, 69, 0, 70, 17, 71, 16, 72, 19, + 73, 3, 74, 25, 75, 26, 76, 7, 77, 13, 78, 4, 79, 8, 80, 12, + 81, 20, 82, 6, 83, 1, 84, 5, 85, 9, 86, 15, 87, 30, 88, 21, + 89, 24, 90, 28, 97, 2, 98, 18, 99, 11, 100, 10, 101, 0,102, 17, + 103, 16, 104, 19, 105, 3, 106, 25, 107, 26, 108, 7, 109, 13,110, 4, + 111, 8, 112, 12, 113, 20, 114, 6, 115, 1, 116, 5, 117, 9,118, 15, + 119, 30, 120, 21, 121, 24, 122, 28, 192, 22, 194, 32, 199, 29,200, 23, + 201, 14, 202, 27, 206, 33, 207, 35, 212, 31, 217, 37, 219, 36,224, 22, + 226, 32, 231, 29, 232, 23, 233, 14, 234, 27, 238, 33, 239, 35,244, 31, + 249, 37, 251, 36, 338, 34, 339, 34, }; /* Model Table: - * Total sequences: 1049 - * First 512 sequences: 0.997006678170155 - * Next 512 sequences (512-1024): 0.0029768569132891634 - * Rest: 1.646491655585584e-05 + * Total considered sequences: 1255 / 1444 + * - Positive sequences: first 465 (0.995025880940353) + * - Probable sequences: next 182 (647-465) (0.0039795148879469) + * - Neutral sequences: last 797 (0.000994604171700053) + * - Negative sequences: 189 (off-ratio) * Negative sequences: TODO */ static const PRUint8 FrenchLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,2,0,3,3,3,0,3,0,3,0,2,0,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,2,3,3,0,0,3,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,3,3,3,3,0,2,3,2,3,2,0,0,0,2,0,2,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,2,2,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,1,0,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,2,3,3,3,3,3,2,0,2,3,0,0,0, - 3,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,2,2,3,3,0,3,0,2,3,2,2,3,3,0,3,2,0,0,2,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,0,2,2,0,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,3,3,2,1,0,0,3,2,3,0,2,3,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,0,2,2,0,0,2,2,0,0, - 3,3,3,3,2,3,2,3,3,3,3,2,3,2,3,2,3,2,2,3,0,3,0,0,3,3,2,2,0,0,2,2,2,2,0,0,0,2, - 3,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,3,3,0,0,3,0,3,2,2,0,3,2,0,0,2,0,0,2, - 3,3,3,3,3,2,3,2,3,3,2,2,3,3,3,2,2,2,3,2,0,3,0,0,3,2,2,2,3,0,2,2,0,2,2,0,0,2, - 3,3,3,3,2,3,3,3,3,3,2,2,2,3,3,2,3,2,2,3,0,3,0,0,3,2,2,0,3,2,3,2,0,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,0,0,3,2,3,0,2,0,2,0,0,3,2,0,0, - 3,3,3,3,2,3,2,2,3,3,2,2,2,2,3,1,2,0,0,3,0,2,0,0,3,0,2,0,3,0,2,0,0,0,2,0,0,2, - 3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,2,3,2,3,3,2,3,0,0,3,2,2,1,2,0,0,2,0,0,2,0,0,0, - 3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,2,2,3,2,0,0,2,0,0,3,0,2,0,3,0,0,0,0,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,0,3,2,2,2,3,2,2,3,2,0,2,3,2,2,2,0,0,2,1,2,3,0,0,2, - 3,3,2,3,3,3,3,3,3,3,2,2,3,2,3,2,2,2,2,2,2,3,0,0,3,1,2,2,2,0,3,2,0,0,2,0,0,2, - 2,3,2,2,0,2,0,0,2,3,1,2,2,2,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,0,2,2,0,2,2,2,2,0,2,2,2,0,0,2,0,0,0, - 3,3,2,3,0,1,3,2,2,2,1,3,0,3,3,3,0,2,2,2,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,3,1,3,3,3,3,0,0,3,3,3,2,0,3,3,0,2,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,2,2,2,0,3,3,2,2,2,2,3,0,0,1,2,1,0,0,0,3,0,2,2,2,0,0,0,0,0,0,2,0,0,0, - 3,3,3,3,2,3,2,3,3,3,2,2,3,2,3,0,2,0,2,3,0,3,0,0,0,2,2,1,0,0,0,2,0,0,0,0,0,2, - 3,3,2,3,2,2,2,2,3,3,2,2,2,1,3,2,2,2,3,2,2,2,0,0,2,0,2,3,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,2,2,3,2,0,0,0,3,3,0,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,2,0,3,0,3,3,0,0,0,0,3,3,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,2,2,2,2,3,3,2,2,2,0,2,0,2,2,2,2,0,2,0,0,0,0,2,1,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,3,0,3,3,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,3,2,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,3,2,0,0,0,3,2,2,0,0,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,0,2,2,2,2,0,2,3,2,2,0,0,2,2,2,2,2,2,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,2,3,0,0,3,3,2,0,3,3,3,1,1,0,0,1,1,0, + 3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,2,1,3,3,3,3,1,0,3,3,1,3,1,2,1,2,0,1,2,2,0,2,0, + 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,0,3,3,2,1,1,3,0,3,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,2,3,1,3,1,2,1,1,0,1,0,0,0, + 3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,3,3,3,2,3,3,1,1,3,3,3,3,2,3,3,1,1,1,1,1,0,0,0, + 3,3,3,3,2,3,3,3,3,3,2,3,2,3,3,2,2,2,3,3,1,1,1,3,3,1,2,2,2,0,2,3,2,0,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,2,3,3,2,2,2,3,3,1,0,0,2,1, + 3,3,3,3,2,3,1,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,3,3,3,2,2,1,2,0,1,2,2,0,1,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,0,2,3,3,3,1,2,0,3,0,0,2,1,3,3,3, + 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,2,2,3,0,2,3,3,3,2,2,2,1,0,1,1,0,1,0,0, + 3,3,3,3,2,2,3,2,3,3,3,2,2,3,3,2,2,2,2,3,1,0,1,3,3,3,1,0,1,0,2,2,0,1,1,0,2,0, + 3,3,3,3,2,3,3,3,3,3,2,3,2,2,3,1,1,2,2,3,3,1,0,3,3,1,3,2,2,0,1,3,1,0,2,0,0,0, + 3,3,3,3,2,3,3,3,3,3,2,3,3,2,3,1,1,2,1,3,1,1,0,3,2,1,1,3,1,1,1,2,2,1,0,0,1,0, + 3,3,3,3,3,2,2,2,3,3,1,3,3,3,3,1,2,1,3,2,0,1,1,3,3,1,1,3,1,0,1,1,2,0,2,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,1,3,2,0,2,1,0,0,2,0,0,0,0,0, + 3,1,3,3,1,1,3,2,3,3,1,1,1,1,3,1,1,1,1,2,1,0,0,3,2,0,1,3,1,0,0,1,1,1,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,2,3,1,2,3,1,1,1,3,3,1,1,1,1,0,2,1,2,1,1,0,0,0, + 3,3,3,3,1,2,3,3,3,3,1,1,2,2,3,1,2,3,1,1,0,1,0,2,1,1,1,3,1,0,1,0,1,1,2,0,1,0, + 3,3,3,3,3,3,3,3,3,3,2,1,1,1,3,2,0,1,3,2,2,0,0,2,3,3,1,2,1,0,1,0,3,1,3,0,1,0, + 3,2,3,3,3,3,3,3,3,3,1,1,1,3,3,1,1,1,1,1,1,0,1,3,3,1,2,1,2,0,2,3,3,0,2,0,1,0, + 1,1,2,1,0,0,1,0,1,3,1,0,1,1,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 3,1,3,3,0,3,1,1,2,2,0,3,3,1,3,3,0,1,1,2,2,3,1,1,3,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,1,3,3,3,3,0,0,3,3,1,3,0,3,3,1,2,0,3,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,3,1,2,2,1,1,1,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0, + 3,1,3,3,1,1,1,1,3,3,1,1,1,1,2,0,0,1,0,1,0,0,2,2,1,1,1,0,0,0,1,0,0,1,0,0,1,0, + 3,2,3,3,2,2,2,3,3,3,1,1,1,3,2,1,2,1,2,3,0,1,0,0,2,1,2,1,0,0,1,1,1,0,1,0,0,0, + 0,1,0,0,2,3,0,2,0,0,1,3,1,3,0,2,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 3,1,3,3,1,2,1,1,3,2,1,1,1,1,2,1,1,1,2,1,1,1,0,1,2,1,1,0,2,0,1,0,0,0,0,0,0,0, + 0,0,3,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 3,2,3,3,2,1,2,1,3,1,1,1,1,1,1,0,1,1,1,2,0,0,0,0,1,0,1,0,0,0,2,1,0,0,0,0,0,0, + 0,1,0,1,3,3,0,3,0,0,1,0,2,3,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,1,2,3,1,2,0,0,1,3,1,2,0,0,3,0,1,1,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,3,3,1,3,1,0,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,1,1,2,1,1,0,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,1,0,2,2,1,2,1,0,2,2,0,2,1,1,1,1,1,0,2,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,1,3,2,2,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,2,2,2,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -196,7 +197,7 @@ const SequenceModel Iso_8859_15FrenchModel = Iso_8859_15_CharToOrderMap, FrenchLangModel, 38, - (float)0.997006678170155, + (float)0.9990053958283, PR_TRUE, "ISO-8859-15", "fr" @@ -207,7 +208,7 @@ const SequenceModel Iso_8859_1FrenchModel = Iso_8859_1_CharToOrderMap, FrenchLangModel, 38, - (float)0.997006678170155, + (float)0.9990053958283, PR_TRUE, "ISO-8859-1", "fr" @@ -218,7 +219,7 @@ const SequenceModel Windows_1252FrenchModel = Windows_1252_CharToOrderMap, FrenchLangModel, 38, - (float)0.997006678170155, + (float)0.9990053958283, PR_TRUE, "WINDOWS-1252", "fr" @@ -231,5 +232,8 @@ const LanguageModel FrenchModel = 76, FrenchLangModel, 38, - (float)0.997006678170155, + 4, + (float)0.3741847216798905, + 19, + (float)0.03469583335040825, }; diff --git a/src/LangModels/LangGermanModel.cpp b/src/LangModels/LangGermanModel.cpp index 164da12..52c7c08 100644 --- a/src/LangModels/LangGermanModel.cpp +++ b/src/LangModels/LangGermanModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 01:10:34.750155 + * On: 2022-12-14 18:05:20.300464 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 4X */ - 17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 6X */ - 17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 6, 15, 12, 7, 0, 16, 11, 8, 2, 25, 17, 10, 14, 1, 13, /* 4X */ + 20, 29, 3, 4, 5, 9, 21, 18, 28, 27, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 6, 15, 12, 7, 0, 16, 11, 8, 2, 25, 17, 10, 14, 1, 13, /* 6X */ + 20, 29, 3, 4, 5, 9, 21, 18, 28, 27, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* CX */ - 50, 37, 53, 33, 49, 58, 26,SYM, 39, 60, 38, 61, 25, 44, 54, 27, /* DX */ - 48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* EX */ - 50, 37, 53, 33, 49, 58, 26,SYM, 39, 62, 38, 63, 25, 44, 54, 57, /* FX */ + SYM,SYM,SYM,SYM,SYM, 47,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 32, 31, 48, 39, 22, 43, 51, 35, 37, 30, 42, 55, 44, 33, 49, 57, /* CX */ + 58, 52, 46, 38, 45, 54, 24,SYM, 34, 50, 40, 53, 23, 56, 60, 26, /* DX */ + 32, 31, 48, 39, 22, 43, 51, 35, 37, 30, 42, 55, 44, 33, 49, 57, /* EX */ + 58, 52, 46, 38, 45, 54, 24,SYM, 34, 50, 40, 53, 23, 56, 61, 62, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,75 +89,75 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 4X */ - 17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 6X */ - 17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 56,ILL, 43,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 56,ILL, 43, 57, /* 9X */ + SYM, 6, 15, 12, 7, 0, 16, 11, 8, 2, 25, 17, 10, 14, 1, 13, /* 4X */ + 20, 29, 3, 4, 5, 9, 21, 18, 28, 27, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 6, 15, 12, 7, 0, 16, 11, 8, 2, 25, 17, 10, 14, 1, 13, /* 6X */ + 20, 29, 3, 4, 5, 9, 21, 18, 28, 27, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 63,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 59,ILL, 41,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 59,ILL, 41, 64, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* CX */ - 50, 37, 53, 33, 49, 58, 26,SYM, 39, 66, 38, 67, 25, 44, 54, 27, /* DX */ - 48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* EX */ - 50, 37, 53, 33, 49, 58, 26,SYM, 39, 68, 38, 69, 25, 44, 54, 57, /* FX */ + SYM,SYM,SYM,SYM,SYM, 47,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 32, 31, 48, 39, 22, 43, 51, 35, 37, 30, 42, 55, 44, 33, 49, 57, /* CX */ + 58, 52, 46, 38, 45, 54, 24,SYM, 34, 50, 40, 53, 23, 56, 65, 26, /* DX */ + 32, 31, 48, 39, 22, 43, 51, 35, 37, 30, 42, 55, 44, 33, 49, 57, /* EX */ + 58, 52, 46, 38, 45, 54, 24,SYM, 34, 50, 40, 53, 23, 56, 66, 67, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 61; +static const int Unicode_Char_size = 59; static const unsigned int Unicode_CharOrder[] = { - 65, 5, 66, 15, 67, 12, 68, 10, 69, 0, 70, 18, 71, 14, 72, 7, - 73, 2, 74, 22, 75, 16, 76, 9, 77, 13, 78, 3, 79, 11, 80, 17, - 81, 30, 82, 1, 83, 4, 84, 6, 85, 8, 86, 21, 87, 20, 88, 28, - 89, 24, 90, 19, 97, 5, 98, 15, 99, 12, 100, 10, 101, 0,102, 18, - 103, 14, 104, 7, 105, 2, 106, 22, 107, 16, 108, 9, 109, 13,110, 3, - 111, 11, 112, 17, 113, 30, 114, 1, 115, 4, 116, 6, 117, 8,118, 21, - 119, 20, 120, 28, 121, 24, 122, 19, 196, 23, 201, 29, 214, 26,220, 25, - 223, 27, 228, 23, 233, 29, 246, 26, 252, 25, + 65, 6, 66, 15, 67, 12, 68, 7, 69, 0, 70, 16, 71, 11, 72, 8, + 73, 2, 74, 25, 75, 17, 76, 10, 77, 14, 78, 1, 79, 13, 80, 20, + 81, 29, 82, 3, 83, 4, 84, 5, 85, 9, 86, 21, 87, 18, 88, 28, + 89, 27, 90, 19, 97, 6, 98, 15, 99, 12, 100, 7, 101, 0,102, 16, + 103, 11, 104, 8, 105, 2, 106, 25, 107, 17, 108, 10, 109, 14,110, 1, + 111, 13, 112, 20, 113, 29, 114, 3, 115, 4, 116, 5, 117, 9,118, 21, + 119, 18, 120, 28, 121, 27, 122, 19, 196, 22, 214, 24, 220, 23,223, 26, + 228, 22, 246, 24, 252, 23, }; /* Model Table: - * Total sequences: 1337 - * First 512 sequences: 0.9936565191798025 - * Next 512 sequences (512-1024): 0.00616485529057582 - * Rest: 0.00017862552962171364 + * Total considered sequences: 1194 / 900 + * - Positive sequences: first 494 (0.9950332887709427) + * - Probable sequences: next 153 (647-494) (0.003970985671306937) + * - Neutral sequences: last 253 (0.0009957255577504043) + * - Negative sequences: -294 (off-ratio) * Negative sequences: TODO */ static const PRUint8 GermanLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,1,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,1,2,2, - 3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,2,3,2,3,2,2,3,3,3,3,0,0,2,2, - 3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,2,1,2,1,1,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,2,3,3,2,2,0,0,2,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,0,0,3,3,1,2, - 3,3,3,2,2,3,3,3,3,3,2,3,3,2,2,2,3,2,2,3,2,2,2,2,3,1,1,0,0,2,2, - 3,3,3,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,2,2,3,3,3,3,0,1,2,0, - 3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,3,3,2,2,2,3,3,2,0,1,2,1, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,2,2,2,2,2,3,3,3,3,0,0,2,0, - 3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,3,2,3,3,2,2,3,3,3,3,0,2,2,0, - 3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,2,2,3,2,2,2,1,0,2,1, - 3,3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,2,2,3,2,2,2,2,3,2,3,2,0,0,2,1, - 3,2,3,2,3,3,3,2,3,3,2,3,2,3,2,3,2,2,2,3,3,2,2,2,3,2,3,0,0,2,2, - 3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,2,2,2,2,0,2,0,3,3,2,3,2,0,0,1,0, - 3,2,3,2,2,3,2,2,2,2,2,3,2,1,2,1,2,2,2,1,2,2,0,2,2,0,3,0,0,2,0, - 3,2,3,2,2,3,2,1,3,2,2,3,2,2,1,1,2,0,1,2,0,1,2,3,2,3,2,0,0,2,0, - 2,3,3,3,3,1,3,3,3,3,3,3,3,3,3,2,2,2,3,2,0,2,0,1,1,0,0,2,0,0,2, - 3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,2,3,3,2,2,2,2,2,0,2,2,2,1,1,1,0, - 2,3,1,3,3,1,3,3,0,3,3,0,3,2,3,3,2,2,2,1,1,0,0,0,0,1,0,2,1,0,0, - 2,3,2,3,3,0,3,3,0,3,2,1,2,3,2,2,2,2,3,2,2,2,0,0,1,0,1,3,1,0,0, - 3,2,3,2,2,2,3,2,2,2,2,2,0,2,2,3,2,1,2,1,2,2,0,0,0,0,0,0,0,0,0, - 3,1,3,1,2,3,3,2,2,2,1,3,2,1,1,2,2,3,2,1,2,2,0,0,2,0,0,0,2,0,1, - 2,2,1,2,2,2,2,1,0,2,2,2,2,2,2,2,2,2,1,2,0,2,2,0,0,0,0,0,0,0,2, - 1,0,2,0,0,2,1,0,3,1,0,1,0,1,1,0,0,1,0,0,1,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,1,2,2,3,1,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,3,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,3,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,0,1,2,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,3,2,2,0,3,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,3,3,3,2,0,3,1,1, + 3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,1,1,0,1,3,2,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,3,1,1, + 3,3,3,3,3,3,3,2,3,3,3,3,1,3,3,3,2,3,2,2,2,2,3,3,2,2,0,2,1,1, + 3,1,3,3,2,3,3,3,3,3,3,1,2,3,2,1,1,3,1,1,1,2,1,1,0,1,0,2,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,1,1,3,3,2,2,1, + 3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,1,1,2,1,1, + 3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,3,3,3,1,2,3,3,2,3,0,2,0,1, + 3,3,3,3,3,3,3,2,3,3,3,3,1,3,2,3,3,3,2,2,2,2,3,3,3,2,0,1,1,1, + 3,3,3,3,3,3,3,2,2,3,3,3,1,3,3,2,3,2,3,2,2,2,3,3,3,1,0,2,0,0, + 3,2,3,2,2,2,3,1,2,3,2,1,1,3,1,1,1,1,1,1,1,1,3,3,3,3,0,1,0,1, + 3,2,3,2,3,3,3,2,2,3,3,3,1,3,2,3,2,2,3,2,2,2,3,3,3,1,0,2,0,0, + 3,1,3,3,3,3,3,3,3,3,3,1,1,3,2,1,3,1,1,2,3,2,3,1,1,2,0,2,1,1, + 3,1,3,2,2,1,3,2,1,2,1,2,1,3,1,1,1,1,1,1,2,2,1,1,3,1,0,1,0,0, + 1,3,3,3,3,3,1,3,3,3,3,3,3,2,3,2,3,1,0,1,1,1,0,0,0,0,3,0,0,1, + 1,3,0,3,3,3,0,3,3,0,3,3,3,0,2,3,2,1,0,0,2,0,0,1,0,0,2,0,0,0, + 1,3,0,3,3,3,1,3,3,0,3,3,3,1,3,1,3,2,2,1,2,2,0,0,0,0,3,1,0,0, + 3,1,2,1,1,1,3,1,1,3,1,1,1,3,1,1,0,1,1,1,1,1,3,3,2,1,0,1,0,0, + 3,3,3,2,3,3,1,1,2,2,3,2,1,1,2,3,1,1,1,1,1,1,0,0,1,0,0,0,0,0, + 3,3,1,2,3,2,3,2,1,2,3,1,2,2,3,2,1,2,1,2,3,1,0,1,0,2,0,1,0,0, + 3,1,3,1,1,3,2,1,1,2,1,1,1,1,1,1,1,2,1,1,3,1,0,0,1,0,0,1,1,0, + 0,0,1,1,0,1,1,0,0,3,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, }; @@ -165,8 +165,8 @@ const SequenceModel Iso_8859_1GermanModel = { Iso_8859_1_CharToOrderMap, GermanLangModel, - 31, - (float)0.9936565191798025, + 30, + (float)0.9990042744422496, PR_TRUE, "ISO-8859-1", "de" @@ -176,8 +176,8 @@ const SequenceModel Windows_1252GermanModel = { Windows_1252_CharToOrderMap, GermanLangModel, - 31, - (float)0.9936565191798025, + 30, + (float)0.9990042744422496, PR_TRUE, "WINDOWS-1252", "de" @@ -187,8 +187,11 @@ const LanguageModel GermanModel = { "de", Unicode_CharOrder, - 61, + 59, GermanLangModel, - 31, - (float)0.9936565191798025, + 30, + 3, + (float)0.3342907100494642, + 20, + (float)0.031235642270464454, }; diff --git a/src/LangModels/LangGreekModel.cpp b/src/LangModels/LangGreekModel.cpp index 4038450..7506c46 100644 --- a/src/LangModels/LangGreekModel.cpp +++ b/src/LangModels/LangGreekModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 18:58:31.005768 + * On: 2022-12-14 18:05:43.615861 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_7_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 4X */ - 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 6X */ - 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 32, 49, 40, 42, 31, 50, 47, 43, 33, 55, 52, 39, 44, 34, 35, /* 4X */ + 48, 59, 36, 38, 37, 45, 53, 54, 56, 51, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 32, 49, 40, 42, 31, 50, 47, 43, 33, 55, 52, 39, 44, 34, 35, /* 6X */ + 48, 59, 36, 38, 37, 45, 53, 54, 56, 51, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM, 18,SYM, 19, 23, 15,SYM, 16,SYM, 25, 28, /* BX */ - 45, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* CX */ - 13, 6,ILL, 9, 2, 10, 26, 24, 46, 22, 50, 59, 18, 19, 23, 15, /* DX */ - 61, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* EX */ - 13, 6, 7, 9, 2, 10, 26, 24, 46, 22, 50, 59, 16, 25, 28,ILL, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 17,SYM, 19, 21, 15,SYM, 16,SYM, 24, 26, /* BX */ + 58, 0, 28, 18, 20, 5, 30, 9, 25, 3, 6, 14, 12, 4, 29, 1, /* CX */ + 11, 7,ILL, 8, 2, 13, 27, 23, 41, 22, 46, 60, 17, 19, 21, 15, /* DX */ + 62, 0, 28, 18, 20, 5, 30, 9, 25, 3, 6, 14, 12, 4, 29, 1, /* EX */ + 11, 7, 10, 8, 2, 13, 27, 23, 41, 22, 46, 60, 16, 24, 26,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,142 +89,132 @@ static const unsigned char Windows_1253_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 4X */ - 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 33, 51, 41, 40, 30, 53, 48, 42, 32, 56, 49, 39, 44, 34, 36, /* 6X */ - 47, 60, 35, 37, 38, 43, 55, 52, 58, 54, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 32, 49, 40, 42, 31, 50, 47, 43, 33, 55, 52, 39, 44, 34, 35, /* 4X */ + 48, 59, 36, 38, 37, 45, 53, 54, 56, 51, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 32, 49, 40, 42, 31, 50, 47, 43, 33, 55, 52, 39, 44, 34, 35, /* 6X */ + 48, 59, 36, 38, 37, 45, 53, 54, 56, 51, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */ - SYM,SYM, 18,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 62,SYM,SYM, 19, 23, 15,SYM, 16,SYM, 25, 28, /* BX */ - 45, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* CX */ - 13, 6,ILL, 9, 2, 10, 26, 24, 46, 22, 50, 59, 18, 19, 23, 15, /* DX */ - 61, 0, 21, 17, 20, 5, 29, 11, 27, 3, 8, 12, 14, 4, 31, 1, /* EX */ - 13, 6, 7, 9, 2, 10, 26, 24, 46, 22, 50, 59, 16, 25, 28,ILL, /* FX */ + SYM,SYM, 17,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 61,SYM,SYM, 19, 21, 15,SYM, 16,SYM, 24, 26, /* BX */ + 58, 0, 28, 18, 20, 5, 30, 9, 25, 3, 6, 14, 12, 4, 29, 1, /* CX */ + 11, 7,ILL, 8, 2, 13, 27, 23, 41, 22, 46, 60, 17, 19, 21, 15, /* DX */ + 62, 0, 28, 18, 20, 5, 30, 9, 25, 3, 6, 14, 12, 4, 29, 1, /* EX */ + 11, 7, 10, 8, 2, 13, 27, 23, 41, 22, 46, 60, 16, 24, 26,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 93; +static const int Unicode_Char_size = 84; static const unsigned int Unicode_CharOrder[] = { - 65, 33, 67, 41, 68, 40, 69, 30, 72, 42, 73, 32, 76, 39, 77, 44, - 78, 34, 79, 36, 82, 35, 83, 37, 84, 38, 85, 43, 97, 33, 99, 41, - 100, 40, 101, 30, 104, 42, 105, 32, 108, 39, 109, 44, 110, 34,111, 36, - 114, 35, 115, 37, 116, 38, 117, 43, 902, 18, 904, 19, 905, 23,906, 15, - 908, 16, 910, 25, 911, 28, 912, 45, 913, 0, 914, 21, 915, 17,916, 20, - 917, 5, 918, 29, 919, 11, 920, 27, 921, 3, 922, 8, 923, 12,924, 14, - 925, 4, 926, 31, 927, 1, 928, 13, 929, 6, 931, 7, 931, 9,932, 2, - 933, 10, 934, 26, 935, 24, 936, 46, 937, 22, 940, 18, 941, 19,942, 23, - 943, 15, 945, 0, 946, 21, 947, 17, 948, 20, 949, 5, 950, 29,951, 11, - 952, 27, 953, 3, 954, 8, 955, 12, 956, 14, 957, 4, 958, 31,959, 1, - 960, 13, 961, 6, 962, 7, 963, 9, 964, 2, 965, 10, 966, 26,967, 24, - 968, 46, 969, 22, 972, 16, 973, 25, 974, 28, + 65, 32, 67, 40, 69, 31, 73, 33, 76, 39, 78, 34, 79, 35, 82, 36, + 83, 38, 84, 37, 97, 32, 99, 40, 101, 31, 105, 33, 108, 39,110, 34, + 111, 35, 114, 36, 115, 38, 116, 37, 902, 17, 904, 19, 905, 21,906, 15, + 908, 16, 910, 24, 911, 26, 913, 0, 914, 28, 915, 18, 916, 20,917, 5, + 918, 30, 919, 9, 920, 25, 921, 3, 922, 6, 923, 14, 924, 12,925, 4, + 926, 29, 927, 1, 928, 11, 929, 7, 931, 8, 931, 10, 932, 2,933, 13, + 934, 27, 935, 23, 936, 41, 937, 22, 940, 17, 941, 19, 942, 21,943, 15, + 945, 0, 946, 28, 947, 18, 948, 20, 949, 5, 950, 30, 951, 9,952, 25, + 953, 3, 954, 6, 955, 14, 956, 12, 957, 4, 958, 29, 959, 1,960, 11, + 961, 7, 962, 10, 963, 8, 964, 2, 965, 13, 966, 27, 967, 23,968, 41, + 969, 22, 972, 16, 973, 24, 974, 26, }; /* Model Table: - * Total sequences: 1390 - * First 512 sequences: 0.9624941725288916 - * Next 512 sequences (512-1024): 0.035897222027766316 - * Rest: 0.0016086054433421051 + * Total considered sequences: 1629 / 1764 + * - Positive sequences: first 850 (0.9950192468921002) + * - Probable sequences: next 252 (1102-850) (0.003986534258695218) + * - Neutral sequences: last 662 (0.0009942188492045867) + * - Negative sequences: 135 (off-ratio) * Negative sequences: TODO */ static const PRUint8 GreekLangModel[] = { - 1,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,1, - 2,3,3,3,3,1,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2, - 2,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,1, - 2,3,3,3,3,1,2,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,2,2, - 3,3,2,3,2,3,3,3,2,3,3,3,3,1,2,3,3,2,3,3,0,2,3, - 3,2,2,2,1,3,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,1,3,3,3,3,3,3,3, - 2,3,0,2,3,3,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,3,3,3,3,2,3,2,3,2,3,2,1,2,3,3,3,3,3,3,2,3, - 3,2,2,2,3,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,3,3,2,3,3,3,3,3,1,3,3,3,3,2,3,2,1,3,3,3, - 1,3,3,3,3,2,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2, - 3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3, - 3,3,3,2,3,3,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, - 0,0,0,1,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, - 3,3,3,3,2,3,3,2,3,2,3,3,3,2,2,3,3,2,3,3,3,2,3, - 3,2,3,2,2,3,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0, - 3,3,3,3,2,3,2,0,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3, - 3,3,3,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,2,3,2,3,3,3,3,0,2,3,3,3,3,2,3,3,2,3,3,1, - 2,3,0,2,3,2,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2, - 1,1,3,1,3,1,3,3,3,3,0,0,3,2,3,0,1,3,0,1,2,1,0, - 0,2,1,2,3,1,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,3,3,3,3,2,2,2,2,3,3,3,2,3,3,3,3,3,3,2,3,3, - 3,2,3,3,2,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,2,2,1,3,3,3,2,1,3,3,1,3,3,0,1,3, - 3,1,2,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,1,3,2,3,1,2,1,2,2,3,2,3,3,3,3,1,3,3,0,3,3, - 3,1,2,3,0,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1, - 3,3,3,1,3,3,3,3,3,3,1,2,3,3,3,0,0,3,0,0,3,2,3, - 0,3,0,2,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 2,2,3,2,3,3,3,3,3,3,2,2,3,3,3,0,0,3,0,0,3,2,1, - 0,2,0,2,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,0,3,3,3,3,0,3,0,2,3,3,0,3,3,3,3,3,3,2,0,3, - 3,2,2,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,3,3,2,3,3,3,3,1,2,3,3,3,0,0,3,0,0,3,3,1, - 0,3,0,3,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,3,3,3,2,3,3,3,3,2,1,3,3,3,0,0,3,0,0,3,2,2, - 0,3,0,3,2,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,0,3,1,3,3,0,0,2,3,3,0,0,0,3,3,0,3,3,0,0,3, - 3,0,3,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,1,3,1,3,3,1,1,1,2,2,3,0,0,3,3,2,3,3,2,2,2, - 2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,3,2,3,1,3,3,2,3,0,1,2,3,3,1,0,3,2,1,2,3,1, - 2,2,0,2,3,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,3,1,3,1,3,3,3,3,0,0,2,2,3,0,0,2,0,0,2,1,1, - 0,2,0,2,3,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,3,3,3,3,3,1,0,2,2,3,2,1,2,3,3,1,3,3,0,1,3, - 3,0,2,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,2,3,3,3,3,3,3,0,2,3,3,3,0,0,3,0,0,2,2,1, - 0,2,0,2,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,2,3,2,3,3,1,1,2,3,3,2,0,1,3,3,2,2,3,0,1,3, - 3,0,2,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,2,3,3,3,1,1,0,3,3,3,0,3,2,2,0,3,3,0,0,3, - 3,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,3,2,3,0,3,3,2,3,0,2,2,3,2,0,0,1,0,0,2,2,1, - 0,1,0,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,2,3,1,0,0,1,2,3,1,0,2,3,3,1,2,3,0,2,3, - 2,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,1,2,2,3,3,2,3,2,2,3,2,2,2,2,0,0, - 3,3,1,3,0,3,0,0,0,0,2,3,1,1,1,2,2,1,2,2,0,0,2, - 2,0,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,3,3,2,2,3,3,2,2,3,1,2,2,0,0, - 0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,1,3,3,1,2,3,3,2,2,2,2,2,0,0, - 1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,2,2,2,2,2,1,3,2,1,2,1,0,0, - 0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,3,3,2,2,2,2,2,2,2,2,2,2,2,0,0, - 0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,0,0,0,2,0,2,2,3,3,2,2,2,2,2,2,2,2,2,0,0, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,1,1,2,2,3,2,2,2,2,2,2,0,0, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,3,2,0,2,2,2,2,2,1,1,3,2,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,3,2,1,1,3,2,2,2,2,1,1,2,2,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,3,2,1,2,2,2,1,2,2,1,1,2,1,0,0, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,1,2,2,1,2,2,1,1,3,2,1,0,0, - 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,2,2,2,2,2,2,2,2,0,0,0,2,2,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,2,2,1,2,2,2,2,2,1,0,2,0,0, - 1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, - 0,0,0,0,0,0,0,3,0,2,3,1,1,2,2,1,1,0,1,0,2,2,0,0, - 1,3,1,0,2,0,1,1,1,2,0,0,1,0,1,0,0,0,0,0,2,0,0, - 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,0,2,0,2,0,0,0,0,2,3,0,0,0,2,1,0,2,1,0,2,1, - 2,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,1,3,2,3, + 3,1,3,3,3,2,3,3,3,3,0,0,0,0,0,0,0,1,0,0,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3, + 3,2,3,3,3,2,3,3,3,3,0,0,0,1,0,0,0,0,0,0,3, + 3,3,3,3,2,3,2,3,3,3,3,1,3,3,3,3,3,3,2,3,1, + 3,3,2,3,2,3,2,2,0,3,0,0,0,0,1,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3, + 3,3,3,1,3,3,3,3,3,3,0,1,0,1,0,0,0,1,0,0,3, + 3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,3,3,3,3,3,3, + 3,3,2,3,3,3,2,1,1,2,0,0,0,0,1,0,0,1,0,0,0, + 3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,1,3, + 1,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,1,0,0,3, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3, + 3,3,3,3,3,3,3,2,0,1,0,1,0,0,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,2,0,1,0,0,0,0,0,0,0,0,1, + 3,3,3,3,2,3,3,3,3,3,0,3,3,3,3,3,3,3,2,3,3, + 3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,3,1,3,2,3,3,3,1,3,3,3,0,3,1,1,1,3,2,3, + 0,0,3,1,3,0,3,2,3,0,0,0,0,0,0,0,0,0,0,0,3, + 0,1,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, + 3,3,3,3,3,3,1,3,1,3,1,3,1,3,3,3,3,3,1,3,0, + 3,3,1,3,1,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,3,3,1,1,2,3,2,3,3,3,2,3,3,3,1,3,1, + 3,3,2,3,0,3,3,3,0,1,0,1,0,0,0,0,0,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3, + 3,1,3,0,3,2,3,3,3,3,0,0,0,0,0,0,0,0,0,0,3, + 3,3,3,3,3,3,3,1,2,3,2,3,3,3,3,3,3,3,3,3,3, + 3,3,2,3,3,3,3,3,1,1,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,1,3,3,3,3,3,3,3,3,3,0,3,0,0,0,3,0,3, + 0,3,3,0,3,0,3,3,3,3,0,0,0,0,0,0,0,1,0,0,3, + 2,3,3,2,3,3,3,3,3,3,3,3,3,1,3,0,0,0,3,0,3, + 0,2,3,0,3,0,3,3,2,3,0,0,0,0,0,0,0,0,0,0,3, + 2,3,3,3,3,2,3,3,3,3,3,3,3,1,3,0,0,0,3,0,3, + 0,1,3,0,3,0,3,3,3,3,0,0,0,0,0,0,0,0,1,0,2, + 3,3,1,3,3,3,3,3,0,3,0,0,3,3,3,3,3,3,3,3,3, + 3,3,3,3,0,3,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,0,0,0,3,0,3, + 0,3,3,0,3,0,3,3,3,3,0,0,0,1,0,0,0,0,0,0,3, + 3,3,1,3,2,3,2,3,1,3,0,0,1,3,0,3,3,3,1,3,0, + 3,3,0,3,0,3,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0, + 1,0,3,1,3,2,3,3,3,0,3,3,3,0,3,0,0,0,3,0,3, + 0,1,3,0,3,0,3,1,3,1,0,0,0,0,0,0,0,0,0,0,2, + 2,2,3,3,3,2,3,3,3,1,3,3,3,1,3,2,1,3,3,1,3, + 3,1,2,0,3,1,3,3,2,2,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,0,3,1,3,1,0,3,3,3,3,3,3,1,3,0, + 3,3,0,3,3,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,2,3,3,3,0,3,0,0,0,3,0,3, + 0,2,3,0,3,0,3,3,3,3,0,0,0,0,0,0,0,0,0,0,3, + 3,3,0,3,3,3,0,3,1,3,0,0,3,3,3,3,3,3,1,3,0, + 3,3,0,3,0,3,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0, + 2,2,3,3,3,1,2,3,3,3,3,3,3,0,3,0,0,0,2,0,3, + 0,2,2,0,3,0,2,2,2,3,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,2,3,2,3,1,0,1,3,3,3,3,3,3,3,1, + 3,3,0,3,3,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,1,3,1,3,1,3,1,3,1,1,0,3,3,3,3,3,2,3,3, + 3,3,0,2,0,3,1,2,0,1,1,0,0,0,0,1,0,0,0,0,0, + 3,3,1,3,0,3,1,0,0,3,0,2,1,3,1,3,3,3,1,3,0, + 3,3,0,3,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,1,3,1,3,1,1,1,3,0,1,2,2,1,3,3,3,1,3,0, + 3,3,0,2,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,1,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0, + 0,0,0,0,1,0,0,0,0,0,3,3,3,3,3,3,3,3,3,3,0, + 0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0, + 0,0,0,0,1,0,0,0,0,0,3,2,3,3,2,3,3,3,3,3,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, + 0,0,0,0,1,0,0,0,0,0,3,3,2,3,3,3,3,3,3,3,0, + 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0, + 0,0,0,0,0,0,0,0,0,0,3,3,3,3,3,2,3,3,2,3,0, + 0,0,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,0,0,0,1, + 0,0,0,1,0,0,0,0,0,0,2,2,3,3,3,3,3,3,3,3,0, + 1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,3,3,0, + 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0, + 0,0,0,1,0,0,0,0,0,0,3,3,3,1,3,3,3,3,2,2,0, + 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,0,0,0,0,0,3,3,3,2,3,2,3,3,2,3,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,3,3,3,1,3,1,3,2,3,2,0, + 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,3,3,3,2,3,3,3,2,3,2,0, + 3,3,0,3,0,3,0,0,0,3,0,1,0,3,0,2,2,2,0,1,0, + 3,2,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -232,8 +222,8 @@ const SequenceModel Iso_8859_7GreekModel = { Iso_8859_7_CharToOrderMap, GreekLangModel, - 47, - (float)0.9624941725288916, + 42, + (float)0.9990057811507954, PR_FALSE, "ISO-8859-7", "el" @@ -243,8 +233,8 @@ const SequenceModel Windows_1253GreekModel = { Windows_1253_CharToOrderMap, GreekLangModel, - 47, - (float)0.9624941725288916, + 42, + (float)0.9990057811507954, PR_FALSE, "WINDOWS-1253", "el" @@ -254,8 +244,11 @@ const LanguageModel GreekModel = { "el", Unicode_CharOrder, - 93, + 84, GreekLangModel, - 47, - (float)0.9624941725288916, + 42, + 5, + (float)0.3796638326464317, + 27, + (float)0.03455794045912186, }; diff --git a/src/LangModels/LangHebrewModel.cpp b/src/LangModels/LangHebrewModel.cpp index d040701..d609413 100644 --- a/src/LangModels/LangHebrewModel.cpp +++ b/src/LangModels/LangHebrewModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-17 23:15:54.331334 + * On: 2022-12-14 18:05:54.189238 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_8_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 48, 44, 46, 64, 49, 59, 65, 60, 51, 69, 70, 58, 52, 43, 72, /* 4X */ - 56, 77, 55, 45, 57, 68, 67, 66, 75, 74, 73,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 29, 42, 37, 38, 28, 53, 41, 39, 27, 76, 61, 34, 40, 31, 32, /* 6X */ - 50, 79, 30, 36, 33, 35, 47, 63, 71, 62, 54,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 46, 44, 42, 53, 59, 65, 62, 60, 48, 69, 74, 58, 41, 63, 67, /* 4X */ + 57, 78, 55, 52, 54, 66, 61, 70, 71, 76, 77,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 27, 47, 37, 38, 26, 56, 43, 40, 29, 73, 51, 34, 39, 32, 31, /* 6X */ + 45, 75, 30, 35, 33, 36, 49, 64, 72, 50, 68,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 80,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR, /* BX */ + SYM,SYM,SYM,SYM,SYM, 79,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR, /* BX */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* CX */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,SYM, /* DX */ - 6, 4, 19, 12, 3, 1, 23, 18, 14, 0, 25, 21, 5, 16, 7, 20, /* EX */ - 9, 15, 17, 26, 11, 24, 22, 13, 2, 10, 8,CTR,CTR,SYM,SYM,CTR, /* FX */ + 8, 6, 20, 13, 2, 1, 23, 16, 19, 0, 24, 18, 4, 11, 7, 22, /* EX */ + 10, 17, 12, 25, 14, 28, 21, 15, 3, 9, 5,CTR,CTR,SYM,SYM,CTR, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,172 +89,173 @@ static const unsigned char Windows_1255_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 48, 44, 46, 64, 49, 59, 65, 60, 51, 69, 70, 58, 52, 43, 72, /* 4X */ - 56, 77, 55, 45, 57, 68, 67, 66, 75, 74, 73,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 29, 42, 37, 38, 28, 53, 41, 39, 27, 76, 61, 34, 40, 31, 32, /* 6X */ - 50, 81, 30, 36, 33, 35, 47, 63, 71, 62, 54,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,CTR,SYM, 82,SYM,SYM,SYM,SYM, 83,SYM,CTR,SYM,CTR,CTR,CTR,CTR, /* 8X */ + SYM, 46, 44, 42, 53, 59, 65, 62, 60, 48, 69, 74, 58, 41, 63, 67, /* 4X */ + 57, 78, 55, 52, 54, 66, 61, 70, 71, 76, 77,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 27, 47, 37, 38, 26, 56, 43, 40, 29, 73, 51, 34, 39, 32, 31, /* 6X */ + 45, 75, 30, 35, 33, 36, 49, 64, 72, 50, 68,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,CTR,SYM, 80,SYM,SYM,SYM,SYM, 81,SYM,CTR,SYM,CTR,CTR,CTR,CTR, /* 8X */ CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR,SYM,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 85,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + SYM,SYM,SYM,SYM, 82,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 83,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - SYM,SYM,SYM,SYM, 78, 86, 87,SYM,SYM,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* DX */ - 6, 4, 19, 12, 3, 1, 23, 18, 14, 0, 25, 21, 5, 16, 7, 20, /* EX */ - 9, 15, 17, 26, 11, 24, 22, 13, 2, 10, 8,CTR,CTR,SYM,SYM,CTR, /* FX */ + SYM,SYM,SYM,SYM, 84, 85, 86,SYM,SYM,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* DX */ + 8, 6, 20, 13, 2, 1, 23, 16, 19, 0, 24, 18, 4, 11, 7, 22, /* EX */ + 10, 17, 12, 25, 14, 28, 21, 15, 3, 9, 5,CTR,CTR,SYM,SYM,CTR, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 64; static const unsigned int Unicode_CharOrder[] = { - 65, 48, 66, 44, 67, 46, 69, 49, 70, 59, 72, 60, 73, 51, 76, 58, - 77, 52, 78, 43, 80, 56, 82, 55, 83, 45, 84, 57, 97, 29, 98, 42, - 99, 37, 100, 38, 101, 28, 102, 53, 103, 41, 104, 39, 105, 27, 107, 61, - 108, 34, 109, 40, 110, 31, 111, 32, 112, 50, 114, 30, 115, 36, 116, 33, - 117, 35, 118, 47, 119, 63, 121, 62, 122, 54, 1488, 6, 1489, 4,1490, 19, - 1491, 12, 1492, 3, 1493, 1, 1494, 23, 1495, 18, 1496, 14, 1497, 0,1498, 25, - 1499, 21, 1500, 5, 1501, 16, 1502, 7, 1503, 20, 1504, 9, 1505, 15,1506, 17, - 1507, 26, 1508, 11, 1509, 24, 1510, 22, 1511, 13, 1512, 2, 1513, 10,1514, 8, + 65, 46, 66, 44, 67, 42, 68, 53, 69, 59, 71, 62, 72, 60, 73, 48, + 76, 58, 77, 41, 78, 63, 80, 57, 82, 55, 83, 52, 84, 54, 86, 61, + 97, 27, 98, 47, 99, 37, 100, 38, 101, 26, 102, 56, 103, 43, 104, 40, + 105, 29, 107, 51, 108, 34, 109, 39, 110, 32, 111, 31, 112, 45, 114, 30, + 115, 35, 116, 33, 117, 36, 118, 49, 121, 50, 1488, 8, 1489, 6,1490, 20, + 1491, 13, 1492, 2, 1493, 1, 1494, 23, 1495, 16, 1496, 19, 1497, 0,1498, 24, + 1499, 18, 1500, 4, 1501, 11, 1502, 7, 1503, 22, 1504, 10, 1505, 17,1506, 12, + 1507, 25, 1508, 14, 1509, 28, 1510, 21, 1511, 15, 1512, 3, 1513, 9,1514, 5, }; /* Model Table: - * Total sequences: 1195 - * First 512 sequences: 0.9890483702848128 - * Next 512 sequences (512-1024): 0.010550187339775191 - * Rest: 0.0004014423754119586 + * Total considered sequences: 1605 / 4096 + * - Positive sequences: first 698 (0.9950195693248958) + * - Probable sequences: next 340 (1038-698) (0.003982282638035017) + * - Neutral sequences: last 3058 (0.000998148037069213) + * - Negative sequences: 2491 (off-ratio) * Negative sequences: TODO */ static const PRUint8 HebrewLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,0,0,1,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,2,0,2,3,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,1,3,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,0,0,2,0,0,0,0,0, + 1,0,1,1,1,1,2,1,1,0,0,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,0,3,3,3,0,3,3,3,3,3,3,3,1,2,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,3,2,2,0,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,2,3,3,2,3,3,1,2,0,0,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,0,3,0,0,2,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,0,0,2,2,0,2,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,3,3,3,3,3,3,3,1,2,0,0,3,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,0,0,2,3,2,3,3,2,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,2,3,1,3,3,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,0,0,3,3,2,3,3,2,0,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,3,2,2,3,1,0,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,1,0,2,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,0,0,2,2,0,1,0,1,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,3,1,3,3,0,3,3,0,2,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,3,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,1,0,3,3,3,2,1,0,2,0,0,2,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,1,2,3,3,3,2,1,2,0,2,0,2,0,0,0,0,0, + 2,0,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,2,3,2,2,2,3,1,2,3,3,2,2,3,3,2,2,0,1,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,2,2,3,1,3,3,3,2,2,2,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,0,3,3,2, + 3,3,3,3,2,3,3,3,2,3,0,2,1,2,0,3,0,3,2,2,1,0,0,0,2,0,0,0,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1,0,3,3,0, + 3,3,3,3,2,3,3,3,3,0,0,3,0,2,0,3,0,3,3,2,1,1,1,0,2,0,0,0,0,0,0,0, + 0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,3,2,3, - 2,3,2,2,3,3,2,1,2,2,2,0,0,0,1,2,0,0,2,0,0,1,2,0,1,0,0,0,0,1,0,0, - 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,3,3,3, - 2,2,3,2,2,2,2,1,2,2,2,0,0,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,2,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,1,3,3, - 0,3,3,2,2,2,2,2,2,2,2,0,0,2,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,2,2,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,2,2, - 2,2,2,2,2,2,2,1,2,2,2,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,2,2,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,2,2,2, - 2,2,2,2,2,2,2,1,0,3,2,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,3,3, - 2,3,2,2,2,2,2,2,3,2,2,0,0,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,2,0,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,2,2,0, - 2,2,2,2,2,0,0,3,0,0,0,0,0,0,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,0,2,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,2,0,1, - 2,2,2,2,1,2,2,1,2,0,0,0,0,0,0,2,0,0,1,0,1,0,1,0,0,0,0,0,0,0,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2, - 0,2,3,0,2,2,2,1,2,2,2,0,0,0,0,2,0,0,2,0,0,0,1,0,0,0,0,0,0,2,0,0, - 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, - 2,3,1,2,2,2,2,2,2,0,0,0,0,0,0,1,0,0,2,0,0,1,2,0,0,0,0,0,0,1,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, - 2,2,2,2,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,2,2,0, - 2,0,0,2,1,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,2,2,0, - 2,2,1,1,0,0,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,2, - 2,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2, - 2,2,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, - 2,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,1,0, - 3,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, - 2,0,1,2,0,0,0,0,0,0,0,0,2,0,1,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, - 2,2,1,2,0,2,0,0,0,0,1,0,0,2,0,0,2,2,2,0,0,2,1,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0, - 2,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,2,0,0,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,1, - 2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2, - 0,0,2,1,1,2,2,2,2,0,2,2,0,2,0,1,0,0,1,1,0,0,0,2,0,2,2,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, - 0,0,2,2,0,0,2,1,0,2,0,0,0,2,0,2,0,2,0,0,1,0,0,2,0,0,0,2,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, - 2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2, - 1,0,0,2,2,0,0,0,0,0,0,0,1,0,1,1,2,1,1,2,2,0,0,0,0,0,1,2,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, - 2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,1, - 2,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,1, - 2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, - 2,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,2,0, - 2,0,2,1,0,0,0,1,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, - 2,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,1,1,0,2,1,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, - 2,0,0,2,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,2,0,0,0,0,0,0,2,0,1,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, - 2,0,1,0,0,0,0,0,0,0,0,0,1,0,2,0,1,2,0,1,0,0,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,1,0, - 2,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0, - 2,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2, - 0,0,0,0,0,2,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, - 0,0,0,0,2,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,3,3, + 3,3,3,3,3,3,3,3,1,0,0,3,0,2,0,3,0,3,1,2,0,0,1,0,2,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,2,3, + 2,3,2,3,3,2,3,2,1,1,0,3,0,2,0,3,0,2,3,3,0,0,0,0,2,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,2,3,3, + 3,3,3,3,3,3,3,3,2,0,1,3,0,3,0,2,0,3,2,3,1,1,1,0,3,0,1,0,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,2,3, + 3,3,2,3,3,3,3,1,1,0,0,3,0,1,0,1,0,0,2,2,1,1,0,0,2,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,3,3, + 1,2,2,2,3,1,0,1,3,0,0,0,1,2,0,1,0,1,3,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,1,3, + 1,3,3,2,3,1,2,2,1,1,0,2,0,2,0,2,0,2,2,1,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,2,3, + 1,3,1,3,2,3,2,2,3,0,0,1,0,2,1,1,0,1,2,2,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,3,1, + 3,3,3,3,0,3,2,3,1,0,0,1,0,2,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,2,3, + 0,3,2,1,3,2,0,0,3,0,0,0,1,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,2,3, + 1,1,1,2,2,1,1,2,2,0,1,2,0,0,0,1,1,1,2,0,1,0,0,0,0,1,0,0,1,0,0,0, + 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,0,2, + 2,0,1,1,2,1,1,2,0,0,0,0,0,2,0,2,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,1,2, + 2,2,2,1,2,1,0,0,0,0,1,0,0,1,0,0,0,1,2,1,0,1,0,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,2,0,3, + 0,0,0,0,2,0,0,0,0,1,0,0,1,0,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,0,2,2,3, + 0,0,2,0,2,0,0,0,2,0,2,0,1,0,2,0,2,0,0,0,1,2,1,1,0,1,1,1,1,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,0,3,2,2, + 2,1,2,2,2,0,1,1,2,0,0,1,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,0,2,3,3, + 1,0,2,0,3,0,1,0,1,1,2,0,2,0,1,0,1,0,1,0,1,1,0,0,0,1,0,0,0,0,0,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,2,3,2, + 0,2,2,2,2,0,0,1,2,1,0,0,0,2,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0, + 2,1,2,2,2,2,2,2,1,2,2,1,1,2,1,1,1,1,0,1,2,1,2,2,0,1,2,0,1,1,0,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,0,2,3,2, + 0,0,3,1,3,0,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,1,2, + 2,1,1,2,2,1,0,1,0,2,1,0,0,1,2,0,3,0,0,0,2,1,1,1,0,2,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,3,2, + 0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,1,1,1, + 2,2,2,2,1,2,0,1,0,0,0,1,0,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,0,2,1,1, + 1,1,1,3,2,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,2,0,2, + 0,3,1,0,2,2,0,1,2,0,2,0,1,2,2,0,1,0,1,1,1,1,2,1,2,1,0,2,1,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,2,2,3, + 0,0,0,0,2,0,0,0,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,1,1,1,0,2,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,2,2,2, + 0,0,0,0,2,0,0,0,3,1,0,0,1,0,2,0,2,0,1,0,1,0,1,1,0,1,0,1,1,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,0,2,1,3, + 0,0,0,0,2,0,0,0,0,1,1,0,1,0,2,0,1,0,0,0,1,1,1,0,0,1,0,2,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,0,2,2,2, + 0,1,1,1,1,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,2,2,2, + 0,0,2,0,2,0,0,0,2,1,1,0,0,0,2,0,1,0,1,0,1,1,0,0,0,1,1,1,1,1,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,0,3,0,2, + 0,0,0,0,2,0,0,0,0,1,2,0,1,0,2,0,3,0,1,0,1,1,1,1,0,1,2,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,2,0, + 2,1,2,1,1,1,2,1,0,1,2,1,0,0,1,0,1,2,0,0,1,1,1,2,0,1,1,1,0,0,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,0,2,0,2, + 0,0,0,0,2,0,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,3,0,2, + 0,0,2,0,1,0,0,0,0,0,1,0,1,0,1,0,2,0,0,0,0,2,1,0,0,1,1,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,0,2,2,2, + 1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,1,1,0,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,2,1,2, + 0,0,0,0,1,0,0,0,0,0,1,0,2,0,1,0,1,0,0,0,2,2,1,0,0,1,0,1,0,0,1,0, }; @@ -263,7 +264,7 @@ const SequenceModel Iso_8859_8HebrewModel = Iso_8859_8_CharToOrderMap, HebrewLangModel, 64, - (float)0.9890483702848128, + (float)0.9990018519629308, PR_FALSE, "ISO-8859-8", "he" @@ -274,7 +275,7 @@ const SequenceModel Windows_1255HebrewModel = Windows_1255_CharToOrderMap, HebrewLangModel, 64, - (float)0.9890483702848128, + (float)0.9990018519629308, PR_FALSE, "WINDOWS-1255", "he" @@ -287,5 +288,8 @@ const LanguageModel HebrewModel = 64, HebrewLangModel, 64, - (float)0.9890483702848128, + 5, + (float)0.4409862284999801, + 22, + (float)0.031930999645072025, }; diff --git a/src/LangModels/LangHindiModel.cpp b/src/LangModels/LangHindiModel.cpp index f34a273..d48a34d 100644 --- a/src/LangModels/LangHindiModel.cpp +++ b/src/LangModels/LangHindiModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-19 22:31:01.819135 + * On: 2022-12-14 18:09:07.193296 **/ /* Character Mapping Table: @@ -62,158 +62,205 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ -static const int Unicode_Char_size = 64; +static const int Unicode_Char_size = 86; static const unsigned int Unicode_CharOrder[] = { - 2305, 41, 2306, 25, 2307, 57, 2309, 48, 2310, 22, 2311, 37, 2312, 20,2313, 31, - 2314, 52, 2319, 28, 2320, 53, 2321, 58, 2323, 56, 2324, 40, 2325, 18,2326, 4, - 2327, 10, 2328, 34, 2330, 47, 2331, 50, 2332, 14, 2334, 55, 2335, 26,2336, 44, - 2337, 45, 2338, 54, 2339, 49, 2340, 6, 2341, 35, 2342, 24, 2343, 23,2344, 8, - 2346, 5, 2347, 42, 2348, 46, 2349, 16, 2350, 9, 2351, 32, 2352, 13,2354, 29, - 2357, 0, 2358, 2, 2359, 33, 2360, 12, 2361, 19, 2364, 51, 2366, 3,2367, 1, - 2368, 17, 2369, 27, 2370, 43, 2371, 39, 2375, 15, 2376, 11, 2377, 30,2379, 21, - 2380, 38, 2381, 7, 2404, 36, 2406, 62, 2407, 59, 2408, 61, 2411, 60,2413, 63, + 2305, 46, 2306, 9, 2307, 69, 2309, 26, 2310, 38, 2311, 36, 2312, 44,2313, 39, + 2314, 70, 2315, 71, 2317, 72, 2318, 82, 2319, 33, 2320, 61, 2321, 63,2323, 48, + 2324, 37, 2325, 2, 2326, 41, 2327, 20, 2328, 52, 2329, 79, 2330, 34,2331, 50, + 2332, 19, 2333, 60, 2334, 57, 2335, 28, 2336, 51, 2337, 35, 2338, 56,2339, 40, + 2340, 8, 2341, 27, 2342, 18, 2343, 30, 2344, 6, 2346, 14, 2347, 43,2348, 22, + 2349, 29, 2350, 11, 2351, 13, 2352, 1, 2353, 85, 2354, 15, 2355, 74,2357, 16, + 2358, 25, 2359, 32, 2360, 7, 2361, 12, 2364, 42, 2365, 80, 2366, 0,2367, 5, + 2368, 10, 2369, 23, 2370, 31, 2371, 45, 2372, 77, 2373, 73, 2374, 78,2375, 4, + 2376, 21, 2377, 49, 2378, 84, 2379, 17, 2380, 47, 2381, 3, 2385, 83,2387, 81, + 2400, 76, 2404, 24, 2405, 75, 2406, 54, 2407, 53, 2408, 55, 2409, 68,2410, 66, + 2411, 59, 2412, 64, 2413, 67, 2414, 65, 2415, 58, 2416, 62, }; /* Model Table: - * Total considered sequences: 2113 / 4096 - * - Positive sequences: first 1356 (0.9950083796268726) - * - Probable sequences: next 397 (1753-1356) (0.00399414702204226) - * - Neutral sequences: last 2343 (0.000997473351085132) - * - Negative sequences: 1983 (off-ratio) + * Total considered sequences: 2165 / 7396 + * - Positive sequences: first 1351 (0.9950085486567504) + * - Probable sequences: next 387 (1738-1351) (0.003992816717832359) + * - Neutral sequences: last 5658 (0.0009986346254172718) + * - Negative sequences: 5231 (off-ratio) * Negative sequences: TODO */ static const PRUint8 HindiLangModel[] = { - 3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,0,3,3,3,3,3,0,3,3,0, - 3,0,2,1,0,1,0,3,0,0,1,2,0,1,1,3,0,2,1,0,1,0,0,0,0,1,0,0,0,0,0,0, - 3,0,3,0,3,3,3,0,3,3,3,0,3,3,3,0,3,0,3,3,0,0,1,3,3,3,3,0,3,3,0,1, - 3,3,2,3,3,1,0,0,0,0,3,0,3,3,3,3,2,3,3,2,2,1,1,0,2,2,0,0,0,0,1,0, - 3,3,2,3,0,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,0,0,2,3,2,3,0,3,2,0, - 3,0,0,0,1,0,3,2,0,0,0,3,0,0,3,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0, - 3,0,3,0,3,3,3,0,3,3,3,0,3,3,3,1,3,0,3,3,3,0,1,3,3,3,3,0,3,3,0,3, - 3,3,3,3,3,3,0,0,1,3,3,0,3,3,3,3,0,3,2,2,3,2,3,2,3,1,0,0,0,0,0,0, - 2,3,0,3,0,3,3,3,3,3,2,2,2,3,3,3,3,3,3,0,0,3,0,0,3,3,2,3,0,3,0,0, - 0,0,0,0,3,0,2,0,0,0,0,3,0,3,2,2,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,1,3,3,3,3,2,3,3,0, - 3,3,1,3,1,0,3,3,0,0,0,3,3,3,3,3,0,3,2,0,0,0,3,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,3,0,1,3,3,3,3,1,3,0,0, - 3,0,0,3,2,0,3,3,0,0,0,3,0,1,3,3,0,1,0,1,0,0,0,1,0,3,0,0,0,0,0,0, - 3,0,3,0,3,3,3,0,3,3,3,0,3,3,3,1,3,0,3,3,0,0,0,3,3,0,3,1,0,3,0,0, - 3,3,3,3,0,0,0,0,0,0,3,0,3,3,3,3,0,3,3,0,0,0,2,3,0,0,0,0,0,0,0,0, - 3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0, - 3,3,2,1,3,1,3,3,0,0,3,3,0,3,3,3,0,1,0,0,3,1,0,0,1,3,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,3,3,3,3,3,2,3,3,1, - 3,0,0,3,1,1,3,3,0,2,1,3,3,3,2,3,0,3,3,0,2,0,3,0,0,1,0,0,0,0,1,0, - 3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,3,3,3,2,0, - 3,0,0,0,2,0,3,3,0,1,3,3,3,3,3,0,0,3,0,3,0,0,3,0,0,2,0,0,0,0,0,0, - 3,0,3,0,0,3,3,0,3,3,3,1,3,3,3,0,2,0,3,2,0,0,0,3,3,3,3,0,0,3,0,0, - 3,3,0,3,3,0,0,0,0,0,2,0,3,3,3,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,0,0,3,3,3,3,2,3,3,1, - 2,0,2,0,0,1,3,3,0,2,3,3,0,3,3,3,0,0,0,0,1,0,1,0,0,2,0,0,0,0,1,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,1,3,3,3,2,3,3,3,3,1, - 3,3,2,3,3,0,3,3,1,0,3,3,3,2,3,3,2,3,2,0,0,0,0,0,0,2,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,1,3,0,3,3,3,3,3,0,3,3,0, - 3,0,1,0,2,0,3,0,0,0,3,3,0,3,3,1,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0, - 3,0,3,0,3,3,3,0,3,3,3,0,3,3,3,2,0,0,3,3,3,0,0,2,3,3,3,0,0,3,0,1, - 3,3,2,2,3,2,0,0,0,1,3,0,0,3,3,3,2,3,0,0,1,0,2,0,0,0,0,0,0,0,0,0, - 3,3,0,3,0,1,2,3,0,1,3,3,1,3,2,3,1,3,3,0,0,3,0,1,3,3,3,3,0,3,0,0, - 3,0,0,0,2,1,3,0,0,0,0,3,1,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,0,3,3,3,0,3,3,3,0,3,3,3,0,2,0,3,2,2,0,3,3,3,3,3,0,3,3,0,0, - 3,3,3,2,3,2,0,0,1,0,3,0,3,3,3,3,0,3,3,0,0,0,3,0,3,1,2,0,0,1,1,0, - 3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,0,3,3,0, - 2,2,1,3,3,0,3,3,0,0,2,3,3,3,3,2,0,3,2,3,0,0,1,0,0,0,0,0,0,1,0,0, - 3,3,2,3,1,2,3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,3,0,0,3,3,3,3,0,3,3,0, - 3,0,0,3,2,0,2,2,0,1,2,3,1,3,3,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,3,0,0,1,0,0,3,2,2,0,3,3,1,0,0,0,3,0,0,0,2,0,2,3,3,0,2,3,0,0, - 3,0,0,0,3,0,0,0,0,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, - 3,0,3,0,3,3,3,0,3,3,3,0,3,3,3,0,2,0,3,3,3,0,2,3,3,3,3,0,3,3,0,2, - 3,3,2,3,3,3,0,0,0,1,3,0,2,3,3,3,1,3,0,0,2,0,3,0,1,0,0,0,0,0,0,0, - 3,0,3,0,3,3,3,0,3,3,3,0,3,3,3,1,2,0,3,3,3,0,0,3,3,3,2,0,3,3,0,1, - 3,1,1,0,3,3,0,0,0,3,1,0,3,1,3,3,1,3,1,0,0,0,0,0,1,0,0,0,0,0,0,0, - 3,3,2,3,0,2,3,3,3,2,3,0,3,3,1,3,0,3,3,0,1,3,0,1,0,1,0,3,0,1,0,0, - 1,0,0,0,1,0,1,0,0,0,0,3,0,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,0,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,1,3,0,0,3,3,0,3,0,3,0,0, - 3,0,0,0,1,0,3,3,0,0,2,3,0,3,3,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,0,3,3,3,0,3,1,3,0,3,3,3,0,3,0,3,3,0,0,2,3,3,2,3,1,1,3,0,1, - 3,0,3,3,3,0,0,0,0,0,2,0,3,3,3,3,0,1,2,0,0,0,2,0,0,0,0,1,0,0,2,0, - 3,3,1,3,1,3,3,3,3,3,2,3,3,3,2,3,2,3,3,2,1,3,0,0,1,2,0,3,1,3,3,0, - 2,0,0,0,1,0,2,0,0,0,2,3,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,0,3,3,3,1,3,3,3,0,3,3,3,1,3,0,3,3,3,0,3,3,3,3,3,1,3,3,0,1, - 3,3,2,2,0,3,0,0,0,3,3,0,2,3,3,3,2,3,3,1,0,1,2,1,3,2,0,0,0,0,0,0, - 3,0,3,0,0,2,2,0,3,3,3,0,3,1,3,0,0,0,3,2,1,0,1,2,0,3,3,0,1,3,0,0, - 3,0,0,2,3,0,0,0,0,3,3,0,0,3,1,3,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,1,2,3,3,3,3,0,3,3,1, - 3,0,3,1,2,1,3,0,0,0,1,3,0,3,3,3,2,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0, - 2,0,2,0,0,2,0,0,3,3,3,0,3,3,3,0,0,0,3,0,0,0,0,0,0,3,3,0,0,3,0,2, - 3,0,0,0,0,2,0,0,0,0,2,0,0,3,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,3,3,0,3,3,3,0,3,3,2,0,3,0,3,1,0,0,0,2,3,3,2,0,0,3,0,0, - 0,3,0,2,0,0,0,0,0,2,2,0,3,3,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,1,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,0, - 3,0,1,3,2,1,3,0,0,0,2,3,1,2,3,3,1,3,0,1,0,0,0,0,0,3,0,0,0,0,0,0, - 2,3,1,3,0,2,3,3,2,3,2,2,3,3,3,3,2,3,3,1,0,3,0,3,3,0,2,2,0,0,0,0, - 3,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,0,3,0,0,0,2,3,2,0,0,0,3,0,2,0,3,2,1,0,3,0,0,1,3,3,3,0,2,0,0, - 0,0,0,0,0,0,1,1,0,0,0,3,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,0,2,2,3,3,3,1,0,2,3,0,3,0,3,3,2,2,3,0,1,0,2,0,3,0,3,2,0, - 2,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 3,0,2,0,1,3,2,0,2,3,2,0,3,2,3,0,2,0,3,3,0,0,2,0,3,0,0,0,2,2,0,2, - 3,0,1,0,1,3,0,0,0,0,2,0,0,1,3,2,2,0,1,0,0,0,0,0,1,0,0,2,0,2,0,0, - 3,0,2,0,0,3,3,0,3,3,2,0,3,3,3,0,0,0,3,0,0,0,0,1,0,3,3,0,2,3,0,0, - 3,1,0,2,1,0,0,0,0,1,2,0,0,3,3,3,0,3,1,1,0,0,0,0,1,0,1,0,0,0,0,0, - 2,0,2,0,3,3,3,0,3,3,3,0,3,3,3,0,1,0,3,3,0,0,0,3,3,3,3,0,0,3,0,0, - 0,1,0,3,1,0,0,0,0,1,1,0,0,3,2,1,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,3,0,1,3,3,0,2,0,2,0,1,0,3,0,2,0,3,3,0,0,0,0,3,3,0,0,0,0,0,0, - 0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,3,0,0,0,0,0,0,3,3,1,0,0,0,1,0,0,0,0,1,3,0,1,0,0,0,0,0, - 0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,1,3,2,2,0,1,0,3,0,3,0,0,0,0,0,3,2,0,0,0,3,3,0,3,0,0,0,0,1, - 1,0,0,0,1,0,0,0,0,3,1,0,1,2,1,3,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, - 2,3,1,3,0,0,2,3,2,0,3,3,3,3,0,3,0,3,1,1,0,3,1,0,0,2,2,3,0,3,3,1, - 0,0,0,0,1,0,2,0,0,0,0,3,0,3,2,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,0,3,3,3,0,3,3,3,0,3,3,3,0,3,0,3,3,0,0,2,2,3,3,3,0,3,3,0,1, - 2,3,1,2,0,0,0,0,0,3,3,0,3,3,3,3,1,3,2,0,0,0,3,0,2,0,0,0,0,0,1,0, - 3,3,2,3,0,1,3,2,3,3,1,0,1,1,0,3,3,3,3,3,0,3,0,0,0,3,0,1,0,2,0,0, - 1,0,0,0,0,0,1,0,0,0,1,3,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,0,3,2,3,3,3,2,2,2,3,0,3,1,3,3,1,0,3,0,0,0,2,2,3,0,3,3,0, - 2,0,0,0,0,0,2,0,0,0,0,3,0,1,3,2,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0, - 2,3,2,3,2,0,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,0,2,3,3,3,3,0,3,3,0, - 2,0,2,2,1,1,3,3,0,3,0,3,1,3,2,3,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0, - 3,3,2,3,0,3,3,3,3,3,0,3,3,3,1,3,2,3,3,2,0,3,0,0,0,3,3,3,1,3,1,0, - 3,0,0,0,0,1,3,0,0,1,0,3,0,2,2,0,0,2,0,0,0,0,3,0,3,0,0,0,0,0,0,0, - 3,0,3,1,3,3,3,0,3,3,3,0,3,3,3,0,3,0,3,3,0,0,0,3,3,3,3,0,0,3,0,0, - 3,3,2,3,0,0,0,0,0,1,3,0,1,3,3,3,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, - 3,3,1,3,0,3,3,3,3,2,2,0,3,3,1,3,1,3,3,0,0,3,0,2,2,0,0,3,0,0,0,0, - 3,0,0,1,1,0,1,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,0,2,3,1,1,1,0,0,0,1,2,3,0,3,1,3,0,3,0,0,2,1,2,3,0,3,0,0, - 0,0,0,0,0,0,1,0,0,0,0,3,3,3,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 3,3,1,3,1,2,3,3,3,3,3,3,3,3,3,3,1,3,3,2,2,3,0,1,2,1,0,3,0,3,2,0, - 2,0,0,0,2,0,3,0,0,1,3,3,0,1,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,3,1,0,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,2,3,1,0,0,1,0,0, - 0,1,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,0,0,3,0,2,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0, - 0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,3,0,0,2,0,2,0,0,0,0,0,0,3,0,3,2,2,0,0,0,0,0,3,0,0,0,3,0,0, - 0,0,0,0,0,0,0,0,0,2,0,2,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,3,0,0,1,2,0,0,0,0,0,0,0,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,0,0,1,1,0,1,2,1,0,2,3,1,0,0,0,2,0,2,0,1,1,1,3,2,0,0,3,0,0, - 0,0,0,1,0,1,0,0,0,2,0,0,0,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,0,0,2,0,0,0,3,3,0,0,0,0,3,0,0,0,0,0,0,0,1,0,0,1,0,0, - 1,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,3,3,3, - 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,2,3,3, - 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,3,2, - 1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,3,3, - 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,3,3,3, + 1,3,3,1,0,0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,0,3,0,3,3,1,3,3,3,3,0,3,3,3,3,3,0,0,3,3,3,2, + 3,3,0,3,0,3,0,1,3,3,0,0,0,2,0,0,0,3,2,2,0,0,0,0,0,0,0,3,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,1,3,2,1,0,2,2,3,3,0, + 3,2,3,0,3,1,3,3,2,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,2,3,0,3,3,3,1,3,2,1,2,3,0,0,0,0,3,1,3, + 2,3,3,1,3,0,3,2,3,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,1,0, + 0,3,3,0,1,0,3,3,3,0,0,3,3,3,3,3,3,0,3,3,3,0,3,0,0,3,0,3,3,3,3,0,3,0,3,3,0,0,0,0,3,3,0, + 3,0,0,0,0,0,0,3,3,3,0,0,0,2,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,3,3,0,2,0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,0,3,0,3,3,2,3,3,0,2,0,3,2,3,3,3,1,1,2,3,3,0, + 3,3,0,1,0,1,0,1,3,2,0,0,0,0,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0, + 0,3,3,1,0,0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,0,3,0,3,3,3,3,3,3,3,0,3,3,3,3,1,0,2,2,3,3,0, + 3,0,0,0,0,3,0,3,2,3,0,0,0,1,0,0,0,1,0,3,0,0,0,0,0,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,3,2,3,3,2,3,3,3,1,0,2,0,0,0,0, + 2,3,3,0,3,0,3,0,0,0,1,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,3,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,0,0,3,3,0,3,0,2,3,3,1,0,2,1,0,2,1, + 3,2,3,2,3,1,3,0,1,1,0,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,0,3,3,3,0,3,1,2,3,0,0,0,0,0,0,2,0, + 1,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,3,2,0,2,3,3,3,2,3,0,3,3,3,0,3,0,3,3,0,3,3,3,3,0,2,2,3,3,1,0,2,0,1,3,0, + 2,0,0,0,0,1,0,1,3,3,0,1,0,2,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,1,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,0,3,0,3,3,1,2,3,2,3,0,3,3,3,3,1,0,3,1,3,3,0, + 3,2,0,2,0,2,0,3,3,2,0,0,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,2,3,3,1,1,3,3,1,0,3,1,3,1,1, + 2,3,3,1,3,0,3,3,3,1,0,1,0,1,0,0,0,3,0,2,0,0,0,0,0,0,1,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,3,2,3,1,2,0,3,3,2,0,3,0,0,3,2,0,0,1,0,3,2,0, + 2,0,3,2,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,3,3,3,3,3,1,0,3,2,1,0,0,0,3,0,1, + 2,0,0,2,3,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,1,3,0,3,3,3,0,3,3,2,3,3,1,0,0,0,3,1,0, + 1,1,3,0,3,0,3,1,3,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,0,3,3,0,3,1,0,3,3,0,0,1,0,1,3,0, + 2,0,0,0,3,0,3,1,0,3,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,1,3,3,3,2,2,0,3,3,0,0,1,0,2,0,0, + 2,0,3,0,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,3,3,0,0,0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,0,3,0,3,3,1,3,3,2,3,0,3,3,3,3,3,0,2,0,3,3,1, + 3,3,0,1,0,1,0,0,1,1,0,0,0,2,0,0,0,1,0,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,0,2,0,2,1,3,1,0,2,2,0,0,0,0,2,1,1, + 2,1,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,3,0,0,3,3,3,3,0,0,0,2,0,0,0,0,0,0,3, + 2,1,0,0,3,0,3,0,1,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,1,0,0,2,3,2,3,0,3,0,3,0,0,0,0,3,0,3, + 2,3,3,0,3,0,3,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, + 0,3,3,0,0,0,3,3,3,3,0,3,2,3,3,3,3,0,3,3,3,1,3,0,3,3,1,3,3,2,3,0,2,1,2,3,1,0,0,0,3,1,0, + 2,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,3,2,3,1,1,1,0,2,1,2,3,0,0,3,3,1,0,0,1,0,2,0, + 0,3,3,2,3,0,3,1,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,0,3,0,0,3,2,2,3,3,3,0,3,3,3,3,2,0,3,1,3,3,0, + 3,3,0,3,0,3,0,3,2,2,0,0,0,2,0,0,0,3,1,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,3,0,0,0,2,3,2,0,0,3,3,3,3,1,3,0,2,3,2,0,3,0,2,2,3,0,1,2,1,0,0,2,1,0,3,1,3,2,0,1,0, + 2,1,0,0,0,1,0,1,0,1,2,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,3,3,3,2,2,0,0,1,2,0,3,0,0,2,0,0,0,0,0,0,0,0, + 1,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,0,3,0,0,3,0,3,3,3,3,0,3,0,3,3,0,0,0,0,0,3,0, + 3,0,0,1,0,0,2,2,2,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,2,3,3,3,1,1,1,3,3,3,0,1,2,1,0,3,1,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, + 0,0,0,0,1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,3,2,1,2,3,3,3,0,2,1,3,3,3,0,1,1,0,0,0,0,3,0,0,1,2,0,0,0,0,1,1,0, + 2,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,1,2,3,3,3,1,0,3,1,3,3,3,3,2,3,3,0,3,0,0,0,0,2,1,0,3,0,0,0,2,0,0,0,0,1,0,0, + 0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,2,3,3,0,1,1,1,3,3,0,0,3,1,1,3,1,2,0,0,0,0,1,3,0,0,2,2,0,0,1,0,0,0,0, + 0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,0,3,0,1,3,1,2,3,3,2,1,3,2,3,3,1,0,2,0,1,2,0, + 2,1,0,3,0,2,0,2,2,1,0,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,1,3,1,3,3,1,3,2,0,1,3,3,3,2,1,1,3,0,1,0,0,2,3,3,0,2,0,0,2,0,0,0,0,3,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,3,3,0,3,1,3,3,3,3,0,1,3,3,1,2,0,3,3,1,2,3,0,0,0,0,1,3,3,1,0,1,0,2,0,0, + 3,1,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,0,0,3,2,3,0,1,0,0,3,1,0,3,0,1,1,2,0,0,0,0,1,0,0, + 0,0,0,0,3,1,2,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,3,3,1,0,2,3,3,3,1,2,0,0,1,1,0,2,0,0,3,1,1,0,0,0,0,0,3, + 2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,3,3,0,3,0,3,2,3,3,0,3,3,2,0,3,0,0,3,1,2,3,0,1,0,1,1,3,3,0,0,1,0,3,0,1, + 2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,0,0,0,0,3,0,1,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,2,1,1,0,0,0,0,0,0,0,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,3,3,0,3,2,3,3,3,3,0,3,3,3,0,3,0,3,3,0,0,2,2,3,0,1,3,3,2,3,0,0,1,0,3,0, + 2,3,0,3,0,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,3,3,0,3,1,0,3,3,1,0,3,3,3,0,2,0,0,0,0,2,2,3,2,0,2,0,3,3,0,0,0,0,0,1,0, + 1,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,3,1,3,3,1,3,3,1,1,1,0,1,3,2,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,1,3,3,3,2,0,2,3,3,1,3,2,3,2,1,3,3,1,0,0,0,2,2,0,3,0,0,0,3,0,0,0,0,3,0,3, + 1,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,1,3,3,2,1,2,3,2,3,3,1,3,3,3,3,1,0,0,0,0,0,1,3,1,0,0,1,0,0,0,0,0,1,1, + 0,2,0,1,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,3,3,3,3,3,3,2,3,0,1,0,1,3,1,3,2,2,3,3,1,3,1,1,0,0,2,0,0,3,0,1,0,2,0,0,0,0,0,0,3, + 0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,2,0,0,0,2,3,1,3,0,2,0,2,2,2,1,0,2,2,1,0,2,0,3,3,3,0,3,0,0,0,0,3,0,2,0,0,0,0,0,1,0, + 1,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,2,0,0,0,0,3,3,3,0,0,3,0,3,0,1,0,3,3,2,0,0,0,0,3,1,3,0,2,0,0,3,0,0,0,0,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,3,0,0,0,1,3,2,0,0,0,2,1,1,1,3,0,3,2,3,0,1,0,2,0,0,0,3,1,3,0,0,0,3,2,0,0,1,0,0,2,0, + 0,1,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,3,3,0,2,3,1,2,3,2,0,3,3,3,0,2,0,0,3,0,3,3,0,3,0,0,0,1,3,0,0,0,0,1,2,0, + 1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,2,2,2,3,0,3,1,0,2,3,2,0,1,1,0,0,2,0,1,2,0,0,2,0,0,0,0,1,0,2,1,0,0,1,1,1,0, + 2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,0,3,0,3,0,3,3,3,3,0,0,3,2,0,3,0,0,2,0,0,3,0,0,0,0,1,1,3,1,0,0,1,2,0,0, + 3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,1,3,3,0,1,3,2,3,2,3,0,1,3,2,3,1,1,0,0,0,3,0,0,0,0,2,0,0,3,0,0,0,3,0,0,0,0,0,0,0, + 0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,1,3,2,3,3,3,1,3,3,3,2,2,1,1,1,3,3,0,1,1,0,3,0,1,1,0,0,0,2,0,1,0,0,0,0,0,0,0,0,1,0,0, + 1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,2,3,3,3,1,0,3,3,3,1,0,0,2,1,3,0,0,0,0,1,3,0,1,0,0,3,0,0,3,0,0,0,1,0,0,0,0,1,0,0, + 0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,3,3,3,0,0,3,3,0,0,0,0,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,3,3,3,0,0,2,3,0,0,0,0,3,2,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,2,3,3,0,0,2,3,0,0,0,0,3,3,3,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,1,2,0,2,2,1,0,3,2,2,0,0,0,0,2,0,2,1,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,3, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,2,1,0,0,0,1,0,2,0,0,0,0,0,1,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,3,3,2,0,0,3,3,0,0,0,0,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,3,3,3,0,0,2,3,0,0,0,0,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,1,0,3,2,3,0,2,3,3,1,0,0,0,2,0,1,2,0,0,0,0,3,0,0,0,0,1,0,1,0,0,0,0,2,0,0,0,0,0,0,0, + 0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,2,0,0,0,3,3,3,3,0,0,0,0,1,2,1,0,0,1,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,1,2,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,2,0,0,0,0,1,0,0,0,3,0,2,2,1,0,0,0,0,1,0,3,0,0,0,1,0,0,0,0,0,0,3,0,2,0,0,2,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,0,0,0,3,3,0,0,0,0,0,1,2,2,1,0,0,0,2,0,1,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,2,3,3,0,0,2,3,0,0,0,0,2,3,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,3,3,2,0,0,3,3,0,0,0,0,3,3,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,2,3,2,0,0,3,3,0,0,0,0,2,3,2,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,2,3,3,0,0,2,3,0,0,0,0,2,2,2,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,2,3,2,0,0,3,2,0,0,0,0,3,2,2,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,0,0,1,0,3,3,0,0,0,0,3,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,1,0, + 0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,0,0,2,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -221,8 +268,11 @@ const LanguageModel HindiModel = { "hi", Unicode_CharOrder, - 64, + 86, HindiLangModel, - 64, - (float)0.7999999999999992, + 86, + 0, + (float)0, + 41, + (float)0.030491597602687492, }; diff --git a/src/LangModels/LangHungarianModel.cpp b/src/LangModels/LangHungarianModel.cpp index 2bee180..8a863e9 100644 --- a/src/LangModels/LangHungarianModel.cpp +++ b/src/LangModels/LangHungarianModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:23:30.842519 + * On: 2022-12-14 18:30:39.653036 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 4X */ + SYM, 1, 15, 23, 16, 0, 25, 12, 20, 7, 22, 8, 4, 13, 5, 9, /* 4X */ 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 6X */ + SYM, 1, 15, 23, 16, 0, 25, 12, 20, 7, 22, 8, 4, 13, 5, 9, /* 6X */ 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 55,SYM, 40,SYM, 56, 51,SYM,SYM, 38, 54, 57, 58,SYM, 43, 59, /* AX */ - SYM, 55,SYM, 40,SYM, 60, 51,SYM,SYM, 38, 54, 61, 62,SYM, 43, 63, /* BX */ - 64, 11, 45, 41, 37, 65, 35, 50, 39, 12, 66, 46, 49, 28, 42, 67, /* CX */ - 36, 52, 68, 26, 44, 27, 24,SYM, 48, 69, 30, 31, 29, 47, 70, 53, /* DX */ - 71, 11, 45, 41, 37, 72, 35, 50, 39, 12, 73, 46, 49, 28, 42, 74, /* EX */ - 36, 52, 75, 26, 44, 27, 24,SYM, 48, 76, 30, 31, 29, 47, 77,SYM, /* FX */ + SYM, 58,SYM, 39,SYM, 54, 49,SYM,SYM, 36, 47, 61, 62,SYM, 45, 59, /* AX */ + SYM, 58,SYM, 39,SYM, 54, 49,SYM,SYM, 36, 47, 63, 64,SYM, 45, 59, /* BX */ + 65, 11, 50, 40, 35, 66, 38, 41, 37, 14, 53, 51, 57, 28, 52, 67, /* CX */ + 46, 44, 60, 24, 55, 27, 26,SYM, 48, 68, 30, 31, 29, 43, 56, 42, /* DX */ + 69, 11, 50, 40, 35, 70, 38, 41, 37, 14, 53, 51, 57, 28, 52, 71, /* EX */ + 46, 44, 60, 24, 55, 27, 26,SYM, 48, 72, 30, 31, 29, 43, 56,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,76 +89,77 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 4X */ + SYM, 1, 15, 23, 16, 0, 25, 12, 20, 7, 22, 8, 4, 13, 5, 9, /* 4X */ 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 15, 23, 16, 0, 25, 14, 20, 7, 22, 8, 4, 13, 5, 9, /* 6X */ + SYM, 1, 15, 23, 16, 0, 25, 12, 20, 7, 22, 8, 4, 13, 5, 9, /* 6X */ 21, 34, 6, 3, 2, 19, 18, 32, 33, 17, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 51, 78, 43, 79, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 38,SYM, 51, 80, 43, 81, /* 9X */ - SYM,SYM,SYM, 40,SYM, 55,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM, 82, /* AX */ - SYM,SYM,SYM, 40,SYM,SYM,SYM,SYM,SYM, 55, 54,SYM, 83,SYM, 84, 85, /* BX */ - 86, 11, 45, 41, 37, 87, 35, 50, 39, 12, 88, 46, 49, 28, 42, 89, /* CX */ - 36, 52, 90, 26, 44, 27, 24,SYM, 48, 91, 30, 31, 29, 47, 92, 53, /* DX */ - 93, 11, 45, 41, 37, 94, 35, 50, 39, 12, 95, 46, 49, 28, 42, 96, /* EX */ - 36, 52, 97, 26, 44, 27, 24,SYM, 48, 98, 30, 31, 29, 47, 99,SYM, /* FX */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 36,SYM, 49, 73, 45, 74, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 36,SYM, 49, 75, 45, 76, /* 9X */ + SYM,SYM,SYM, 39,SYM, 58,SYM,SYM,SYM,SYM, 47,SYM,SYM,SYM,SYM, 59, /* AX */ + SYM,SYM,SYM, 39,SYM,SYM,SYM,SYM,SYM, 58, 47,SYM, 54,SYM, 54, 59, /* BX */ + 77, 11, 50, 40, 35, 78, 38, 41, 37, 14, 53, 51, 57, 28, 52, 79, /* CX */ + 46, 44, 60, 24, 55, 27, 26,SYM, 48, 80, 30, 31, 29, 43, 56, 42, /* DX */ + 81, 11, 50, 40, 35, 82, 38, 41, 37, 14, 53, 51, 57, 28, 52, 83, /* EX */ + 46, 44, 60, 24, 55, 27, 26,SYM, 48, 84, 30, 31, 29, 43, 56,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 64; static const unsigned int Unicode_CharOrder[] = { - 65, 1, 66, 15, 67, 23, 68, 16, 69, 0, 70, 25, 71, 14, 72, 20, + 65, 1, 66, 15, 67, 23, 68, 16, 69, 0, 70, 25, 71, 12, 72, 20, 73, 7, 74, 22, 75, 8, 76, 4, 77, 13, 78, 5, 79, 9, 80, 21, 82, 6, 83, 3, 84, 2, 85, 19, 86, 18, 89, 17, 90, 10, 97, 1, - 98, 15, 99, 23, 100, 16, 101, 0, 102, 25, 103, 14, 104, 20,105, 7, + 98, 15, 99, 23, 100, 16, 101, 0, 102, 25, 103, 12, 104, 20,105, 7, 106, 22, 107, 8, 108, 4, 109, 13, 110, 5, 111, 9, 112, 21,114, 6, - 115, 3, 116, 2, 117, 19, 118, 18, 121, 17, 122, 10, 193, 11,201, 12, - 205, 28, 211, 26, 214, 24, 218, 30, 220, 29, 225, 11, 233, 12,237, 28, - 243, 26, 246, 24, 250, 30, 252, 29, 336, 27, 337, 27, 368, 31,369, 31, + 115, 3, 116, 2, 117, 19, 118, 18, 121, 17, 122, 10, 193, 11,201, 14, + 205, 28, 211, 24, 214, 26, 218, 30, 220, 29, 225, 11, 233, 14,237, 28, + 243, 24, 246, 26, 250, 30, 252, 29, 336, 27, 337, 27, 368, 31,369, 31, }; /* Model Table: - * Total sequences: 1122 - * First 512 sequences: 0.9736098834669349 - * Next 512 sequences (512-1024): 0.026285470450181352 - * Rest: 0.00010464608288375879 + * Total considered sequences: 1249 / 1024 + * - Positive sequences: first 728 (0.9950146122040027) + * - Probable sequences: next 170 (898-728) (0.003990738185399856) + * - Neutral sequences: last 126 (0.0009946496105974756) + * - Negative sequences: -225 (off-ratio) * Negative sequences: TODO */ static const PRUint8 HungarianLangModel[] = { - 2,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3,3,3,3,3,3,2,3,2,1,2,2,0,0, - 2,2,3,3,3,3,3,3,3,2,3,2,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,0,2,2,2,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,3,2,3,2,3,2,3,3,3,3,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,3,3,2,3,2,2,3,3,3,3,3,2, - 3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2, - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,3,1,3,3,1,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,2,2,3,2,2,2,2,3,2,3,2,3,3,2,2, - 2,2,3,3,3,3,3,2,3,2,3,2,2,3,3,3,3,2,3,3,3,3,2,3,1,3,2,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,3,3,3,3,2,2, - 2,2,3,3,3,3,3,2,3,2,3,2,2,3,3,3,3,0,3,2,2,3,3,3,0,2,0,0,2,0,0,0, - 2,2,3,3,3,3,3,2,3,2,3,0,2,3,3,3,3,0,3,0,3,3,2,2,0,2,0,0,2,2,0,0, - 3,3,3,3,3,2,2,3,2,3,3,3,3,3,2,3,2,2,2,3,2,3,2,2,2,2,3,2,3,2,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,2,3,3,2,2, - 3,3,2,3,3,2,3,3,2,3,2,3,3,2,0,3,2,2,2,3,2,2,2,2,3,2,3,3,2,2,2,2, - 3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,2,3,2,3,3,2,2,3,2,3,2,3,3,3,2,2,2, - 3,3,3,3,3,3,2,3,2,3,3,3,3,2,2,3,2,0,3,3,2,2,2,2,2,2,3,3,3,3,2,3, - 3,3,2,2,2,2,2,3,2,3,1,3,3,2,2,2,1,2,2,2,2,2,2,2,2,2,2,3,3,2,2,2, - 2,2,3,3,3,3,3,2,3,2,2,3,0,3,3,2,3,2,2,2,2,2,2,3,0,2,2,1,1,0,0,0, - 3,3,2,2,2,2,2,3,2,3,2,3,3,2,2,2,2,2,2,3,2,2,2,2,2,2,2,2,3,2,2,2, - 3,3,3,3,3,2,3,3,2,3,2,3,3,2,2,2,2,2,2,3,2,3,3,3,2,2,2,2,3,3,2,0, - 3,3,3,2,3,3,2,2,2,3,2,3,3,2,2,2,3,2,2,3,2,2,2,2,2,2,3,1,2,2,3,2, - 3,3,2,3,2,2,2,3,3,3,2,2,3,2,2,2,2,2,2,2,3,2,2,3,2,2,2,0,3,0,0,1, - 0,0,3,3,3,3,3,0,3,0,3,0,0,2,2,3,3,0,3,0,2,2,1,2,0,2,0,0,0,0,0,0, - 3,3,2,2,2,2,3,3,2,3,0,2,3,2,2,2,2,2,1,2,1,2,2,2,3,2,2,3,1,3,2,2, - 2,2,3,3,3,3,3,3,3,2,3,2,2,2,3,3,3,1,2,2,2,3,3,3,1,2,0,0,1,2,2,0, - 3,2,3,3,3,2,3,3,3,2,3,2,2,2,2,3,3,0,3,1,2,2,3,2,2,2,0,1,2,2,2,0, - 2,2,3,2,2,3,3,0,2,0,3,0,0,3,3,2,2,0,3,0,0,2,3,2,0,0,0,0,0,0,0,0, - 0,2,3,2,3,3,2,0,3,0,3,0,0,2,3,2,2,0,2,0,2,0,2,2,0,0,0,0,0,0,0,0, - 0,2,3,2,3,3,3,2,2,0,2,0,1,1,3,2,2,0,2,0,2,2,3,2,1,2,0,0,0,0,0,0, - 2,2,2,2,2,2,3,2,2,0,2,0,0,2,2,2,2,0,3,0,2,0,2,2,0,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,1,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,1,3,1,1,2,1,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,1,3,3,3,3,3,3,3,3,1,0,2,2,2,1, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,1,3,3,3,3,3,3,3,3,3,1,3,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3, + 1,2,3,3,3,3,3,3,3,1,3,1,3,3,3,3,3,1,3,3,3,3,3,3,0,3,0,0,3,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3, + 1,2,3,3,3,3,3,3,3,1,3,0,3,3,2,3,3,1,3,1,3,3,3,3,0,1,0,0,1,1,0,0, + 3,3,2,3,3,3,3,3,2,3,1,3,1,1,3,3,3,3,1,3,2,3,2,2,3,2,3,3,3,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,2,3,3,3,2,3,1,3,2,1,3,3,2,1,3,3,2,1,3,2,3,3,3,3,3,3,2,3, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,3,3,1,3,1,3,3,2,3,1,2,0,0,1,0,0,0, + 3,3,3,3,3,3,3,3,1,3,1,3,1,3,3,2,2,3,1,3,2,1,2,3,3,3,2,3,3,2,3,2, + 3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,2,2,2,3,3,3,3,3,3,3,3,3,2,3,3,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,1,2,3,2,3,1,2,3,2,2,3,3,3,2,3,1,2,2,3,2, + 3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,2,2,2,2,3,3,1,1,3,2,1,1,0,3,0,0,0, + 2,3,3,3,3,3,3,3,3,1,3,2,3,3,1,3,3,1,3,1,3,3,3,3,1,3,1,0,0,1,1,0, + 3,3,3,2,3,2,3,3,1,3,0,3,2,2,3,3,2,2,1,3,1,2,3,3,2,3,3,3,1,3,2,2, + 1,0,3,3,3,3,3,0,3,0,3,0,3,3,0,3,3,1,3,1,2,3,1,2,0,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,3,3,0,3,1,3,3,3,2,0,3,0,1,2,1,2,0, + 1,2,3,3,3,3,3,1,2,0,3,0,3,3,1,3,2,0,3,1,0,2,3,3,0,1,0,0,0,1,0,0, + 1,1,3,3,3,3,3,0,3,0,3,0,3,2,0,1,2,0,2,0,1,1,2,1,0,1,0,0,0,1,0,0, + 1,2,3,3,3,3,3,1,2,0,3,0,3,1,1,3,2,0,2,0,1,1,3,3,0,1,0,0,0,0,0,0, + 3,1,2,3,3,3,3,1,3,0,3,0,1,2,1,2,0,0,3,2,2,0,3,1,0,3,0,0,1,0,0,0, }; @@ -167,7 +168,7 @@ const SequenceModel Iso_8859_2HungarianModel = Iso_8859_2_CharToOrderMap, HungarianLangModel, 32, - (float)0.9736098834669349, + (float)0.9990053503894025, PR_FALSE, "ISO-8859-2", "hu" @@ -178,7 +179,7 @@ const SequenceModel Windows_1250HungarianModel = Windows_1250_CharToOrderMap, HungarianLangModel, 32, - (float)0.9736098834669349, + (float)0.9990053503894025, PR_FALSE, "WINDOWS-1250", "hu" @@ -191,5 +192,8 @@ const LanguageModel HungarianModel = 64, HungarianLangModel, 32, - (float)0.9736098834669349, + 5, + (float)0.3814879541823728, + 25, + (float)0.03523635287103696, }; diff --git a/src/LangModels/LangIrishModel.cpp b/src/LangModels/LangIrishModel.cpp index a9d814f..92ff29c 100644 --- a/src/LangModels/LangIrishModel.cpp +++ b/src/LangModels/LangIrishModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:09:36.532691 + * On: 2022-12-14 18:08:23.900202 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */ - 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */ - 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 15, 8, 11, 5, 19, 12, 3, 1, 27, 23, 10, 13, 2, 9, /* 4X */ + 22, 30, 4, 6, 7, 14, 24, 26, 29, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 15, 8, 11, 5, 19, 12, 3, 1, 27, 23, 10, 13, 2, 9, /* 6X */ + 22, 30, 4, 6, 7, 14, 24, 26, 29, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 35,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 42, 43,SYM,SYM, 44,SYM,SYM,SYM, 45, 46, 47,SYM, /* BX */ - 48, 14, 49, 50, 34, 51, 52, 36, 32, 18, 40, 53, 54, 17, 55, 39, /* CX */ - 56, 37, 57, 21, 58, 59, 33,SYM, 38, 60, 20, 61, 31, 62, 63, 64, /* DX */ - 65, 14, 66, 67, 34, 68, 69, 36, 32, 18, 40, 70, 71, 17, 72, 39, /* EX */ - 73, 37, 74, 21, 75, 76, 33,SYM, 38, 77, 20, 78, 31, 79, 80, 81, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 45,SYM, 45,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 44, 50,SYM,SYM, 44,SYM,SYM,SYM, 51, 52, 53,SYM, /* BX */ + 40, 16, 54, 55, 31, 36, 56, 35, 33, 18, 46, 37, 57, 17, 43, 49, /* CX */ + 47, 41, 39, 20, 58, 59, 32,SYM, 38, 60, 21, 61, 34, 42, 62, 63, /* DX */ + 40, 16, 64, 65, 31, 36, 66, 35, 33, 18, 46, 37, 67, 17, 43, 49, /* EX */ + 47, 41, 39, 20, 68, 69, 32,SYM, 38, 70, 21, 71, 34, 42, 72, 73, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */ - 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */ - 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 15, 8, 11, 5, 19, 12, 3, 1, 27, 23, 10, 13, 2, 9, /* 4X */ + 22, 30, 4, 6, 7, 14, 24, 26, 29, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 15, 8, 11, 5, 19, 12, 3, 1, 27, 23, 10, 13, 2, 9, /* 6X */ + 22, 30, 4, 6, 7, 14, 24, 26, 29, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 82,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 83, 14, 84, 85, 34, 86, 87, 36, 32, 18, 40, 88, 89, 17, 90, 39, /* CX */ - 91, 37, 92, 21, 93, 94, 33,SYM, 38, 95, 20, 96, 31, 97, 98, 99, /* DX */ - 100, 14,101,102, 34,103,104, 36, 32, 18, 40,105,106, 17,107, 39, /* EX */ - 108, 37,109, 21,110,111, 33,SYM, 38,112, 20,113, 31,114,115,116, /* FX */ + SYM,SYM,SYM,SYM,SYM, 74,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 40, 16, 75, 76, 31, 36, 77, 35, 33, 18, 46, 37, 78, 17, 43, 49, /* CX */ + 47, 41, 39, 20, 79, 80, 32,SYM, 38, 81, 21, 82, 34, 42, 83, 84, /* DX */ + 40, 16, 85, 86, 31, 36, 87, 35, 33, 18, 46, 37, 88, 17, 43, 49, /* EX */ + 47, 41, 39, 20, 89, 90, 32,SYM, 38, 91, 21, 92, 34, 42, 93, 94, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */ - 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */ - 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 15, 8, 11, 5, 19, 12, 3, 1, 27, 23, 10, 13, 2, 9, /* 4X */ + 22, 30, 4, 6, 7, 14, 24, 26, 29, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 15, 8, 11, 5, 19, 12, 3, 1, 27, 23, 10, 13, 2, 9, /* 6X */ + 22, 30, 4, 6, 7, 14, 24, 26, 29, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,117,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 118, 14,119,120, 34,121,122, 36, 32, 18, 40,123,124, 17,125, 39, /* CX */ - 126, 37,127, 21,128,129, 33,SYM, 38,130, 20,131, 31,132,133,134, /* DX */ - 135, 14,136,137, 34,138,139, 36, 32, 18, 40,140,141, 17,142, 39, /* EX */ - 143, 37,144, 21,145,146, 33,SYM, 38,147, 20,148, 31, 41,149,150, /* FX */ + SYM,SYM,SYM,SYM,SYM, 95,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 40, 16, 96, 97, 31, 36, 98, 35, 33, 18, 46, 37, 99, 17, 43, 49, /* CX */ + 100, 41, 39, 20,101,102, 32,SYM, 38,103, 21,104, 34,105, 48,106, /* DX */ + 40, 16,107,108, 31, 36,109, 35, 33, 18, 46, 37,110, 17, 43, 49, /* EX */ + 111, 41, 39, 20,112,113, 32,SYM, 38,114, 21,115, 34,116, 48,117, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,75 +131,76 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 4X */ - 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 16, 8, 11, 5, 19, 12, 3, 1, 27, 25, 9, 13, 2, 10, /* 6X */ - 22, 30, 4, 6, 7, 15, 24, 26, 29, 23, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,151,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,152,ILL,153,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM,154,ILL,155,156, /* 9X */ + SYM, 0, 15, 8, 11, 5, 19, 12, 3, 1, 27, 23, 10, 13, 2, 9, /* 4X */ + 22, 30, 4, 6, 7, 14, 24, 26, 29, 25, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 15, 8, 11, 5, 19, 12, 3, 1, 27, 23, 10, 13, 2, 9, /* 6X */ + 22, 30, 4, 6, 7, 14, 24, 26, 29, 25, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,118,SYM,SYM,SYM,SYM,SYM,SYM, 45,SYM,119,ILL, 44,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 45,SYM,120,ILL, 44,121, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,157,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 158, 14,159,160, 34,161,162, 36, 32, 18, 40,163,164, 17,165, 39, /* CX */ - 166, 37,167, 21,168,169, 33,SYM, 38,170, 20,171, 31,172,173,174, /* DX */ - 175, 14,176,177, 34,178,179, 36, 32, 18, 40,180,181, 17,182, 39, /* EX */ - 183, 37,184, 21,185,186, 33,SYM, 38,187, 20,188, 31,189,190,191, /* FX */ + SYM,SYM,SYM,SYM,SYM,122,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 40, 16,123,124, 31, 36,125, 35, 33, 18, 46, 37,126, 17, 43, 49, /* CX */ + 47, 41, 39, 20,127,128, 32,SYM, 38,129, 21,130, 34, 42,131,132, /* DX */ + 40, 16,133,134, 31, 36,135, 35, 33, 18, 46, 37,136, 17, 43, 49, /* EX */ + 47, 41, 39, 20,137,138, 32,SYM, 38,139, 21,140, 34, 42,141,142, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 62; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 16, 67, 8, 68, 11, 69, 5, 70, 19, 71, 12, 72, 3, - 73, 1, 74, 27, 75, 25, 76, 9, 77, 13, 78, 2, 79, 10, 80, 22, - 81, 30, 82, 4, 83, 6, 84, 7, 85, 15, 86, 24, 87, 26, 88, 29, - 89, 23, 90, 28, 97, 0, 98, 16, 99, 8, 100, 11, 101, 5,102, 19, - 103, 12, 104, 3, 105, 1, 106, 27, 107, 25, 108, 9, 109, 13,110, 2, - 111, 10, 112, 22, 113, 30, 114, 4, 115, 6, 116, 7, 117, 15,118, 24, - 119, 26, 120, 29, 121, 23, 122, 28, 193, 14, 201, 18, 205, 17,211, 21, - 218, 20, 225, 14, 233, 18, 237, 17, 243, 21, 250, 20, + 65, 0, 66, 15, 67, 8, 68, 11, 69, 5, 70, 19, 71, 12, 72, 3, + 73, 1, 74, 27, 75, 23, 76, 10, 77, 13, 78, 2, 79, 9, 80, 22, + 81, 30, 82, 4, 83, 6, 84, 7, 85, 14, 86, 24, 87, 26, 88, 29, + 89, 25, 90, 28, 97, 0, 98, 15, 99, 8, 100, 11, 101, 5,102, 19, + 103, 12, 104, 3, 105, 1, 106, 27, 107, 23, 108, 10, 109, 13,110, 2, + 111, 9, 112, 22, 113, 30, 114, 4, 115, 6, 116, 7, 117, 14,118, 24, + 119, 26, 120, 29, 121, 25, 122, 28, 193, 16, 201, 18, 205, 17,211, 20, + 218, 21, 225, 16, 233, 18, 237, 17, 243, 20, 250, 21, }; /* Model Table: - * Total sequences: 707 - * First 512 sequences: 0.9976732191628278 - * Next 512 sequences (512-1024): 0.0023267808371722288 - * Rest: -3.5561831257524545e-17 + * Total considered sequences: 853 / 961 + * - Positive sequences: first 461 (0.995039617055503) + * - Probable sequences: next 163 (624-461) (0.003960483178947816) + * - Neutral sequences: last 337 (0.0009998997655491504) + * - Negative sequences: 108 (off-ratio) * Negative sequences: TODO */ static const PRUint8 IrishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,0,0,3,3,3,3,3,2,3,3,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,3,3,3,3,0,2,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,2,3,0,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,0,0,0,0, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,3,3,3,3,3,3,2,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,3,3,3,2,3,0,3,2,0,3,2,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,3,0,3,3,3,3,3,3,2,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,3,2,0,3,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,2,3,3,0,2,0,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,2,2,3,3,3,0,0,2,0,2,0,0, - 0,3,3,0,3,2,3,3,3,3,0,3,3,3,0,0,3,3,0,3,0,0,3,0,2,0,0,0,2,0,0, - 3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,0,2,0,0,3,2,2,2,0,0,3,2,0, - 3,3,3,3,3,3,3,2,2,3,3,2,0,0,3,3,3,3,3,2,3,3,3,3,0,0,0,2,0,0,0, - 2,0,3,0,3,0,3,3,3,3,3,3,3,2,0,0,3,0,0,2,3,2,2,0,2,0,0,0,0,0,0, - 3,3,2,0,2,2,2,3,0,2,2,2,0,2,0,0,2,0,0,0,2,0,2,0,2,0,0,0,0,0,0, - 3,3,0,3,3,3,2,3,2,3,3,0,3,2,3,3,2,3,3,3,0,3,0,2,0,0,0,0,0,0,0, - 0,3,3,0,3,0,3,3,3,3,0,3,3,2,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0, - 0,3,3,0,3,0,3,3,3,3,0,3,3,3,0,0,3,0,0,2,3,0,3,0,2,0,0,0,2,0,0, - 3,3,2,3,3,3,3,3,3,3,3,2,0,2,3,3,0,3,3,0,2,3,3,2,0,0,0,2,0,2,0, - 3,2,3,0,2,3,3,2,3,3,3,2,0,3,0,3,2,0,0,2,0,0,0,2,0,0,3,0,0,0,0, - 3,3,2,0,3,3,3,2,0,2,3,0,2,0,3,2,0,3,3,0,0,3,0,2,2,0,0,0,0,0,0, - 3,3,3,3,2,3,3,0,0,3,3,0,0,3,2,3,2,0,2,0,0,0,2,3,2,2,2,0,0,0,0, - 3,3,3,3,2,3,3,2,0,3,3,2,0,0,0,2,0,0,0,0,0,0,2,2,0,2,2,0,0,0,0, - 3,3,2,0,0,3,0,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 3,3,2,3,0,3,2,2,0,0,3,2,2,2,0,3,0,0,0,0,0,2,0,2,0,0,0,0,2,0,2, - 3,3,0,0,0,2,0,2,2,2,3,0,0,0,0,0,0,0,0,3,0,0,2,2,2,0,0,0,0,2,0, - 3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,1,3,3,3,3,3,2,3,2,2, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,2,0,1,3,3,3,3,3,3,3,1,2,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,0,1, + 3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,3,2,1,0,0,0, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,1,1, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,3,2,0,3,3,3,3,3,2,3,3,1, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,1,3,2,1,2,0,1, + 3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,3,3,2,3,3,2,2,3,3,2,1,2,1,0, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,3,3,3,1,3,1,2,1,1,2,0,3, + 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,1,3,0,3,3,3,3,3,3,1,3,2,1, + 3,3,2,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,1,2,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,3,1,1,2,3,3,1,1,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,2,3,2,0,1,1,1,2,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,1,3,3,3,3,3,3,2,3,3,3,0,2,3,0,0,1,0,0, + 3,3,3,2,3,3,3,3,3,2,3,3,3,3,2,3,3,3,1,2,0,0,3,2,2,2,1,1,2,2,0, + 3,3,2,3,3,3,3,2,2,3,3,1,1,1,3,2,3,3,3,1,3,2,3,0,0,3,0,1,0,0,2, + 0,3,3,0,3,0,3,3,3,0,3,3,3,3,0,3,0,2,0,3,0,1,3,0,1,0,0,0,0,0,0, + 2,1,3,0,3,0,3,3,3,3,3,3,2,2,1,3,0,0,0,2,1,3,1,1,2,0,0,0,0,0,0, + 3,3,2,0,2,2,2,2,1,2,2,2,1,1,1,2,1,1,0,1,2,2,1,1,1,0,0,0,0,0,0, + 3,3,2,3,3,3,1,2,1,3,3,0,2,1,3,0,3,3,3,3,3,2,0,0,0,2,0,0,0,0,0, + 1,3,3,1,3,0,3,3,3,0,3,3,3,3,0,2,0,0,0,2,0,2,3,0,1,0,1,0,1,0,0, + 0,3,3,0,3,0,3,3,3,0,3,3,2,3,0,3,0,1,0,0,0,0,3,0,0,0,0,1,0,0,0, + 3,3,2,3,3,3,3,3,2,3,3,2,0,1,3,2,3,3,3,2,3,3,3,1,0,2,0,1,1,1,0, + 3,3,2,3,3,3,3,1,1,3,3,1,0,3,3,1,0,0,1,0,1,0,0,2,1,3,1,0,0,0,0, + 3,3,2,0,2,3,3,2,0,3,2,0,2,0,2,1,3,2,3,1,3,1,0,1,1,2,0,0,0,0,0, + 3,2,3,1,2,3,3,2,2,3,3,2,1,2,2,3,0,0,0,1,0,0,2,1,1,1,2,0,1,1,0, + 3,3,2,2,2,3,2,2,0,3,2,1,0,0,0,0,0,0,0,1,0,0,1,1,0,2,2,0,0,0,0, + 3,3,1,0,2,3,0,0,0,3,0,0,0,1,3,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0, + 3,3,1,3,0,3,2,1,0,2,1,1,2,1,2,1,1,0,0,0,2,0,0,1,1,2,1,0,2,0,0, + 2,3,0,0,0,2,0,2,1,2,0,0,0,0,1,0,0,0,0,2,0,0,1,0,1,0,1,0,0,0,0, + 2,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, }; @@ -208,7 +209,7 @@ const SequenceModel Iso_8859_15IrishModel = Iso_8859_15_CharToOrderMap, IrishLangModel, 31, - (float)0.9976732191628278, + (float)0.9990001002344508, PR_TRUE, "ISO-8859-15", "ga" @@ -219,7 +220,7 @@ const SequenceModel Iso_8859_1IrishModel = Iso_8859_1_CharToOrderMap, IrishLangModel, 31, - (float)0.9976732191628278, + (float)0.9990001002344508, PR_TRUE, "ISO-8859-1", "ga" @@ -230,7 +231,7 @@ const SequenceModel Iso_8859_9IrishModel = Iso_8859_9_CharToOrderMap, IrishLangModel, 31, - (float)0.9976732191628278, + (float)0.9990001002344508, PR_TRUE, "ISO-8859-9", "ga" @@ -241,7 +242,7 @@ const SequenceModel Windows_1252IrishModel = Windows_1252_CharToOrderMap, IrishLangModel, 31, - (float)0.9976732191628278, + (float)0.9990001002344508, PR_TRUE, "WINDOWS-1252", "ga" @@ -254,5 +255,8 @@ const LanguageModel IrishModel = 62, IrishLangModel, 31, - (float)0.9976732191628278, + 3, + (float)0.3435309101596943, + 19, + (float)0.03729009503282062, }; diff --git a/src/LangModels/LangItalianModel.cpp b/src/LangModels/LangItalianModel.cpp index c9fd4f7..b9f5b71 100644 --- a/src/LangModels/LangItalianModel.cpp +++ b/src/LangModels/LangItalianModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 01:31:12.602629 + * On: 2022-12-14 18:09:46.154350 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 21, 37, 46, 38, 35, 55, 39, 41, 22, 30, 40, 45, 29, 36, 48, 56, /* CX */ - 57, 42, 23, 33, 49, 58, 32,SYM, 52, 24, 43, 59, 34, 60, 61, 44, /* DX */ - 21, 37, 46, 38, 35, 62, 39, 41, 22, 30, 40, 45, 29, 36, 48, 63, /* EX */ - 64, 42, 23, 33, 49, 65, 32,SYM, 52, 24, 43, 66, 34, 67, 68, 69, /* FX */ + SYM,SYM,SYM,SYM,SYM, 61,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 21, 36, 41, 42, 33, 47, 35, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* CX */ + 62, 43, 23, 37, 50, 53, 32,SYM, 49, 25, 51, 55, 34, 63, 56, 52, /* DX */ + 21, 36, 41, 42, 33, 47, 35, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* EX */ + 64, 43, 23, 37, 50, 53, 32,SYM, 49, 25, 51, 55, 34, 65, 56, 66, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 70,SYM,SYM,SYM,ILL, 71,SYM,SYM, 72, 73, 53, 74,SYM,ILL, 50, /* AX */ - SYM, 75,SYM,SYM,SYM,SYM, 76,SYM,SYM, 77, 78, 53, 79,SYM,ILL, 50, /* BX */ - 21, 37, 46,ILL, 35, 80, 81, 41, 22, 30, 40, 45, 29, 36, 48, 82, /* CX */ - ILL, 42, 23, 33, 49, 83, 32,SYM, 84, 24, 43, 85, 34, 86, 87, 44, /* DX */ - 21, 37, 46,ILL, 35, 88, 89, 41, 22, 30, 40, 45, 29, 36, 48, 90, /* EX */ - ILL, 42, 23, 33, 49, 91, 32,SYM, 92, 24, 43, 93, 34, 94, 95,SYM, /* FX */ + SYM, 67,SYM,SYM,SYM,ILL, 68,SYM,SYM, 60, 59, 58, 69,SYM,ILL, 70, /* AX */ + SYM, 71,SYM,SYM,SYM,SYM, 72,SYM,SYM, 73, 59, 58, 74,SYM,ILL, 75, /* BX */ + 21, 36, 41,ILL, 33, 76, 77, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* CX */ + ILL, 43, 23, 37, 50, 78, 32,SYM, 79, 25, 51, 55, 34, 80, 81, 52, /* DX */ + 21, 36, 41,ILL, 33, 82, 83, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* EX */ + ILL, 43, 23, 37, 50, 84, 32,SYM, 85, 25, 51, 55, 34, 86, 87,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 96,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 21, 37, 46, 38, 35, 97, 39, 41, 22, 30, 40, 45, 29, 36, 48, 98, /* CX */ - 53, 42, 23, 33, 49, 99, 32,SYM, 52, 24, 43,100, 34,101,102, 44, /* DX */ - 21, 37, 46, 38, 35,103, 39, 41, 22, 30, 40, 45, 29, 36, 48,104, /* EX */ - 53, 42, 23, 33, 49,105, 32,SYM, 52, 24, 43,106, 34,107,108,109, /* FX */ + SYM,SYM,SYM,SYM,SYM, 88,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 21, 36, 41, 42, 33, 47, 35, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* CX */ + 58, 43, 23, 37, 50, 53, 32,SYM, 49, 25, 51, 55, 34, 60, 59, 52, /* DX */ + 21, 36, 41, 42, 33, 47, 35, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* EX */ + 58, 43, 23, 37, 50, 53, 32,SYM, 49, 25, 51, 55, 34, 89, 59, 90, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,18 +131,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,110,111,SYM,SYM,112,SYM,SYM,SYM, 47, 47,113,SYM, /* BX */ - 21, 37, 46, 38, 35,114, 39, 41, 22, 30, 40, 45, 29, 36, 48,115, /* CX */ - 116, 42, 23, 33, 49,117, 32,SYM, 52, 24, 43,118, 34,119,120, 44, /* DX */ - 21, 37, 46, 38, 35,121, 39, 41, 22, 30, 40, 45, 29, 36, 48,122, /* EX */ - 123, 42, 23, 33, 49,124, 32,SYM, 52, 24, 43,125, 34,126,127,128, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 40,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 45, 91,SYM,SYM, 45,SYM,SYM,SYM, 57, 57, 92,SYM, /* BX */ + 21, 36, 41, 42, 33, 47, 35, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* CX */ + 93, 43, 23, 37, 50, 53, 32,SYM, 49, 25, 51, 55, 34, 94, 56, 52, /* DX */ + 21, 36, 41, 42, 33, 47, 35, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* EX */ + 95, 43, 23, 37, 50, 53, 32,SYM, 49, 25, 51, 55, 34, 96, 56, 97, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -152,79 +152,85 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ - 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,129,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 47,ILL,130,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 47,ILL,131,132, /* 9X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 18, 9, 10, 1, 16, 14, 19, 0, 31, 24, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 26, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 98,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 57,ILL, 45,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 57,ILL, 45, 99, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,133,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 21, 37, 46, 38, 35,134, 39, 41, 22, 30, 40, 45, 29, 36, 48,135, /* CX */ - 136, 42, 23, 33, 49,137, 32,SYM, 52, 24, 43,138, 34,139,140, 44, /* DX */ - 21, 37, 46, 38, 35,141, 39, 41, 22, 30, 40, 45, 29, 36, 48,142, /* EX */ - 143, 42, 23, 33, 49,144, 32,SYM, 52, 24, 43,145, 34,146,147,148, /* FX */ + SYM,SYM,SYM,SYM,SYM,100,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 21, 36, 41, 42, 33, 47, 35, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* CX */ + 101, 43, 23, 37, 50, 53, 32,SYM, 49, 25, 51, 55, 34,102, 56, 52, /* DX */ + 21, 36, 41, 42, 33, 47, 35, 39, 22, 30, 44, 46, 29, 38, 54, 48, /* EX */ + 103, 43, 23, 37, 50, 53, 32,SYM, 49, 25, 51, 55, 34,104, 56,105, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 68; +static const int Unicode_Char_size = 76; static const unsigned int Unicode_CharOrder[] = { - 65, 2, 66, 17, 67, 9, 68, 10, 69, 1, 70, 16, 71, 14, 72, 19, - 73, 0, 74, 31, 75, 26, 76, 5, 77, 13, 78, 4, 79, 3, 80, 12, - 81, 20, 82, 7, 83, 8, 84, 6, 85, 11, 86, 15, 87, 28, 88, 25, - 89, 27, 90, 18, 97, 2, 98, 17, 99, 9, 100, 10, 101, 1,102, 16, - 103, 14, 104, 19, 105, 0, 106, 31, 107, 26, 108, 5, 109, 13,110, 4, + 65, 2, 66, 18, 67, 9, 68, 10, 69, 1, 70, 16, 71, 14, 72, 19, + 73, 0, 74, 31, 75, 24, 76, 5, 77, 13, 78, 4, 79, 3, 80, 12, + 81, 20, 82, 7, 83, 8, 84, 6, 85, 11, 86, 15, 87, 28, 88, 26, + 89, 27, 90, 17, 97, 2, 98, 18, 99, 9, 100, 10, 101, 1,102, 16, + 103, 14, 104, 19, 105, 0, 106, 31, 107, 24, 108, 5, 109, 13,110, 4, 111, 3, 112, 12, 113, 20, 114, 7, 115, 8, 116, 6, 117, 11,118, 15, - 119, 28, 120, 25, 121, 27, 122, 18, 192, 21, 200, 22, 201, 30,204, 29, - 210, 23, 211, 33, 214, 32, 217, 24, 224, 21, 232, 22, 233, 30,236, 29, - 242, 23, 243, 33, 246, 32, 249, 24, + 119, 28, 120, 26, 121, 27, 122, 17, 192, 21, 193, 36, 196, 33,198, 35, + 200, 22, 201, 30, 204, 29, 210, 23, 211, 37, 214, 32, 217, 25,220, 34, + 224, 21, 225, 36, 228, 33, 230, 35, 232, 22, 233, 30, 236, 29,242, 23, + 243, 37, 246, 32, 249, 25, 252, 34, }; /* Model Table: - * Total sequences: 921 - * First 512 sequences: 0.9992462827093448 - * Next 512 sequences (512-1024): 0.0007537172906552294 - * Rest: -2.0166160408230382e-17 + * Total considered sequences: 1168 / 1444 + * - Positive sequences: first 312 (0.9950294718278113) + * - Probable sequences: next 206 (518-312) (0.003975533066303161) + * - Neutral sequences: last 926 (0.000994995105885521) + * - Negative sequences: 276 (off-ratio) * Negative sequences: TODO */ static const PRUint8 ItalianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,2,0,2,3,0,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,3,3,3,3,2,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,3,3,3,0,0,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,3,3,3,3,0,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,0,3,3,3,3,3,2,0,2, - 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,3,2,3,3,2,2,3, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,2,3,3,0,2,3,3,3,3,0,2,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,3,3,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,3,3,2,0,3,2,0,3,3,2,3,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,0,3,2,3,2,2,2,3,3,3,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,2,3,0,3,3,3,0,3,2,3,0,0, - 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,0,0,0,3,2,2,0,3,2,2,2,0,3,2, - 3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,2,3,3,0,2,3,2,2,3,0,2,2,3,3,0,3,2,2,0, - 3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,2,2,3,0,3,0,0,0,3,2,2,2,2,2,2,2,0,3,2, - 3,3,3,3,2,3,2,3,2,2,0,3,3,0,2,3,0,2,2,0,0,0,0,3,0,0,0,2,0,2,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,3,2,0,2,0,2,2,2,0,0,2,0,0,0,2,0,2,0, - 3,3,3,3,3,3,2,3,3,2,2,3,0,2,0,0,0,3,3,2,0,2,2,2,2,0,2,3,2,3,2,0,2,0, - 3,3,3,3,2,2,2,0,2,2,2,3,2,2,2,0,0,3,3,2,2,0,0,3,0,0,2,2,2,0,0,2,2,3, - 3,3,3,3,3,3,3,3,3,0,0,3,2,3,0,0,2,2,2,0,0,2,0,0,0,0,2,3,3,2,3,2,3,0, - 2,0,2,0,0,0,0,2,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,2,3,2,0,3,2,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,0,3,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0, - 3,3,3,2,0,2,3,0,0,2,0,2,3,0,0,3,2,0,0,2,0,0,0,0,0,3,2,2,0,0,0,0,2,0, - 3,3,3,3,2,3,3,3,3,0,2,3,2,3,2,2,0,2,0,3,0,0,0,0,0,0,2,3,2,0,2,0,2,0, - 2,3,3,3,3,3,2,3,3,3,3,2,2,3,0,0,3,2,0,2,0,0,0,0,0,2,2,0,2,0,0,0,2,0, - 3,3,3,3,2,2,2,3,3,0,0,2,2,0,2,0,2,2,0,2,0,0,0,0,0,0,2,2,3,0,0,0,0,0, - 0,0,2,0,0,0,2,0,2,2,0,0,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,3,2,3,2,2,3,2,0,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,0,0,0,2,0,2,0,3,2,2,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 2,0,0,0,3,3,2,3,3,2,2,0,2,0,2,3,2,0,0,3,0,0,0,0,0,0,2,0,2,0,0,0,0,0, - 2,0,2,0,3,0,0,2,2,2,0,0,2,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,1,0,1,2,1,0,0,0,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,0,2,2,0,3,3,2,0,0,2,0,1,0,0,0,1, + 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,0,0,3,0,2,3,3,1,0,3,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,0,2,0,2,2,2,0,1,2,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,3,3,3,2,2,3,1,1,3,3,0,0,2,2,2,2,2,1,1,1,0,1,1, + 3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,1,2,1,2,2,0,1,2,1,2,2,1,1,2,1,0,1,1, + 3,3,3,3,3,3,3,3,3,3,1,3,2,2,1,2,2,2,2,3,0,3,1,3,1,2,1,3,2,2,2,1,1,1,1,1,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,1,1,3,2,2,2,1,1,1,2,1,1,1, + 3,3,3,3,2,3,3,3,3,3,2,3,3,3,2,3,3,1,3,3,3,1,1,3,2,2,1,2,2,3,2,1,1,1,0,0,1,0, + 3,3,3,3,3,3,3,3,2,3,2,3,1,2,1,1,2,1,1,3,3,1,1,3,3,0,1,3,1,2,2,1,0,0,0,1,1,0, + 3,3,3,3,2,2,2,3,2,2,3,3,2,2,2,2,2,1,2,2,1,2,1,2,1,1,1,2,2,2,2,2,1,1,1,1,0,1, + 3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,2,1,0,1,3,2,0,2,1,2,3,1,2,0,0,0,0,1,0, + 3,3,3,3,2,3,3,3,3,3,2,3,3,2,1,1,2,1,1,3,1,1,1,2,1,1,1,1,1,2,1,1,1,1,0,2,0,1, + 3,3,3,3,2,2,2,2,3,2,2,3,3,3,1,1,1,1,3,1,0,1,1,2,1,0,1,2,1,1,2,1,1,1,2,0,1,1, + 3,3,3,3,3,3,2,3,2,2,2,3,1,2,3,1,1,1,2,3,1,0,1,2,1,1,0,2,2,1,1,1,2,1,1,0,0,1, + 3,3,3,3,1,2,1,3,2,1,1,3,0,1,1,3,1,0,1,0,0,1,1,2,1,0,0,1,0,1,1,1,1,2,0,1,1,0, + 3,3,3,3,1,3,2,3,1,1,1,3,1,1,2,0,3,1,1,1,0,1,2,1,1,1,0,1,0,0,1,1,1,0,1,1,0,0, + 3,3,3,3,1,1,1,0,1,1,1,3,1,1,1,1,0,3,1,1,1,1,1,2,1,0,0,1,1,1,0,1,0,0,1,0,1,0, + 3,3,3,3,3,3,2,3,2,2,2,3,2,1,1,1,1,0,3,1,0,1,1,1,1,2,0,2,1,2,2,1,1,1,1,0,0,1, + 3,3,3,3,2,3,2,3,2,1,2,3,1,2,1,1,1,1,1,1,0,0,1,0,1,0,0,2,1,1,3,1,1,1,1,0,1,1, + 1,0,2,0,0,1,1,1,1,0,0,3,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,2,2,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,1,1,3,1,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,2,2,2,2,1,0,2,1,3,1,1,1,0,1,2,0,1,0,1,2,1,0,2,1,0,1,0,2,1,1,0,0,0, + 0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,2,2,1,1,2,0,0,2,1,1,1,1,0,3,1,0,0,1,0,0,0,0,0,1,3,0,1,0,0,0,0,0,0,0,1,0, + 2,2,3,2,2,3,2,2,2,2,2,1,2,2,2,1,1,1,2,1,0,1,1,0,1,1,1,2,1,1,0,1,0,0,1,0,1,0, + 3,3,3,3,2,2,2,1,2,1,1,1,0,1,0,0,1,1,1,2,0,0,0,0,1,0,0,1,2,0,0,0,0,1,1,0,0,0, + 0,1,1,0,1,0,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,2,2,2,1,2,1,2,2,1,2,0,1,1,2,1,1,1,2,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0, + 2,3,3,3,1,1,1,1,1,1,1,3,1,1,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,1,0,0,1, + 1,0,0,0,2,1,2,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0, + 1,1,0,0,2,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,1,1,2,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,1,0,1,1,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,0,2,1,0,1,1,1,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -232,8 +238,8 @@ const SequenceModel Iso_8859_1ItalianModel = { Iso_8859_1_CharToOrderMap, ItalianLangModel, - 34, - (float)0.9992462827093448, + 38, + (float)0.9990050048941145, PR_TRUE, "ISO-8859-1", "it" @@ -243,8 +249,8 @@ const SequenceModel Iso_8859_3ItalianModel = { Iso_8859_3_CharToOrderMap, ItalianLangModel, - 34, - (float)0.9992462827093448, + 38, + (float)0.9990050048941145, PR_TRUE, "ISO-8859-3", "it" @@ -254,8 +260,8 @@ const SequenceModel Iso_8859_9ItalianModel = { Iso_8859_9_CharToOrderMap, ItalianLangModel, - 34, - (float)0.9992462827093448, + 38, + (float)0.9990050048941145, PR_TRUE, "ISO-8859-9", "it" @@ -265,8 +271,8 @@ const SequenceModel Iso_8859_15ItalianModel = { Iso_8859_15_CharToOrderMap, ItalianLangModel, - 34, - (float)0.9992462827093448, + 38, + (float)0.9990050048941145, PR_TRUE, "ISO-8859-15", "it" @@ -276,8 +282,8 @@ const SequenceModel Windows_1252ItalianModel = { Windows_1252_CharToOrderMap, ItalianLangModel, - 34, - (float)0.9992462827093448, + 38, + (float)0.9990050048941145, PR_TRUE, "WINDOWS-1252", "it" @@ -287,8 +293,11 @@ const LanguageModel ItalianModel = { "it", Unicode_CharOrder, - 68, + 76, ItalianLangModel, - 34, - (float)0.9992462827093448, + 38, + 3, + (float)0.3420627434224581, + 16, + (float)0.03959671985669916, }; diff --git a/src/LangModels/LangLatvianModel.cpp b/src/LangModels/LangLatvianModel.cpp index 3b47d21..69122e3 100644 --- a/src/LangModels/LangLatvianModel.cpp +++ b/src/LangModels/LangLatvianModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:30:28.293047 + * On: 2022-12-14 18:11:03.758761 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_4_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 4X */ - 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 6X */ - 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 22, 12, 3, 25, 19, 28, 1, 16, 11, 9, 13, 7, 10, /* 4X */ + 15, 40, 5, 2, 4, 6, 14, 32, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 22, 12, 3, 25, 19, 28, 1, 16, 11, 9, 13, 7, 10, /* 6X */ + 15, 40, 5, 2, 4, 6, 14, 32, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 55, 56, 57,SYM, 58, 26,SYM,SYM, 23, 21, 31, 59,SYM, 29,SYM, /* AX */ - SYM, 60,SYM, 61,SYM, 62, 26,SYM,SYM, 23, 21, 31, 63, 49, 29, 49, /* BX */ - 8, 40, 64, 65, 41, 54, 42, 66, 32, 36, 67, 43, 46, 47, 44, 18, /* CX */ - 68, 24, 53, 30, 69, 70, 37,SYM, 71, 72, 73, 74, 38, 75, 27, 48, /* DX */ - 8, 40, 76, 77, 41, 54, 42, 78, 32, 36, 79, 43, 46, 47, 44, 18, /* EX */ - 80, 24, 53, 30, 81, 82, 37,SYM, 83, 84, 85, 86, 38, 87, 27,SYM, /* FX */ + SYM, 67, 68, 51,SYM, 69, 27,SYM,SYM, 23, 21, 30, 70,SYM, 29,SYM, /* AX */ + SYM, 71,SYM, 51,SYM, 72, 27,SYM,SYM, 23, 21, 30, 73, 63, 29, 63, /* BX */ + 8, 41, 60, 65, 38, 47, 36, 74, 33, 37, 75, 57, 43, 48, 61, 18, /* CX */ + 76, 24, 42, 31, 64, 45, 44,SYM, 59, 62, 54, 66, 39, 77, 26, 49, /* DX */ + 8, 41, 60, 65, 38, 47, 36, 78, 33, 37, 79, 57, 43, 48, 61, 18, /* EX */ + 80, 24, 42, 31, 64, 45, 44,SYM, 59, 62, 54, 66, 39, 81, 26,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_10_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 4X */ - 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 6X */ - 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 22, 12, 3, 25, 19, 28, 1, 16, 11, 9, 13, 7, 10, /* 4X */ + 15, 40, 5, 2, 4, 6, 14, 32, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 22, 12, 3, 25, 19, 28, 1, 16, 11, 9, 13, 7, 10, /* 6X */ + 15, 40, 5, 2, 4, 6, 14, 32, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 88, 21, 31, 18, 89, 30,SYM, 26, 90, 23, 91, 29,SYM, 27, 49, /* AX */ - SYM, 92, 21, 31, 18, 93, 30,SYM, 26, 94, 23, 95, 29, 96, 27, 49, /* BX */ - 8, 40, 97, 98, 41, 54, 42, 99, 32, 36,100, 43, 46, 47, 44,101, /* CX */ - 52, 24, 53, 45,102,103, 37,104,105,106,107,108, 38,109, 51, 48, /* DX */ - 8, 40,110,111, 41, 54, 42,112, 32, 36,113, 43, 46, 47, 44,114, /* EX */ - 52, 24, 53, 45,115,116, 37,117,118,119,120,121, 38,122, 51,123, /* FX */ + SYM, 82, 21, 30, 18, 83, 31,SYM, 27, 84, 23, 85, 29,SYM, 26, 63, /* AX */ + SYM, 86, 21, 30, 18, 87, 31,SYM, 27, 88, 23, 89, 29, 90, 26, 63, /* BX */ + 8, 41, 60, 65, 38, 47, 36, 91, 33, 37, 92, 57, 43, 48, 61, 93, /* CX */ + 94, 24, 42, 46, 64, 45, 44, 95, 59, 62, 54, 66, 39, 53, 96, 49, /* DX */ + 8, 41, 60, 65, 38, 47, 36, 97, 33, 37, 98, 57, 43, 48, 61, 99, /* EX */ + 100, 24, 42, 46, 64, 45, 44,101, 59, 62, 54, 66, 39, 53,102,103, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,86 +110,130 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 4X */ - 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 15, 11, 9, 12, 7, 10, /* 6X */ - 16, 39, 5, 2, 4, 6, 14, 34, 35, 33, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 22, 12, 3, 25, 19, 28, 1, 16, 11, 9, 13, 7, 10, /* 4X */ + 15, 40, 5, 2, 4, 6, 14, 32, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 22, 12, 3, 25, 19, 28, 1, 16, 11, 9, 13, 7, 10, /* 6X */ + 15, 40, 5, 2, 4, 6, 14, 32, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,124,SYM,125,SYM,SYM,SYM,SYM, 42, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,126,SYM,127,SYM,SYM,SYM,SYM, 42, /* BX */ - 128,129, 8,130, 41, 54,131, 21, 32, 36,132, 46, 31, 30, 18, 26, /* CX */ - 23,133, 24, 45, 53,134, 37,SYM,135, 50,136, 27, 38,137, 29, 48, /* DX */ - 138,139, 8,140, 41, 54,141, 21, 32, 36,142, 46, 31, 30, 18, 26, /* EX */ - 23,143, 24, 45, 53,144, 37,SYM,145, 50,146, 27, 38,147, 29,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 59,SYM, 51,SYM,SYM,SYM,SYM, 36, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 59,SYM, 51,SYM,SYM,SYM,SYM, 36, /* BX */ + 104,105, 8, 52, 38, 47,106, 21, 33, 37, 58, 43, 30, 31, 18, 27, /* CX */ + 23, 56, 24, 46, 42, 45, 44,SYM, 62, 50, 55, 26, 39,107, 29, 49, /* DX */ + 108,109, 8, 52, 38, 47,110, 21, 33, 37, 58, 43, 30, 31, 18, 27, /* EX */ + 23, 56, 24, 46, 42, 45, 44,SYM, 62, 50, 55, 26, 39,111, 29,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 80; +static const int Unicode_Char_size = 82; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 17, 67, 22, 68, 13, 69, 3, 70, 25, 71, 19, 72, 28, - 73, 1, 74, 15, 75, 11, 76, 9, 77, 12, 78, 7, 79, 10, 80, 16, - 81, 39, 82, 5, 83, 2, 84, 4, 85, 6, 86, 14, 87, 34, 88, 35, - 89, 33, 90, 20, 97, 0, 98, 17, 99, 22, 100, 13, 101, 3,102, 25, - 103, 19, 104, 28, 105, 1, 106, 15, 107, 11, 108, 9, 109, 12,110, 7, - 111, 10, 112, 16, 113, 39, 114, 5, 115, 2, 116, 4, 117, 6,118, 14, - 119, 34, 120, 35, 121, 33, 122, 20, 201, 36, 214, 37, 220, 38,233, 36, - 246, 37, 252, 38, 256, 8, 257, 8, 268, 32, 269, 32, 274, 21,275, 21, - 290, 31, 291, 31, 298, 18, 299, 18, 310, 30, 311, 30, 315, 26,316, 26, - 325, 24, 326, 24, 352, 23, 353, 23, 362, 27, 363, 27, 381, 29,382, 29, + 65, 0, 66, 17, 67, 22, 68, 12, 69, 3, 70, 25, 71, 19, 72, 28, + 73, 1, 74, 16, 75, 11, 76, 9, 77, 13, 78, 7, 79, 10, 80, 15, + 81, 40, 82, 5, 83, 2, 84, 4, 85, 6, 86, 14, 87, 32, 88, 35, + 89, 34, 90, 20, 97, 0, 98, 17, 99, 22, 100, 12, 101, 3,102, 25, + 103, 19, 104, 28, 105, 1, 106, 16, 107, 11, 108, 9, 109, 13,110, 7, + 111, 10, 112, 15, 113, 40, 114, 5, 115, 2, 116, 4, 117, 6,118, 14, + 119, 32, 120, 35, 121, 34, 122, 20, 196, 38, 198, 36, 201, 37,220, 39, + 228, 38, 230, 36, 233, 37, 252, 39, 256, 8, 257, 8, 268, 33,269, 33, + 274, 21, 275, 21, 290, 30, 291, 30, 298, 18, 299, 18, 310, 31,311, 31, + 315, 27, 316, 27, 325, 24, 326, 24, 352, 23, 353, 23, 362, 26,363, 26, + 381, 29, 382, 29, }; /* Model Table: - * Total sequences: 982 - * First 512 sequences: 0.9904642991017133 - * Next 512 sequences (512-1024): 0.009535700898286757 - * Rest: -5.377642775528102e-17 + * Total considered sequences: 1210 / 1681 + * - Positive sequences: first 612 (0.9950080943923969) + * - Probable sequences: next 215 (827-612) (0.003994106293262911) + * - Neutral sequences: last 854 (0.0009977993143401864) + * - Negative sequences: 471 (off-ratio) * Negative sequences: TODO */ static const PRUint8 LatvianLangModel[] = { - 2,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,2,3,2,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,2,3,3,3,2,3,0,0,2,2,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,0,2,2,2,3,2,2,0,0,0,2,2,0,0,0,2,2, - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,3,2,2,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,0,0,2,0,2,2,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,2,0,2,2,2,2, - 3,3,3,2,3,3,2,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,0,3,0,2,2,2,2,3,2,0,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,0,3,0,2,2,2,0,0,3,2,0,0,2,0,0,2, - 2,2,3,2,3,3,2,3,0,3,0,3,3,3,3,3,3,3,0,2,3,0,3,3,3,3,3,0,0,2,0,2,2,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,2,2,0,2,2,2,2,2,2,0,0,0, - 3,2,3,2,3,3,3,3,2,3,2,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,0,2,3,2,3,2,2,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,3,3,2,0,3,2,2,0,0,0,0,2,0,0,2,2,2,0, - 3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,3,3,3,3,2,0,3,2,2,0,2,0,3,0,0,0,2,0,2,0,2,2,0,2,0, - 3,3,3,3,2,3,3,3,3,2,3,2,3,0,3,2,2,2,3,2,3,3,2,2,2,0,0,3,0,3,0,0,0,0,2,0,2,0,2,0, - 3,3,3,3,2,2,3,2,3,2,3,2,2,2,2,3,3,2,3,2,2,3,2,0,2,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0, - 3,3,3,3,0,2,3,3,3,2,3,2,2,2,2,0,2,0,2,2,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,3,2,3,2,2,2,2,2,0,2,2,0,0,0,0,2,0,0,0, - 3,3,3,3,2,3,3,2,3,3,3,2,2,2,2,2,2,2,3,0,2,3,2,2,0,0,2,3,2,0,0,0,2,2,0,0,0,2,2,0, - 0,0,3,0,3,3,0,3,0,3,0,3,3,3,3,3,3,3,0,3,3,0,3,3,3,2,2,0,0,2,2,0,2,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,0,2,0,0,2,2,2,0,3,0,2,3,3,2,2,0,0,0,2,0,0,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,0,0,2,2,0,0,0,0,2,2,0,0,0,0,0, - 2,0,3,0,3,3,2,3,0,3,0,3,3,3,3,3,2,2,0,3,2,0,3,3,2,2,3,0,0,2,2,3,0,0,0,0,0,0,0,0, - 3,3,3,3,2,2,3,2,3,2,3,3,2,2,2,0,2,2,3,0,2,3,2,2,0,0,0,2,3,0,0,2,0,2,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,2,2,2,3,2,2,2,3,2,2,3,0,0,2,0,2,2,0,0,3,0,2,0,0,0,0,0,0,0, - 3,3,2,3,0,0,3,2,3,0,3,0,2,2,2,2,2,2,0,2,0,3,2,3,0,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,2,2,3,3,2,0,0,0,0,0,0,2,2,0,2,0,2,0,2,0,2,0,0,0,0,0,0,0,0,2,0,2,0, - 3,3,2,3,0,2,3,2,3,2,3,2,2,2,2,2,2,0,2,0,0,2,2,2,2,0,2,2,0,0,2,3,0,0,0,0,0,0,0,0, - 0,2,3,0,3,3,0,3,0,3,2,3,2,3,3,2,3,0,0,2,3,0,3,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,2,2,2,3,2,2,3,2,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,2,0,0,0,0,0, - 3,3,2,3,0,2,3,2,3,2,3,2,2,0,2,0,0,0,2,0,2,2,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,0, - 3,3,2,3,2,0,2,0,2,0,2,0,0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,0,3,0,0,2,0,0,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,3,0,0,3,2,2,0,2,2,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,0,2,2,2,0,2,2,2,2,2,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 2,2,2,2,2,0,0,0,0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0, - 2,2,0,0,0,0,2,0,0,0,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0, - 0,0,2,0,2,2,0,2,0,2,2,0,0,2,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,0,0,0,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,0,2,0,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,2,3, + 3,2,3,3,3,3,0,3,3,3,3,2,2,3,3,2,0,0,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3, + 3,2,3,3,3,3,1,3,3,3,3,3,2,3,0,2,0,1,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3, + 3,3,3,1,0,2,3,2,3,1,0,1,2,0,2,0,0,1,1,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3, + 3,1,3,3,3,3,0,3,3,3,3,3,3,3,3,2,0,0,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,2,3,3,3,0,1,2,3,1,2,2,0,1,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3, + 3,3,3,3,3,3,3,3,3,2,3,3,2,1,3,1,0,2,1,2,0, + 3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,2,3, + 3,2,3,3,3,2,0,3,2,2,2,2,1,3,1,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3, + 3,3,3,3,0,3,2,0,3,2,0,0,1,3,2,0,0,1,0,0,1, + 2,2,3,1,3,3,1,3,0,3,0,3,3,3,3,3,3,3,0,3, + 3,0,3,3,3,3,0,3,1,3,1,1,0,2,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,0,3,3,0,3,2,1,1,1,2,2,1,0,2,1,1,0, + 3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,1,3, + 3,2,3,3,3,3,0,3,3,2,3,2,3,3,2,2,0,0,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,1,2,2, + 2,2,3,3,3,1,3,3,3,0,0,0,1,0,2,0,0,0,1,1,0, + 3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,2,2,2,3,2, + 3,3,1,1,2,2,3,0,2,3,0,0,2,0,2,0,0,1,1,0,0, + 3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,3,3,3,3,3, + 2,3,2,3,0,1,3,0,1,1,2,0,0,0,2,1,0,1,1,1,0, + 3,3,3,3,1,2,3,3,3,3,3,3,3,1,2,3,3,2,3,2, + 2,3,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,2,2,2,2,3,1,1,0,0,1,2,0,3,1,2,0,0, + 3,3,3,3,1,1,3,3,3,1,3,1,2,1,2,0,1,1,2,0, + 0,3,1,1,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 3,3,3,3,2,3,3,3,3,3,3,3,2,1,3,2,3,2,3,1, + 1,3,1,1,0,1,3,3,2,1,0,0,0,0,1,0,0,1,0,1,0, + 0,1,3,1,3,3,0,3,0,3,0,3,3,3,3,3,3,3,1,3, + 3,0,3,3,3,1,0,3,0,2,1,2,0,2,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,2,0,1,2,2, + 2,2,0,3,1,2,3,3,2,2,0,0,1,0,2,0,0,1,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3, + 2,3,3,2,3,1,2,0,1,0,0,1,2,0,2,0,0,0,0,0,0, + 0,0,3,1,3,3,1,3,0,3,0,3,3,3,3,2,3,2,0,3, + 3,1,3,3,2,1,0,3,0,2,3,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,1,3,3,3,3,2,2,2,2,1,3,3,2, + 2,3,2,0,0,0,1,0,3,0,1,0,0,1,3,0,0,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,0,3,2, + 2,3,0,1,2,0,1,2,0,0,0,3,1,2,0,0,0,0,0,0,0, + 3,3,2,3,1,1,3,2,3,2,3,1,1,1,2,2,0,2,0,1, + 1,3,1,3,1,0,2,0,0,0,2,3,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,1,1,3,3,1,0,2,0,1,2,0,2,2, + 1,3,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,1,0,1,0, + 0,0,3,0,3,3,0,3,0,3,1,3,3,2,3,3,2,1,0,3, + 3,0,3,3,0,1,1,0,1,2,0,2,0,0,0,0,0,0,0,0,0, + 3,3,2,3,2,2,3,1,3,2,3,1,1,2,2,1,2,1,1,0, + 1,2,0,3,3,0,3,1,0,0,3,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,1,0,2,0,0,0,1,3,0, + 0,2,1,0,0,0,2,1,1,0,0,0,2,0,1,0,0,1,0,0,0, + 3,3,3,3,0,2,3,3,3,2,3,2,1,2,2,1,1,1,3,0, + 0,2,1,1,3,0,2,0,0,0,1,0,0,0,1,0,0,0,0,0,0, + 2,3,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,2,0, + 0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 3,3,1,3,1,0,3,0,2,0,2,0,0,0,1,0,0,0,3,0, + 0,3,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 3,3,3,3,1,1,1,1,0,2,3,1,0,1,0,0,1,0,0,0, + 0,1,1,0,0,0,0,0,2,0,0,0,2,0,2,0,0,0,0,0,0, + 3,3,3,3,1,0,3,1,2,0,2,2,0,0,1,2,0,0,1,0, + 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,3,2,2,2,2,2,0,2,2,2,2,2,1,1,0,3,0,0, + 0,0,3,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0, + 2,2,1,1,2,0,1,0,0,1,1,1,1,1,1,2,0,0,0,0, + 0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0, + 0,1,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,1,1,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, + 0,0,0,0,1,1,1,2,0,1,0,0,2,1,0,1,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,0,1,1,0,2,0,1,0,1,1,1,0,0,0,1,0,1, + 0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0, + 1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, }; @@ -197,8 +241,8 @@ const SequenceModel Iso_8859_4LatvianModel = { Iso_8859_4_CharToOrderMap, LatvianLangModel, - 40, - (float)0.9904642991017133, + 41, + (float)0.9990022006856598, PR_TRUE, "ISO-8859-4", "lv" @@ -208,8 +252,8 @@ const SequenceModel Iso_8859_10LatvianModel = { Iso_8859_10_CharToOrderMap, LatvianLangModel, - 40, - (float)0.9904642991017133, + 41, + (float)0.9990022006856598, PR_TRUE, "ISO-8859-10", "lv" @@ -219,8 +263,8 @@ const SequenceModel Iso_8859_13LatvianModel = { Iso_8859_13_CharToOrderMap, LatvianLangModel, - 40, - (float)0.9904642991017133, + 41, + (float)0.9990022006856598, PR_TRUE, "ISO-8859-13", "lv" @@ -230,8 +274,11 @@ const LanguageModel LatvianModel = { "lv", Unicode_CharOrder, - 80, + 82, LatvianLangModel, - 40, - (float)0.9904642991017133, + 41, + 4, + (float)0.3598243274225239, + 22, + (float)0.0337872650940117, }; diff --git a/src/LangModels/LangLithuanianModel.cpp b/src/LangModels/LangLithuanianModel.cpp index d2fa554..4e742ef 100644 --- a/src/LangModels/LangLithuanianModel.cpp +++ b/src/LangModels/LangLithuanianModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:26:36.950339 + * On: 2022-12-14 18:30:25.794872 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_4_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 4X */ - 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 6X */ - 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 18, 23, 12, 5, 28, 16, 29, 0, 15, 9, 10, 11, 7, 3, /* 4X */ + 14, 35, 4, 2, 6, 8, 13, 33, 32, 19, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 23, 12, 5, 28, 16, 29, 0, 15, 9, 10, 11, 7, 3, /* 6X */ + 14, 35, 4, 2, 6, 8, 13, 33, 32, 19, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 27, 68, 69,SYM, 40, 60,SYM,SYM, 21, 56, 70, 71,SYM, 22,SYM, /* AX */ - SYM, 27,SYM, 72,SYM, 40, 60,SYM,SYM, 21, 56, 73, 74, 67, 22, 67, /* BX */ - 45, 35, 75, 37, 41, 49, 54, 30, 24, 36, 31, 76, 17, 43, 77, 50, /* CX */ - 63, 61, 42, 78, 62, 46, 38,SYM, 55, 20, 52, 79, 51, 44, 26, 59, /* DX */ - 45, 35, 80, 37, 41, 49, 54, 30, 24, 36, 31, 81, 17, 43, 82, 50, /* EX */ - 63, 61, 42, 83, 62, 46, 38,SYM, 55, 20, 52, 84, 51, 44, 26,SYM, /* FX */ + SYM, 26, 68, 69,SYM, 66, 56,SYM,SYM, 21, 49, 70, 71,SYM, 22,SYM, /* AX */ + SYM, 26,SYM, 72,SYM, 66, 56,SYM,SYM, 21, 49, 73, 74, 75, 22, 76, /* BX */ + 40, 38, 77, 53, 37, 47, 59, 30, 24, 34, 31, 46, 17, 39, 65, 44, /* CX */ + 58, 51, 41, 63, 54, 60, 36,SYM, 55, 20, 50, 64, 42, 67, 27, 52, /* DX */ + 40, 38, 78, 53, 37, 47, 59, 30, 24, 34, 31, 46, 17, 39, 65, 44, /* EX */ + 58, 51, 41, 63, 54, 60, 36,SYM, 55, 20, 50, 64, 42, 67, 27,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_10_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 4X */ - 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 6X */ - 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 18, 23, 12, 5, 28, 16, 29, 0, 15, 9, 10, 11, 7, 3, /* 4X */ + 14, 35, 4, 2, 6, 8, 13, 33, 32, 19, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 23, 12, 5, 28, 16, 29, 0, 15, 9, 10, 11, 7, 3, /* 6X */ + 14, 35, 4, 2, 6, 8, 13, 33, 32, 19, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 27, 56, 85, 50, 40, 86,SYM, 60, 63, 21, 87, 22,SYM, 26, 67, /* AX */ - SYM, 27, 56, 88, 50, 40, 89,SYM, 60, 63, 21, 90, 22, 91, 26, 67, /* BX */ - 45, 35, 92, 37, 41, 49, 54, 30, 24, 36, 31, 93, 17, 43, 94, 58, /* CX */ - 65, 61, 42, 34, 62, 46, 38, 44, 55, 20, 52, 95, 51, 48, 96, 59, /* DX */ - 45, 35, 97, 37, 41, 49, 54, 30, 24, 36, 31, 98, 17, 43, 99, 58, /* EX */ - 65, 61, 42, 34, 62, 46, 38, 44, 55, 20, 52,100, 51, 48,101,102, /* FX */ + SYM, 26, 49, 79, 44, 66, 63,SYM, 56, 58, 21, 80, 22,SYM, 27, 81, /* AX */ + SYM, 26, 49, 82, 44, 66, 63,SYM, 56, 58, 21, 83, 22, 84, 27, 85, /* BX */ + 40, 38, 86, 53, 37, 47, 59, 30, 24, 34, 31, 46, 17, 39, 65, 57, /* CX */ + 87, 51, 41, 45, 54, 60, 36, 67, 55, 20, 50, 64, 42, 88, 89, 52, /* DX */ + 40, 38, 90, 53, 37, 47, 59, 30, 24, 34, 31, 46, 17, 39, 65, 57, /* EX */ + 91, 51, 41, 45, 54, 60, 36, 67, 55, 20, 50, 64, 42, 92, 93, 94, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,86 +110,82 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 4X */ - 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 18, 23, 12, 4, 25, 16, 29, 0, 14, 9, 10, 11, 7, 3, /* 6X */ - 15, 39, 5, 2, 6, 8, 13, 33, 32, 19, 28,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 18, 23, 12, 5, 28, 16, 29, 0, 15, 9, 10, 11, 7, 3, /* 4X */ + 14, 35, 4, 2, 6, 8, 13, 33, 32, 19, 25,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 23, 12, 5, 28, 16, 29, 0, 15, 9, 10, 11, 7, 3, /* 6X */ + 14, 35, 4, 2, 6, 8, 13, 33, 32, 19, 25,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 55,SYM,103,SYM,SYM,SYM,SYM, 54, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 55,SYM,104,SYM,SYM,SYM,SYM, 54, /* BX */ - 27, 30, 45, 53, 41, 49, 31, 56, 24, 36,105, 17,106,107, 50, 60, /* CX */ - 21, 57, 61, 34, 42, 46, 38,SYM, 20, 47, 64, 26, 51, 66, 22, 59, /* DX */ - 27, 30, 45, 53, 41, 49, 31, 56, 24, 36,108, 17,109,110, 50, 60, /* EX */ - 21, 57, 61, 34, 42, 46, 38,SYM, 20, 47, 64, 26, 51, 66, 22,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 55,SYM, 95,SYM,SYM,SYM,SYM, 59, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 55,SYM, 96,SYM,SYM,SYM,SYM, 59, /* BX */ + 26, 30, 40, 48, 37, 47, 31, 49, 24, 34, 97, 17, 98, 63, 44, 56, /* CX */ + 21, 99, 51, 45, 41, 60, 36,SYM, 20, 43, 61, 27, 42, 62, 22, 52, /* DX */ + 26, 30, 40, 48, 37, 47, 31, 49, 24, 34,100, 17,101, 63, 44, 56, /* EX */ + 21,102, 51, 45, 41, 60, 36,SYM, 20, 43, 61, 27, 42, 62, 22,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 80; +static const int Unicode_Char_size = 72; static const unsigned int Unicode_CharOrder[] = { - 65, 1, 66, 18, 67, 23, 68, 12, 69, 4, 70, 25, 71, 16, 72, 29, - 73, 0, 74, 14, 75, 9, 76, 10, 77, 11, 78, 7, 79, 3, 80, 15, - 81, 39, 82, 5, 83, 2, 84, 6, 85, 8, 86, 13, 87, 33, 88, 32, - 89, 19, 90, 28, 97, 1, 98, 18, 99, 23, 100, 12, 101, 4,102, 25, - 103, 16, 104, 29, 105, 0, 106, 14, 107, 9, 108, 10, 109, 11,110, 7, - 111, 3, 112, 15, 113, 39, 114, 5, 115, 2, 116, 6, 117, 8,118, 13, - 119, 33, 120, 32, 121, 19, 122, 28, 193, 35, 195, 37, 201, 36,211, 34, - 214, 38, 225, 35, 227, 37, 233, 36, 243, 34, 246, 38, 260, 27,261, 27, + 65, 1, 66, 18, 67, 23, 68, 12, 69, 5, 70, 28, 71, 16, 72, 29, + 73, 0, 74, 15, 75, 9, 76, 10, 77, 11, 78, 7, 79, 3, 80, 14, + 81, 35, 82, 4, 83, 2, 84, 6, 85, 8, 86, 13, 87, 33, 88, 32, + 89, 19, 90, 25, 97, 1, 98, 18, 99, 23, 100, 12, 101, 5,102, 28, + 103, 16, 104, 29, 105, 0, 106, 15, 107, 9, 108, 10, 109, 11,110, 7, + 111, 3, 112, 14, 113, 35, 114, 4, 115, 2, 116, 6, 117, 8,118, 13, + 119, 33, 120, 32, 121, 19, 122, 25, 201, 34, 233, 34, 260, 26,261, 26, 268, 24, 269, 24, 278, 17, 279, 17, 280, 31, 281, 31, 302, 30,303, 30, - 352, 21, 353, 21, 362, 26, 363, 26, 370, 20, 371, 20, 381, 22,382, 22, + 352, 21, 353, 21, 362, 27, 363, 27, 370, 20, 371, 20, 381, 22,382, 22, }; /* Model Table: - * Total sequences: 1138 - * First 512 sequences: 0.9919219576954762 - * Next 512 sequences (512-1024): 0.007740222486946524 - * Rest: 0.00033781981757727893 + * Total considered sequences: 1175 / 1296 + * - Positive sequences: first 557 (0.995037232341861) + * - Probable sequences: next 229 (786-557) (0.0039665071105200456) + * - Neutral sequences: last 510 (0.0009962605476189212) + * - Negative sequences: 121 (off-ratio) * Negative sequences: TODO */ static const PRUint8 LithuanianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,2,0,2,3,2,2,2,2,2,2,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,0,3,3,3,3,3,0,0,3,3,0,0,1,3,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,0,3,3,2,3,3,2,3,2,2,0,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,2,3,0,0,3,3,0,0,2,2,0,0,0,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,0,0,3,2,2,0,2,3,0,0,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,1,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,3,3,3,3,2,2,2,0,2,3,3,2,3,3,3,0,2,0,2,2,1,2,0, - 3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,1,2,0,2,2,0,1, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,3,2,0,0,3,2,0,0,0,2,0,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,1,3,3,3,0,3,1,2,3,3,2,3,2,3,0,2,1,2,1,2,1,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,2,3,3,3,2,3,3,3,0,2,1,2,2,0,2,0, - 3,3,3,3,3,2,3,2,3,2,2,3,2,2,2,3,2,3,3,3,3,2,3,2,1,2,3,3,2,0,0,3,0,2,3,2,2,2,1,0, - 3,3,3,3,3,3,2,2,3,3,2,3,2,3,2,2,2,3,2,3,3,1,3,2,0,2,2,3,2,2,2,3,0,2,2,2,2,0,2,2, - 3,3,3,3,3,2,2,2,3,2,3,2,2,0,2,2,2,3,0,3,3,2,0,2,0,0,2,3,3,0,2,3,0,0,2,2,2,0,0,0, - 3,3,2,3,3,2,2,2,3,2,0,0,0,0,0,2,2,3,0,2,3,1,0,0,0,0,3,3,0,0,3,3,0,0,2,2,2,0,2,0, - 3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,3,0,3,2,3,2,2,2,2,2,2,0,3,2,2,0,1,0,1,2,1,0,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,3,3,2,2,2,0,0,3,3,3,3,2,2,0,2,0,0,0,2,2,0, - 2,0,3,0,0,3,3,3,2,3,3,3,3,3,3,2,3,0,2,0,0,2,3,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,2,3,0,2,2,2,0,0,3,2,2,3,2,2,2,0,0,3,3,2,2,1,3,0,2,0,1,0,0,1,0, - 2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,3,2,2,3,2,2,0,3,0,0,0,0,0,0,2,0,0,0,0, - 0,0,2,0,0,0,0,0,0,0,0,0,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,3,2,1,2,2,3,1,2,2,0,0,3,2,0,0,0,0,0,1,0,0, - 3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,2,3,2,2,0,0,0,2,2,2,0,0,2,2,0,0,0,0,0,1,0,0, - 3,3,2,3,3,2,3,0,3,3,3,2,2,2,0,0,2,2,2,2,0,0,0,2,0,2,3,2,2,3,0,0,0,0,0,2,2,0,0,2, - 3,3,0,2,3,0,0,0,2,2,0,0,1,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, - 3,3,2,3,3,3,2,0,3,2,3,2,0,0,2,0,2,2,2,2,2,0,0,2,0,2,2,1,0,0,0,0,0,0,0,1,0,0,2,0, - 1,0,3,0,0,3,3,3,0,3,2,3,3,2,0,2,2,0,2,0,0,3,2,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0, - 0,0,3,0,0,2,0,0,0,2,2,2,0,2,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,3,2,2,2,3,2,3,3,3,2,0,2,2,2,2,3,3,0,0,2,0,0,2,2,2,2,0,2,0,2,0,2,2,0,0,0, - 3,3,2,3,3,3,3,3,3,2,2,2,2,2,2,0,0,1,2,3,2,0,0,0,0,2,2,2,0,0,0,0,0,2,0,2,0,0,0,0, - 0,2,3,0,2,3,3,2,0,3,3,2,2,3,0,3,3,2,2,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,2,0,0,2,2,2,0,2,0,0,0,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0, - 3,3,2,2,3,2,0,2,2,0,2,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,2,0,0,0,2,0,0,0,0,1,0, - 1,0,0,0,0,2,2,2,0,3,2,0,2,0,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 2,0,2,0,2,2,0,2,2,0,2,2,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 2,0,2,0,0,2,1,2,0,0,1,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, - 0,0,1,2,0,1,2,2,0,2,2,2,2,2,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,0,2,2,2,0,2,2,2,2,0,0,0,2,0,0,0,0,0,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,2,0,1,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,3,3,0,1,3,3,0,0,2,3,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,1,3,3,2,3,3,3,3,3,2,0,2,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,3,3,0,0,3,3,0,0,2,3,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,2,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,0,1,3,2,3,0,2,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,3,3,2,2,2,0,2,3,3,2,3,3,3,0,2,1,1, + 3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,2,3,2,3,3,1,1,1,0, + 3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,1,3,2,0,3,3,3,3,3,0,0,3,2,0,0,2,1,1,1, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,2,3,2,3,3,3,0,3,2,0,3,3,2,3,3,3,0,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,3,3,3,3,0,1,2,0, + 3,3,3,3,2,3,3,2,3,2,3,3,2,2,3,2,2,3,3,3,3,2,3,2,1,3,3,3,2,1,1,3,1,2,2,0, + 3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,2,2,3,2,3,3,1,3,2,0,3,3,3,0,2,3,3,0,2,2,0, + 3,3,3,3,2,3,1,2,3,1,3,1,2,1,2,2,2,3,1,3,3,1,0,1,2,3,3,3,1,0,2,3,0,0,1,0, + 3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,3,2,3,1,2,1,1,3,3,1,3,2,2,0,0,1,0, + 3,3,1,3,2,3,2,1,3,2,0,0,1,0,1,0,1,3,0,2,3,1,0,1,0,0,3,3,1,0,3,3,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,2,3,2,3,3,2,3,1,1,3,3,3,1,3,2,3,0,2,1,0, + 0,0,3,0,3,0,3,3,1,3,3,3,3,3,3,3,3,0,2,0,0,2,3,2,2,2,0,0,2,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,2,3,1,3,2,1,2,0,3,2,3,3,1,1,1,1,3,3,3,1,2,1,3,0,1,0,0, + 2,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,1,0,3,3,2,3,3,0,1,1,1,0,0,1,0,0,0, + 0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,1,3,3,2,3,2,2,2,0,3,0,2,2,1,0,2,2,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,2,3,2,1,0,0,1,0,2,2,2,1,2,1,0,0,0,0, + 3,3,2,3,3,3,3,1,3,3,3,2,2,1,0,1,2,3,1,2,1,0,1,2,0,2,2,3,1,3,0,0,1,0,1,2, + 3,3,0,2,0,3,0,1,2,2,0,0,2,0,0,0,0,2,0,2,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0, + 3,3,2,3,2,3,2,2,3,1,3,3,3,2,1,0,2,3,2,3,3,0,0,0,0,2,1,2,0,1,1,2,0,1,1,0, + 0,0,3,0,2,0,0,1,0,2,3,1,0,2,0,3,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,3,0,3,0,3,3,0,3,2,3,3,2,2,1,3,0,2,0,0,3,2,0,3,3,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,3,3,3,0,3,2,3,1,1,1,0,2,2,2,0,1,1,0,0,2,0,0,1,1,2,1,0,0,0,0,2,0, + 3,3,2,3,3,3,3,3,3,2,2,3,2,1,0,2,1,2,1,3,2,0,0,0,0,0,1,1,0,0,0,0,0,2,0,1, + 0,1,3,0,3,1,3,2,0,3,2,2,2,3,3,2,3,2,1,0,0,1,3,1,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,0,1,1,2,2,0,1,0,1,0,0,3,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,0,3,0,0,0, + 3,3,2,2,2,3,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,2,0,0,0,2,0,0, + 0,0,2,1,2,1,1,2,0,0,1,2,2,1,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1, + 1,0,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -197,8 +193,8 @@ const SequenceModel Iso_8859_4LithuanianModel = { Iso_8859_4_CharToOrderMap, LithuanianLangModel, - 40, - (float)0.9919219576954762, + 36, + (float)0.9990037394523811, PR_TRUE, "ISO-8859-4", "lt" @@ -208,8 +204,8 @@ const SequenceModel Iso_8859_10LithuanianModel = { Iso_8859_10_CharToOrderMap, LithuanianLangModel, - 40, - (float)0.9919219576954762, + 36, + (float)0.9990037394523811, PR_TRUE, "ISO-8859-10", "lt" @@ -219,8 +215,8 @@ const SequenceModel Iso_8859_13LithuanianModel = { Iso_8859_13_CharToOrderMap, LithuanianLangModel, - 40, - (float)0.9919219576954762, + 36, + (float)0.9990037394523811, PR_TRUE, "ISO-8859-13", "lt" @@ -230,8 +226,11 @@ const LanguageModel LithuanianModel = { "lt", Unicode_CharOrder, - 80, + 72, LithuanianLangModel, - 40, - (float)0.9919219576954762, + 36, + 4, + (float)0.39411092741692494, + 23, + (float)0.03166060999861138, }; diff --git a/src/LangModels/LangMalteseModel.cpp b/src/LangModels/LangMalteseModel.cpp index e0bdf42..6beb493 100644 --- a/src/LangModels/LangMalteseModel.cpp +++ b/src/LangModels/LangMalteseModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:33:28.446672 + * On: 2022-12-14 18:10:59.208511 **/ /* Character Mapping Table: @@ -68,75 +68,76 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 15, 28, 13, 4, 16, 18, 22, 0, 9, 12, 3, 10, 5, 8, /* 4X */ - 14, 27, 6, 11, 2, 7, 25, 19, 26, 30, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 15, 28, 13, 4, 16, 18, 22, 0, 9, 12, 3, 10, 5, 8, /* 6X */ - 14, 27, 6, 11, 2, 7, 25, 19, 26, 30, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 14, 28, 13, 4, 16, 17, 21, 1, 9, 12, 2, 10, 5, 8, /* 4X */ + 15, 27, 6, 11, 3, 7, 24, 19, 26, 30, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 14, 28, 13, 4, 16, 17, 21, 1, 9, 12, 2, 10, 5, 8, /* 6X */ + 15, 27, 6, 11, 3, 7, 24, 19, 26, 30, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 17,SYM,SYM,SYM,ILL, 49,SYM,SYM, 50, 51, 52, 53,SYM,ILL, 21, /* AX */ - SYM, 17,SYM,SYM,SYM,SYM, 54,SYM,SYM, 55, 56, 57, 58,SYM,ILL, 21, /* BX */ - 29, 36, 48,ILL, 41, 24, 59, 40, 33, 31, 60, 39, 46, 35, 61, 62, /* CX */ - ILL, 38, 32, 34, 43, 23, 37,SYM, 63, 47, 44, 64, 45, 65, 66, 42, /* DX */ - 29, 36, 48,ILL, 41, 24, 67, 40, 33, 31, 68, 39, 46, 35, 69, 70, /* EX */ - ILL, 38, 32, 34, 43, 23, 37,SYM, 71, 47, 44, 72, 45, 73, 74,SYM, /* FX */ + SYM, 18,SYM,SYM,SYM,ILL, 51,SYM,SYM, 52, 53, 40, 54,SYM,ILL, 22, /* AX */ + SYM, 18,SYM,SYM,SYM,SYM, 55,SYM,SYM, 43, 56, 40, 57,SYM,ILL, 22, /* BX */ + 29, 34, 49,ILL, 44, 25, 58, 50, 38, 32, 59, 41, 48, 36, 60, 61, /* CX */ + ILL, 39, 35, 33, 47, 23, 31,SYM, 62, 45, 42, 63, 37, 64, 65, 46, /* DX */ + 29, 34, 49,ILL, 44, 25, 66, 50, 38, 32, 67, 41, 48, 36, 68, 69, /* EX */ + ILL, 39, 35, 33, 47, 23, 31,SYM, 70, 45, 42, 71, 37, 72, 73,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 62; static const unsigned int Unicode_CharOrder[] = { - 65, 1, 66, 15, 67, 28, 68, 13, 69, 4, 70, 16, 71, 18, 72, 22, - 73, 0, 74, 9, 75, 12, 76, 3, 77, 10, 78, 5, 79, 8, 80, 14, - 81, 27, 82, 6, 83, 11, 84, 2, 85, 7, 86, 25, 87, 19, 88, 26, - 89, 30, 90, 20, 97, 1, 98, 15, 99, 28, 100, 13, 101, 4,102, 16, - 103, 18, 104, 22, 105, 0, 106, 9, 107, 12, 108, 3, 109, 10,110, 5, - 111, 8, 112, 14, 113, 27, 114, 6, 115, 11, 116, 2, 117, 7,118, 25, - 119, 19, 120, 26, 121, 30, 122, 20, 192, 29, 224, 29, 266, 24,267, 24, - 288, 23, 289, 23, 294, 17, 295, 17, 379, 21, 380, 21, + 65, 0, 66, 14, 67, 28, 68, 13, 69, 4, 70, 16, 71, 17, 72, 21, + 73, 1, 74, 9, 75, 12, 76, 2, 77, 10, 78, 5, 79, 8, 80, 15, + 81, 27, 82, 6, 83, 11, 84, 3, 85, 7, 86, 24, 87, 19, 88, 26, + 89, 30, 90, 20, 97, 0, 98, 14, 99, 28, 100, 13, 101, 4,102, 16, + 103, 17, 104, 21, 105, 1, 106, 9, 107, 12, 108, 2, 109, 10,110, 5, + 111, 8, 112, 15, 113, 27, 114, 6, 115, 11, 116, 3, 117, 7,118, 24, + 119, 19, 120, 26, 121, 30, 122, 20, 192, 29, 224, 29, 266, 25,267, 25, + 288, 23, 289, 23, 294, 18, 295, 18, 379, 22, 380, 22, }; /* Model Table: - * Total sequences: 888 - * First 512 sequences: 0.9960434044151966 - * Next 512 sequences (512-1024): 0.0039565955848034195 - * Rest: 1.5612511283791264e-17 + * Total considered sequences: 936 / 961 + * - Positive sequences: first 512 (0.9950079702120929) + * - Probable sequences: next 197 (709-512) (0.003994232243462514) + * - Neutral sequences: last 252 (0.000997797544444623) + * - Negative sequences: 25 (off-ratio) * Negative sequences: TODO */ static const PRUint8 MalteseLangModel[] = { - 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,2, 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,2,0,3,0,0,3,3,3,2,3,3, - 3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,3,3,3,2,0,3,3,0,3,3,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,2, + 3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,3,0,2,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,3,3,2,3,1,0,3,1,3,3,2,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3, - 3,3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,3,3,3,2,3,0,3, - 3,2,3,3,3,3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2, + 3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,3,2,3,0,3, + 3,3,3,3,3,3,3,1,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,3,3,2,2,2,2,3,2,0,0, - 3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,0,2,3,2,2,2,3,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,3,2,0,3,0,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,0,3,0,0,2,0,2,0,2,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,2,3,2,0,3,2,0,2,2,0,0,2,2, - 3,3,2,3,3,3,3,3,3,3,2,3,2,2,3,0,2,0,2,0,0,0,3,2,2,2,0,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,0,3,2,3,0,3,2,3,3,3,0,3,2,0,0,0,2,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,0,3,2,2,0,2,3,0,2,0,0,0,2,0,0,0,0,3,2,0,2, - 3,3,3,3,3,2,3,3,3,3,3,3,2,3,0,3,2,3,0,2,0,3,3,2,0,0,2,3,0,0,0, - 3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,3,0,3,3,3,2,3,3,0,0,3,0,0,2,2,2, - 3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,2,2,2,3,0,2,2,3,2,2,2,2,0,0,2, - 3,3,2,2,3,2,2,3,2,3,2,0,0,0,2,0,0,0,2,2,3,0,0,0,0,2,0,2,0,0,0, - 3,3,2,3,3,2,0,3,3,3,3,0,0,3,0,2,2,0,3,2,0,2,2,0,0,3,0,0,0,0,0, - 3,3,3,2,3,2,3,3,3,0,2,2,2,2,2,2,0,0,0,2,2,0,2,0,0,2,0,2,0,0,2, - 3,3,2,3,3,3,3,3,3,3,2,0,0,3,0,2,0,2,3,2,2,2,0,3,0,2,0,0,0,2,0, - 3,3,2,2,3,0,2,2,0,3,0,0,2,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0, - 3,3,0,2,3,2,3,3,3,3,0,2,0,3,2,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,2, - 3,3,3,3,3,2,3,3,3,3,3,0,2,2,0,2,2,0,2,2,0,0,3,0,0,2,3,0,2,0,0, - 3,3,3,2,3,0,3,3,3,3,2,3,2,3,0,3,3,0,3,3,0,0,2,2,2,0,2,3,0,0,0, - 3,3,3,3,3,0,2,3,3,2,0,3,3,3,2,2,2,0,0,0,2,0,3,0,0,0,0,2,2,0,2, - 2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,0,0,2,2,0,0,0,0,0,2,0,0,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,1,2,2,3,2,2,1,3,0,0,0, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,3,1,1,1,2,3,3,1,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,0,1,2,0,0,3,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,0,1,3,0,3,1,0,1,2,2,0,1,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,3,3,0,3,0,1,1,0,2,0,1,2,2, + 3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,1,2,3,3,0,2,2,0,0,0,1,3,3,0,2, + 3,3,3,3,3,1,3,3,3,3,2,2,1,2,1,3,0,2,0,1,0,3,0,0,1,1,1,0,1,0,0, + 3,3,3,3,3,3,3,3,3,3,1,3,1,2,2,0,3,2,0,0,0,2,0,1,0,0,0,3,2,0,2, + 3,3,3,2,3,3,3,3,3,2,2,2,1,3,2,1,0,3,3,3,1,3,3,0,3,0,0,1,1,1,2, + 3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,0,2,0,3,2,0,3,2,2,0,0,1,3,0,0,0, + 3,3,3,3,3,3,3,2,3,1,2,3,3,3,1,2,1,1,2,3,0,2,3,3,1,0,2,1,0,0,2, + 3,3,1,1,3,2,2,3,2,3,2,1,0,0,2,1,1,2,0,1,3,0,0,0,0,0,0,1,0,0,0, + 3,3,3,3,3,3,3,3,3,0,2,2,2,2,1,2,1,0,0,2,1,2,0,0,1,0,0,1,2,0,2, + 3,3,3,1,3,3,2,3,3,3,3,0,0,3,3,0,2,3,0,2,0,0,3,0,3,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,3,1,1,0,3,2,0,0,2,1,2,1,0,2,3,1,0,0,0,0,2,0, + 3,3,2,1,3,1,3,3,3,3,0,1,1,3,1,2,0,1,0,0,0,0,0,0,3,0,0,0,1,0,2, + 3,3,2,1,3,0,1,2,1,3,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,0,2,1,2,1,2,1,0,2,0,3,0,0,2,0,3,0,2,0,0, + 3,3,2,3,3,0,3,3,3,3,1,3,2,3,3,0,3,3,0,3,0,2,0,1,0,2,1,3,0,0,0, + 3,3,2,3,3,0,2,2,3,1,0,2,3,2,1,1,2,0,0,0,0,3,0,0,0,0,0,1,2,0,2, + 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,2,1,2,2,3,2,2,2,2,2,1,2,1,1,0,0,0,1,0,0,0,0,1,0,0,0,2,0,2, }; @@ -145,7 +146,7 @@ const SequenceModel Iso_8859_3MalteseModel = Iso_8859_3_CharToOrderMap, MalteseLangModel, 31, - (float)0.9960434044151966, + (float)0.9990022024555554, PR_TRUE, "ISO-8859-3", "mt" @@ -158,5 +159,8 @@ const LanguageModel MalteseModel = 62, MalteseLangModel, 31, - (float)0.9960434044151966, + 3, + (float)0.32403525534777605, + 22, + (float)0.03601583370707642, }; diff --git a/src/LangModels/LangNorwegianModel.cpp b/src/LangModels/LangNorwegianModel.cpp index 2bc2281..bcf8d52 100644 --- a/src/LangModels/LangNorwegianModel.cpp +++ b/src/LangModels/LangNorwegianModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2022-11-30 20:29:27.551827 + * On: 2022-12-14 18:12:08.670376 **/ /* Character Mapping Table: @@ -68,17 +68,17 @@ static const unsigned char Ibm865_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */ - 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */ - 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 37, 36, 28, 45, 31, 43, 19, 37, 53, 39, 44, 59, 56, 54, 31, 19, /* 8X */ - 28, 24, 24, 41, 30, 48, 62, 55, 63, 30, 36, 21,SYM, 21,SYM,SYM, /* 9X */ - 33, 34, 35, 40, 49, 49,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 21, 10, 7, 12, 2, 8, /* 4X */ + 16, 29, 1, 4, 3, 15, 13, 25, 28, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 21, 10, 7, 12, 2, 8, /* 6X */ + 16, 29, 1, 4, 3, 15, 13, 25, 28, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 39, 31, 27, 50, 30, 41, 19, 39, 45, 52, 40, 47, 54, 63, 30, 19, /* 8X */ + 27, 24, 24, 42, 32, 49, 58, 55, 59, 32, 31, 22,SYM, 22,SYM,SYM, /* 9X */ + 33, 35, 36, 46, 38, 38,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* DX */ - 52, 60, 64, 65, 61, 61, 66, 47, 67, 68, 69, 70, 71, 72, 42,SYM, /* EX */ + 62, 48, 66, 65, 61, 61, 67, 64, 68, 69, 70, 60, 71, 72, 73,SYM, /* EX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */ - 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */ - 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 21, 10, 7, 12, 2, 8, /* 4X */ + 16, 29, 1, 4, 3, 15, 13, 25, 28, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 21, 10, 7, 12, 2, 8, /* 6X */ + 16, 29, 1, 4, 3, 15, 13, 25, 28, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 58,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 73, 74,SYM,SYM, 75,SYM,SYM,SYM, 50, 50, 76,SYM, /* BX */ - 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */ - 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 77, 36, 51, 38, 60, /* DX */ - 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */ - 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 78, 36, 51, 38, 79, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 53,SYM, 53,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 57, 74,SYM,SYM, 57,SYM,SYM,SYM, 43, 43, 59,SYM, /* BX */ + 41, 33, 50, 44, 30, 19, 24, 39, 40, 27, 45, 52, 63, 35, 54, 47, /* CX */ + 34, 38, 49, 36, 42, 56, 32,SYM, 22, 55, 46, 58, 31, 51, 37, 48, /* DX */ + 41, 33, 50, 44, 30, 19, 24, 39, 40, 27, 45, 52, 63, 35, 54, 47, /* EX */ + 34, 38, 49, 36, 42, 56, 32,SYM, 22, 55, 46, 58, 31, 51, 37, 59, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */ - 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */ - 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 21, 10, 7, 12, 2, 8, /* 4X */ + 16, 29, 1, 4, 3, 15, 13, 25, 28, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 21, 10, 7, 12, 2, 8, /* 6X */ + 16, 29, 1, 4, 3, 15, 13, 25, 28, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 80,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */ - 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 81, 36, 51, 38, 60, /* DX */ - 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */ - 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 82, 36, 51, 38, 83, /* FX */ + SYM,SYM,SYM,SYM,SYM, 75,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 41, 33, 50, 44, 30, 19, 24, 39, 40, 27, 45, 52, 63, 35, 54, 47, /* CX */ + 34, 38, 49, 36, 42, 56, 32,SYM, 22, 55, 46, 58, 31, 51, 37, 48, /* DX */ + 41, 33, 50, 44, 30, 19, 24, 39, 40, 27, 45, 52, 63, 35, 54, 47, /* EX */ + 34, 38, 49, 36, 42, 56, 32,SYM, 22, 55, 46, 58, 31, 51, 37, 59, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,75 +131,75 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */ - 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */ - 16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 50,ILL, 85,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 50,ILL, 86, 87, /* 9X */ + SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 21, 10, 7, 12, 2, 8, /* 4X */ + 16, 29, 1, 4, 3, 15, 13, 25, 28, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 21, 10, 7, 12, 2, 8, /* 6X */ + 16, 29, 1, 4, 3, 15, 13, 25, 28, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 76,SYM,SYM,SYM,SYM,SYM,SYM, 53,SYM, 43,ILL, 57,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 53,SYM, 43,ILL, 57, 59, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 88,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */ - 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 89, 36, 51, 38, 60, /* DX */ - 43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */ - 32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 90, 36, 51, 38, 91, /* FX */ + SYM,SYM,SYM,SYM,SYM, 77,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 41, 33, 50, 44, 30, 19, 24, 39, 40, 27, 45, 52, 63, 35, 54, 47, /* CX */ + 34, 38, 49, 36, 42, 56, 32,SYM, 22, 55, 46, 58, 31, 51, 37, 48, /* DX */ + 41, 33, 50, 44, 30, 19, 24, 39, 40, 27, 45, 52, 63, 35, 54, 47, /* EX */ + 34, 38, 49, 36, 42, 56, 32,SYM, 22, 55, 46, 58, 31, 51, 37, 59, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 60; static const unsigned int Unicode_CharOrder[] = { - 65, 5, 66, 17, 67, 23, 68, 9, 69, 0, 70, 14, 71, 10, 72, 18, - 73, 6, 74, 22, 75, 11, 76, 7, 77, 12, 78, 2, 79, 8, 80, 16, - 81, 29, 82, 1, 83, 4, 84, 3, 85, 15, 86, 13, 87, 25, 88, 27, + 65, 5, 66, 17, 67, 23, 68, 9, 69, 0, 70, 14, 71, 11, 72, 18, + 73, 6, 74, 21, 75, 10, 76, 7, 77, 12, 78, 2, 79, 8, 80, 16, + 81, 29, 82, 1, 83, 4, 84, 3, 85, 15, 86, 13, 87, 25, 88, 28, 89, 20, 90, 26, 97, 5, 98, 17, 99, 23, 100, 9, 101, 0,102, 14, - 103, 10, 104, 18, 105, 6, 106, 22, 107, 11, 108, 7, 109, 12,110, 2, + 103, 11, 104, 18, 105, 6, 106, 21, 107, 10, 108, 7, 109, 12,110, 2, 111, 8, 112, 16, 113, 29, 114, 1, 115, 4, 116, 3, 117, 15,118, 13, - 119, 25, 120, 27, 121, 20, 122, 26, 197, 19, 198, 24, 201, 28,216, 21, - 229, 19, 230, 24, 233, 28, 248, 21, + 119, 25, 120, 28, 121, 20, 122, 26, 197, 19, 198, 24, 201, 27,216, 22, + 229, 19, 230, 24, 233, 27, 248, 22, }; /* Model Table: - * Total considered sequences: 967 / 900 - * - Positive sequences: first 442 (0.9950425176429516) - * - Probable sequences: next 157 (599-442) (0.0039580060347621515) - * - Neutral sequences: last 301 (0.0009994763222862524) - * - Negative sequences: -67 (off-ratio) + * Total considered sequences: 1114 / 900 + * - Positive sequences: first 470 (0.9950174595185775) + * - Probable sequences: next 194 (664-470) (0.003983282132802413) + * - Neutral sequences: last 236 (0.0009992583486201356) + * - Negative sequences: -214 (off-ratio) * Negative sequences: TODO */ static const PRUint8 NorwegianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,0,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,1,2,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,2,0,2,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,0,1,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,1,2,3,0,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,1,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,0,1,1,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,2,3,0,2,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,2,1,1,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,0,1,1,0,1,0,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,1,1,1,0,0,1,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,0,0,0,0,1, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,3,1,2,2,1,3,0,0,0,0,0, - 3,3,2,3,2,3,3,3,3,1,2,1,1,0,3,3,0,1,2,3,3,3,3,2,2,0,1,1,2,1, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,1,1,2,1,2,0,1,2,2,1,1, - 3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,2,3,3,2,3,1,1,1,0,1,0,1,0, - 3,3,3,1,3,3,3,3,3,2,0,1,1,0,2,3,2,3,1,3,3,3,3,1,3,1,0,0,1,0, - 3,3,3,3,2,3,3,2,3,1,1,2,2,3,1,3,1,2,2,3,3,3,3,1,2,2,0,1,0,1, - 3,3,3,3,3,1,2,3,1,3,3,3,2,3,2,0,3,2,2,0,0,1,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,3,3,2,0,1,1,1,2,0,1,1,1,0,0, - 3,3,3,3,3,1,2,3,1,3,3,3,3,3,2,0,3,2,1,0,3,1,1,0,0,0,0,0,0,0, - 3,1,1,1,1,3,3,1,3,1,0,1,2,0,2,3,1,0,1,2,1,3,1,0,3,0,0,0,0,0, - 3,2,2,3,2,3,3,3,3,2,1,3,2,0,0,3,0,2,3,0,3,0,0,2,1,1,1,0,0,1, - 0,3,2,2,2,0,1,2,0,1,1,1,1,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0, - 3,1,2,2,2,3,3,1,3,1,0,1,1,0,1,1,0,1,2,0,1,0,0,0,0,2,1,0,0,0, - 3,0,2,1,1,2,2,1,3,0,1,0,1,0,1,2,0,1,1,0,1,0,1,0,0,1,2,0,0,0, - 2,0,1,1,1,2,2,1,2,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0, - 2,2,3,2,2,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, - 0,1,0,0,0,2,1,0,1,0,0,0,0,1,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,1,3,2,0,3,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,1,3,0,3,2,1,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,3,2,2,2,1,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,1,3,0,3,2,0,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,2,2,1,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,2,2,1,0,1,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,1,2,1,1,1,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,1,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,3,0,3,1,0,1,0,0, + 3,3,3,3,2,3,3,3,3,1,2,2,1,1,3,3,0,1,2,3,3,3,3,2,3,0,0,2,0,1, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,1,2,2,2,3,1,1,2,3,2,2, + 3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,3,2,1,3,0,1,1,1,2,0,0, + 3,3,3,2,3,3,3,3,3,2,2,1,1,1,2,3,1,3,1,3,3,3,3,1,3,1,0,2,0,0, + 3,3,3,3,3,3,3,3,3,2,1,1,2,3,1,3,1,1,2,3,3,3,3,1,3,2,0,2,1,1, + 3,3,3,3,3,1,2,3,2,3,3,2,1,3,2,1,3,2,2,1,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,0,1,1,1,2,0,1,1,1,1,0, + 3,1,2,2,2,3,3,1,3,2,2,1,2,1,2,3,1,0,1,2,2,1,3,0,3,0,0,0,0,0, + 3,3,3,3,3,1,2,3,1,3,3,3,3,3,2,1,3,3,2,0,3,2,1,0,0,1,0,0,0,0, + 3,3,2,3,2,3,3,3,3,2,3,1,2,1,1,3,1,2,3,0,3,0,0,2,0,0,1,1,0,1, + 0,3,2,3,2,0,2,2,1,2,1,2,1,1,2,0,2,1,1,0,0,0,0,0,1,0,0,0,0,0, + 3,2,2,1,2,3,3,1,3,1,1,0,1,0,2,2,1,0,2,0,1,0,1,1,1,2,1,0,0,0, + 3,1,1,1,2,3,2,1,3,1,1,1,1,1,0,2,0,1,2,0,2,1,0,1,0,2,2,0,0,0, + 2,2,3,2,2,2,1,2,1,2,0,1,1,1,1,1,0,3,0,0,0,0,0,2,0,0,0,1,0,0, + 2,0,0,2,1,2,2,1,2,0,0,0,0,1,2,1,1,1,1,0,1,0,1,1,0,0,0,0,2,0, + 0,0,0,1,0,2,1,1,0,0,0,0,0,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,1, }; @@ -208,7 +208,7 @@ const SequenceModel Ibm865NorwegianModel = Ibm865_CharToOrderMap, NorwegianLangModel, 30, - (float)0.9990005236777137, + (float)0.9990007416513799, PR_TRUE, "IBM865", "no" @@ -219,7 +219,7 @@ const SequenceModel Iso_8859_15NorwegianModel = Iso_8859_15_CharToOrderMap, NorwegianLangModel, 30, - (float)0.9990005236777137, + (float)0.9990007416513799, PR_TRUE, "ISO-8859-15", "no" @@ -230,7 +230,7 @@ const SequenceModel Iso_8859_1NorwegianModel = Iso_8859_1_CharToOrderMap, NorwegianLangModel, 30, - (float)0.9990005236777137, + (float)0.9990007416513799, PR_TRUE, "ISO-8859-1", "no" @@ -241,7 +241,7 @@ const SequenceModel Windows_1252NorwegianModel = Windows_1252_CharToOrderMap, NorwegianLangModel, 30, - (float)0.9990005236777137, + (float)0.9990007416513799, PR_TRUE, "WINDOWS-1252", "no" @@ -254,5 +254,8 @@ const LanguageModel NorwegianModel = 60, NorwegianLangModel, 30, - (float)0.9995751776807141, + 4, + (float)0.3967761585890805, + 19, + (float)0.03279587657601158, }; diff --git a/src/LangModels/LangPolishModel.cpp b/src/LangModels/LangPolishModel.cpp index 690738f..c770d94 100644 --- a/src/LangModels/LangPolishModel.cpp +++ b/src/LangModels/LangPolishModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:54:55.178474 + * On: 2022-12-14 18:27:15.211093 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 4X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 6X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 24,SYM, 19,SYM, 68, 29,SYM,SYM, 40, 67, 74, 32,SYM, 43, 28, /* AX */ - SYM, 24,SYM, 19,SYM, 68, 29,SYM,SYM, 40, 67, 74, 32,SYM, 43, 28, /* BX */ - 86, 35, 61, 53, 41, 87, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* CX */ - 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78, 54, /* DX */ - 88, 35, 61, 53, 41, 89, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* EX */ - 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78,SYM, /* FX */ + SYM, 24,SYM, 19,SYM, 84, 28,SYM,SYM, 42, 72, 85, 33,SYM, 44, 26, /* AX */ + SYM, 24,SYM, 19,SYM, 86, 28,SYM,SYM, 42, 72, 87, 33,SYM, 44, 26, /* BX */ + 88, 35, 65, 52, 41, 89, 30, 46, 43, 34, 23, 56, 47, 38, 75, 90, /* CX */ + 63, 29, 70, 25, 61, 78, 39,SYM, 51, 76, 55, 91, 37, 54, 77, 53, /* DX */ + 92, 35, 65, 52, 41, 93, 30, 46, 43, 34, 23, 56, 47, 38, 75, 94, /* EX */ + 63, 29, 70, 25, 61, 78, 39,SYM, 51, 76, 55, 95, 37, 54, 77,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_13_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 4X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 6X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 90,SYM,SYM,SYM,SYM, 76, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 91,SYM,SYM,SYM,SYM, 76, /* BX */ - 24, 92, 69, 31, 41, 55, 23, 65, 42, 34, 32, 63, 93, 94, 72, 95, /* CX */ - 40, 27, 73, 25, 44, 66, 38,SYM, 96, 19, 29, 56, 39, 28, 43, 54, /* DX */ - 24, 97, 69, 31, 41, 55, 23, 65, 42, 34, 32, 63, 98, 99, 72,100, /* EX */ - 40, 27, 73, 25, 44, 66, 38,SYM,101, 19, 29, 56, 39, 28, 43,SYM, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 96,SYM,SYM,SYM,SYM, 50, /* AX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 97,SYM,SYM,SYM,SYM, 50, /* BX */ + 24, 98, 58, 30, 41, 48, 23, 62, 43, 34, 33, 64, 82, 99, 71,100, /* CX */ + 42, 29, 74, 25, 49, 68, 39,SYM,101, 19, 28, 57, 37, 26, 44, 53, /* DX */ + 24,102, 58, 30, 41, 48, 23, 62, 43, 34, 33, 64, 82,103, 71,104, /* EX */ + 42, 29, 74, 25, 49, 68, 39,SYM,105, 19, 28, 57, 37, 26, 44,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Iso_8859_16_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 4X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 6X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 24, 24, 19,SYM,SYM, 40,SYM, 40,SYM, 62,SYM, 32,SYM, 32, 28, /* AX */ - SYM,SYM, 42, 19, 43,SYM,SYM,SYM, 43, 42, 62,SYM, 84, 84,102, 28, /* BX */ - 70, 35, 61, 53, 41, 31, 76, 45, 46, 34, 60, 52, 83, 36, 80, 71, /* CX */ - 58, 27, 64, 25, 59, 57, 38, 29, 79, 85, 49,103, 39, 23, 81, 54, /* DX */ - 70, 35, 61, 53, 41, 31, 76, 45, 46, 34, 60, 52, 83, 36, 80, 71, /* EX */ - 58, 27, 64, 25, 59, 57, 38, 29, 79, 85, 49,104, 39, 23, 81,105, /* FX */ + SYM, 24, 24, 19,SYM,SYM, 42,SYM, 42,SYM, 73,SYM, 33,SYM, 33, 26, /* AX */ + SYM,SYM, 43, 19, 44,SYM,SYM,SYM, 44, 43, 73,SYM, 80, 80, 81, 26, /* BX */ + 66, 35, 65, 52, 41, 30, 50, 46, 45, 34, 59, 56,106, 38, 75, 69, /* CX */ + 63, 29, 67, 25, 61, 78, 39, 28,107, 79, 55, 83, 37, 23, 60, 53, /* DX */ + 66, 35, 65, 52, 41, 30, 50, 46, 45, 34, 59, 56,108, 38, 75, 69, /* EX */ + 63, 29, 67, 25, 61, 78, 39, 28,109, 79, 55, 83, 37, 23, 60, 81, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,18 +131,18 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 40,SYM, 29, 74, 43, 32, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 40,SYM, 29, 74, 43, 32, /* 9X */ - SYM,SYM,SYM, 19,SYM, 24,SYM,SYM,SYM,SYM, 67,SYM,SYM,SYM,SYM, 28, /* AX */ - SYM,SYM,SYM, 19,SYM,SYM,SYM,SYM,SYM, 24, 67,SYM, 68,SYM, 68, 28, /* BX */ - 106, 35, 61, 53, 41,107, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* CX */ - 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78, 54, /* DX */ - 108, 35, 61, 53, 41,109, 31, 45, 42, 34, 23, 52, 48, 36, 80, 77, /* EX */ - 58, 27, 82, 25, 59, 57, 38,SYM, 50, 75, 49, 79, 39, 51, 78,SYM, /* FX */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 4X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 6X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 42,SYM, 28,110, 44, 33, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 42,SYM, 28,111, 44, 33, /* 9X */ + SYM,SYM,SYM, 19,SYM, 24,SYM,SYM,SYM,SYM, 72,SYM,SYM,SYM,SYM, 26, /* AX */ + SYM,SYM,SYM, 19,SYM,SYM,SYM,SYM,SYM, 24, 72,SYM,112,SYM,113, 26, /* BX */ + 114, 35, 65, 52, 41,115, 30, 46, 43, 34, 23, 56, 47, 38, 75,116, /* CX */ + 63, 29, 70, 25, 61, 78, 39,SYM, 51, 76, 55,117, 37, 54, 77, 53, /* DX */ + 118, 35, 65, 52, 41,119, 30, 46, 43, 34, 23, 56, 47, 38, 75,120, /* EX */ + 63, 29, 70, 25, 61, 78, 39,SYM, 51, 76, 55,121, 37, 54, 77,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -152,18 +152,18 @@ static const unsigned char Ibm852_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 45, 39, 34, 61, 41, 75, 31, 45, 19, 52, 57, 57, 80, 32, 41, 31, /* 8X */ - 34,110,111, 59, 38, 68, 68, 29, 29, 38, 39, 74, 74, 19,SYM, 42, /* 9X */ - 35, 36, 25, 49, 24, 24, 43, 43, 23, 23,SYM, 32, 42, 67,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 35, 61, 48, 67,SYM,SYM,SYM,SYM, 28, 28,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM, 53, 53,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 58, 58, 77, 52, 77, 82, 36, 80, 48,SYM,SYM,SYM,SYM, 78, 75,SYM, /* DX */ - 25, 54, 59, 27, 27, 82, 40, 40,112, 49,113, 79, 51, 51, 78,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 79, 50, 50,SYM,SYM, /* FX */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 4X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 6X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 46, 37, 34, 65, 41, 76, 30, 46, 19, 56, 78, 78, 75, 33, 41, 30, /* 8X */ + 34,122,123, 61, 39,124,125, 28, 28, 39, 37,126,127, 19,SYM, 43, /* 9X */ + 35, 38, 25, 55, 24, 24, 44, 44, 23, 23,SYM, 33, 43, 72,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 35, 65, 47, 72,SYM,SYM,SYM,SYM, 26, 26,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM, 52, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 63, 63,128, 56,129, 70, 38, 75, 47,SYM,SYM,SYM,SYM, 77, 76,SYM, /* DX */ + 25, 53, 61, 29, 29, 70, 42, 42,130, 55,131,132, 54, 54, 77,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,133, 51, 51,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -173,84 +173,84 @@ static const unsigned char Mac_Centraleurope_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 4X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 11, 15, 3, 26, 20, 22, 1, 18, 6, 14, 16, 5, 2, /* 6X */ - 13, 37, 4, 7, 10, 17, 30, 9, 33, 12, 8,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 41, 69, 69, 34, 24, 38, 39, 35, 24, 42, 41, 42, 31, 31, 34, 32, /* 8X */ - 32, 77, 36, 77, 65, 65, 63, 25, 63, 59, 38, 66, 49, 48, 48, 39, /* 9X */ - SYM,SYM, 23,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM, 23,SYM,SYM,114,115, /* AX */ - 116, 72,SYM,SYM, 72,117,SYM,SYM, 19,118,119, 68, 68,120,121, 73, /* BX */ - 73, 27,SYM,SYM, 27, 82,SYM,SYM,SYM,SYM,SYM, 82, 57, 66, 57, 44, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 44,122,123, 50,SYM,SYM, 50,124, /* DX */ - 125, 40,SYM,SYM, 40, 29, 29, 35, 74, 74, 36, 43, 43, 56, 25, 59, /* EX */ - 56, 75, 49, 75, 79, 79,126,127, 51, 51,128, 28, 19, 28,129,SYM, /* FX */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 4X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 21, 11, 14, 3, 27, 20, 22, 1, 18, 8, 16, 15, 4, 2, /* 6X */ + 13, 36, 5, 6, 10, 17, 31, 9, 32, 12, 7,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 41, 58, 58, 34, 24, 39, 37, 35, 24, 43, 41, 43, 30, 30, 34, 33, /* 8X */ + 33,134, 38,135, 62, 62, 64, 25, 64, 61, 39, 68, 55, 47, 47, 37, /* 9X */ + SYM,SYM, 23,SYM,SYM,SYM,SYM, 53,SYM,SYM,SYM, 23,SYM,SYM, 82,136, /* AX */ + 137, 71,SYM,SYM, 71,138,SYM,SYM, 19,139,140,141,142,143,144, 74, /* BX */ + 74, 29,SYM,SYM, 29, 70,SYM,SYM,SYM,SYM,SYM, 70, 78, 68, 78, 49, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 49,145,146, 51,SYM,SYM, 51,147, /* DX */ + 148, 42,SYM,SYM, 42, 28, 28, 35,149,150, 38, 44, 44, 57, 25, 61, /* EX */ + 57, 76, 55, 76,151,152,153,154, 54, 54,155, 26, 19, 26, 82,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 76; +static const int Unicode_Char_size = 74; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 21, 67, 11, 68, 15, 69, 3, 70, 26, 71, 20, 72, 22, - 73, 1, 74, 18, 75, 6, 76, 14, 77, 16, 78, 5, 79, 2, 80, 13, - 81, 37, 82, 4, 83, 7, 84, 10, 85, 17, 86, 30, 87, 9, 88, 33, - 89, 12, 90, 8, 97, 0, 98, 21, 99, 11, 100, 15, 101, 3,102, 26, - 103, 20, 104, 22, 105, 1, 106, 18, 107, 6, 108, 14, 109, 16,110, 5, - 111, 2, 112, 13, 113, 37, 114, 4, 115, 7, 116, 10, 117, 17,118, 30, - 119, 9, 120, 33, 121, 12, 122, 8, 193, 35, 201, 34, 205, 36,211, 25, - 225, 35, 233, 34, 237, 36, 243, 25, 260, 24, 261, 24, 262, 31,263, 31, - 280, 23, 281, 23, 321, 19, 322, 19, 323, 27, 324, 27, 346, 29,347, 29, - 377, 32, 378, 32, 379, 28, 380, 28, + 65, 0, 66, 21, 67, 11, 68, 14, 69, 3, 70, 27, 71, 20, 72, 22, + 73, 1, 74, 18, 75, 8, 76, 16, 77, 15, 78, 4, 79, 2, 80, 13, + 81, 36, 82, 5, 83, 6, 84, 10, 85, 17, 86, 31, 87, 9, 88, 32, + 89, 12, 90, 7, 97, 0, 98, 21, 99, 11, 100, 14, 101, 3,102, 27, + 103, 20, 104, 22, 105, 1, 106, 18, 107, 8, 108, 16, 109, 15,110, 4, + 111, 2, 112, 13, 113, 36, 114, 5, 115, 6, 116, 10, 117, 17,118, 31, + 119, 9, 120, 32, 121, 12, 122, 7, 193, 35, 201, 34, 211, 25,225, 35, + 233, 34, 243, 25, 260, 24, 261, 24, 262, 30, 263, 30, 280, 23,281, 23, + 321, 19, 322, 19, 323, 29, 324, 29, 346, 28, 347, 28, 377, 33,378, 33, + 379, 26, 380, 26, }; /* Model Table: - * Total sequences: 1547 - * First 512 sequences: 0.9881622113600178 - * Next 512 sequences (512-1024): 0.011288903649768277 - * Rest: 0.0005488849902139173 + * Total considered sequences: 1469 / 1369 + * - Positive sequences: first 603 (0.9950009015061881) + * - Probable sequences: next 297 (900-603) (0.004003405092029788) + * - Neutral sequences: last 469 (0.0009956934017820718) + * - Negative sequences: -100 (off-ratio) * Negative sequences: TODO */ static const PRUint8 PolishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,3,3,2,0,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,1,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,2,2,2,1,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,2,3,3,3,3,3,2,2,2,0,1,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,2,0,2,2,2,2,0,1,2,2,2,2, - 3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,3,2,1,3,2,2,3,3,3,3,0,3,1,2,0,1,2,2,2,2,2, - 3,3,3,3,3,2,3,3,1,3,3,3,2,2,3,2,3,3,2,3,2,2,2,3,3,3,2,0,3,1,1,0,0,1,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,3,2,0,0,0,2,0,0,0,2,2,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,0,1,0,2,0,0,0,2,2,1,2, - 3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,2,3,2,3,2,2,2,3,3,3,1,0,0,2,0,0,0,0,1,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,3,2,2,3,3,3,3,2,0,1,1,2,0,0,1,2,2,2,0, - 3,3,3,3,2,3,3,2,3,2,3,2,3,1,3,2,2,3,3,2,2,2,3,2,3,3,1,0,0,0,1,0,0,0,2,2,2,2, - 3,2,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,3,3,2,0,0,0,3,3,3,3,2,3,2,0,1,1,0,0, - 3,3,3,3,3,3,2,3,2,2,2,3,3,3,3,2,2,3,1,3,2,2,3,2,2,3,2,0,0,0,1,0,0,0,2,2,1,0, - 3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,3,2,0,3,3,2,3,3,2,3,1,2,1,2,0,0,1,2,2,2,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,2,2,3,3,2,0,3,1,2,0,2,0,2,2,2,2, - 3,3,3,3,2,3,3,3,1,2,2,3,3,3,2,2,3,3,2,2,2,3,2,3,2,3,2,0,2,1,1,0,0,0,2,2,2,0, - 3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,0,0,0,3,3,3,2,2,1,2,2,2,2,2,2, - 3,3,3,3,2,3,2,3,2,3,2,3,0,2,2,3,3,3,2,2,2,2,1,3,3,3,1,1,1,3,1,1,0,0,1,2,1,0, - 3,1,3,3,2,3,3,2,2,2,3,2,3,2,1,2,2,3,0,2,3,2,1,3,3,3,1,0,2,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,2,2,3,2,1,2,2,3,3,2,3,2,3,2,2,3,2,2,3,2,0,1,0,2,0,0,1,2,2,1,1, - 3,3,3,3,3,3,2,3,2,2,1,2,3,2,3,2,2,3,2,3,1,2,2,3,2,2,2,0,0,0,0,0,0,0,2,2,1,0, - 3,3,3,3,3,3,2,2,2,2,2,2,3,2,3,2,3,3,2,2,1,2,2,2,2,2,2,0,1,1,2,0,0,0,2,2,0,1, - 0,0,0,0,0,0,3,3,3,1,3,3,0,3,2,3,0,0,0,3,3,3,0,0,0,0,0,0,3,3,0,2,2,0,0,0,0,0, - 0,0,0,0,0,0,2,2,3,2,3,3,0,3,0,3,0,0,0,3,3,2,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0, - 1,0,0,0,3,2,2,2,3,3,2,3,1,2,3,3,2,0,3,3,2,3,1,0,0,0,1,0,3,1,1,0,3,0,0,0,1,0, - 3,3,3,3,3,2,2,2,1,2,2,1,2,1,3,2,2,3,2,2,2,2,1,2,2,1,3,0,0,0,2,0,0,0,2,1,0,1, - 0,0,0,0,0,0,2,3,0,0,1,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,0,3,2,2,0,2,0,2,3,0,3,2,2,3,0,1,1,3,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,3,3,2,1,0,3,0,3,0,2,3,0,3,0,0,0,0,2,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0, - 3,3,3,3,2,2,2,2,1,0,1,0,2,1,2,0,0,2,2,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,2,2,2,0, - 1,0,1,1,0,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,3,0,2,0,2,0,2,1,0,2,3,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, - 2,3,2,2,0,0,1,2,0,1,2,0,1,2,1,0,1,2,0,0,0,1,2,0,0,0,2,0,0,0,2,0,0,2,1,0,0,0, - 2,1,2,2,2,2,1,2,1,0,2,2,0,1,2,2,2,1,1,0,2,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 0,1,1,1,2,2,2,2,2,0,2,2,0,1,2,2,1,1,1,0,1,2,1,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, - 2,1,2,1,2,2,2,2,1,0,1,2,0,0,2,1,1,0,0,0,2,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 1,2,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,3,2,3,0,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,1,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,3,2,2,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,3,0,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,3,3,3,3,3,3,3,3,1,0,0,2,1,0,2,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,1,0,2,2,0,0,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,2,3,3,1,3,0,0,0,2,0,0,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,0,0,0,1,0,0,1,2,1, + 3,3,3,3,3,3,3,1,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,3,3,3,3,2,1,0,0,1,1,0,2,2,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,1,3,2,2,2,3,3,3,0,1,3,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,3,3,3,3,1,2,0,0,1,2,1,0,2,2,0, + 3,3,3,3,3,3,2,3,3,2,3,3,3,1,2,3,3,3,3,3,2,1,3,3,3,3,0,2,0,0,0,1,1,0,2,1,2, + 3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,3,3,2,0,1,0,3,3,3,3,3,1,1,2,1,1,0, + 3,3,3,3,3,3,3,2,3,2,3,3,3,3,1,2,3,3,1,3,1,2,3,3,2,3,0,2,0,0,0,1,0,0,2,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,0,0,2,0,3,2,1,1, + 3,3,3,3,3,2,3,1,3,2,2,3,3,3,2,3,3,3,1,2,2,3,2,3,2,3,1,2,2,0,0,1,0,0,2,2,1, + 3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,2,3,1,1,0,3,1,0,2,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,1,0,0,3,3,3,3,1,2,2,2,1,1,1, + 3,3,3,3,3,2,3,2,3,3,2,3,1,3,3,3,2,3,2,2,2,3,1,3,3,3,1,1,3,0,1,1,0,0,1,2,0, + 3,1,3,3,3,2,2,2,3,3,3,3,3,2,3,2,1,3,0,2,3,2,1,3,3,3,2,1,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,2,2,2,2,3,3,3,3,2,3,2,2,3,3,3,3,0,2,0,0,0,2,1,0,1,2,1, + 3,3,3,3,3,3,3,1,3,3,2,3,3,1,2,1,3,3,3,3,1,2,2,3,3,3,1,2,0,0,0,1,0,0,2,1,1, + 3,3,3,3,3,3,2,2,2,3,3,3,3,2,2,3,3,3,1,3,1,2,2,2,2,3,1,1,2,0,0,2,0,0,2,1,0, + 0,0,0,0,0,0,3,3,3,1,3,3,0,3,3,0,3,0,1,3,3,3,0,0,0,0,3,0,3,0,3,0,0,1,0,0,0, + 0,0,0,0,0,0,3,3,2,2,3,3,0,3,3,0,0,0,0,3,3,2,0,0,0,0,3,0,1,0,3,0,0,1,0,0,0, + 0,0,0,0,2,3,3,3,2,3,3,3,1,2,3,1,3,0,3,3,2,3,1,0,0,0,3,0,0,1,0,1,0,3,0,0,0, + 3,2,3,3,3,0,3,0,3,2,0,2,3,0,3,2,3,3,0,0,0,3,0,3,3,2,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,2,1,2,1,3,2,3,2,1,2,3,3,2,2,2,1,1,1,2,2,0,3,0,0,0,1,0,0,2,0,0, + 1,1,0,0,3,3,0,0,2,3,0,3,0,2,0,3,3,0,0,0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,0,0,0, + 0,0,0,0,0,0,3,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,1,1,0,0,0,2,2,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,2,2,0,0,1,1,2,2,2,1,1,2,2,2,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,2,0, + 2,3,2,2,0,0,1,0,0,1,2,1,1,2,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,3,3,0,0,0,0, + 0,1,0,0,3,3,1,0,1,2,0,2,0,0,3,1,2,0,0,0,0,3,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0, + 2,1,2,1,2,2,2,0,1,0,2,2,0,1,2,2,2,0,0,0,1,2,1,0,0,0,0,1,0,0,0,2,0,0,0,0,0, + 0,1,0,0,3,2,2,2,2,0,1,2,0,1,1,1,2,1,0,0,1,2,0,0,1,0,0,0,0,1,0,1,1,0,0,0,1, + 1,2,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0, }; @@ -258,8 +258,8 @@ const SequenceModel Iso_8859_2PolishModel = { Iso_8859_2_CharToOrderMap, PolishLangModel, - 38, - (float)0.9881622113600178, + 37, + (float)0.9990043065982179, PR_TRUE, "ISO-8859-2", "pl" @@ -269,8 +269,8 @@ const SequenceModel Iso_8859_13PolishModel = { Iso_8859_13_CharToOrderMap, PolishLangModel, - 38, - (float)0.9881622113600178, + 37, + (float)0.9990043065982179, PR_TRUE, "ISO-8859-13", "pl" @@ -280,8 +280,8 @@ const SequenceModel Iso_8859_16PolishModel = { Iso_8859_16_CharToOrderMap, PolishLangModel, - 38, - (float)0.9881622113600178, + 37, + (float)0.9990043065982179, PR_TRUE, "ISO-8859-16", "pl" @@ -291,8 +291,8 @@ const SequenceModel Windows_1250PolishModel = { Windows_1250_CharToOrderMap, PolishLangModel, - 38, - (float)0.9881622113600178, + 37, + (float)0.9990043065982179, PR_TRUE, "WINDOWS-1250", "pl" @@ -302,8 +302,8 @@ const SequenceModel Ibm852PolishModel = { Ibm852_CharToOrderMap, PolishLangModel, - 38, - (float)0.9881622113600178, + 37, + (float)0.9990043065982179, PR_TRUE, "IBM852", "pl" @@ -313,8 +313,8 @@ const SequenceModel Mac_CentraleuropePolishModel = { Mac_Centraleurope_CharToOrderMap, PolishLangModel, - 38, - (float)0.9881622113600178, + 37, + (float)0.9990043065982179, PR_TRUE, "MAC-CENTRALEUROPE", "pl" @@ -324,8 +324,11 @@ const LanguageModel PolishModel = { "pl", Unicode_CharOrder, - 76, + 74, PolishLangModel, - 38, - (float)0.9881622113600178, + 37, + 5, + (float)0.38580674970932544, + 24, + (float)0.030099113395268155, }; diff --git a/src/LangModels/LangPortugueseModel.cpp b/src/LangModels/LangPortugueseModel.cpp index 72eae0b..fc29c27 100644 --- a/src/LangModels/LangPortugueseModel.cpp +++ b/src/LangModels/LangPortugueseModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 19:59:19.803482 + * On: 2022-12-14 18:14:13.337964 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */ - 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */ - 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 34, 12, 9, 6, 2, /* 4X */ + 13, 21, 5, 3, 8, 11, 15, 36, 25, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 34, 12, 9, 6, 2, /* 6X */ + 13, 21, 5, 3, 8, 11, 15, 36, 25, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 53, 54,SYM,SYM, 55,SYM,SYM,SYM, 56, 57, 50,SYM, /* BX */ - 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 58, 23, 45, 46, /* CX */ - 47, 59, 49, 27, 37, 30, 40,SYM, 60, 61, 32, 62, 39, 63, 64, 48, /* DX */ - 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 65, 23, 45, 46, /* EX */ - 47, 66, 49, 27, 37, 30, 40,SYM, 67, 68, 32, 69, 39, 70, 71, 50, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 53,SYM, 53,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 56, 54,SYM,SYM, 56,SYM,SYM,SYM, 57, 58, 59,SYM, /* BX */ + 35, 24, 33, 19, 44, 50, 43, 20, 38, 22, 28, 46, 60, 23, 48, 55, /* CX */ + 42, 40, 52, 27, 37, 30, 39,SYM, 51, 61, 32, 62, 41, 47, 45, 49, /* DX */ + 35, 24, 33, 19, 44, 50, 43, 20, 38, 22, 28, 46, 63, 23, 48, 55, /* EX */ + 42, 40, 52, 27, 37, 30, 39,SYM, 51, 64, 32, 65, 41, 47, 45, 66, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */ - 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */ - 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 34, 12, 9, 6, 2, /* 4X */ + 13, 21, 5, 3, 8, 11, 15, 36, 25, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 34, 12, 9, 6, 2, /* 6X */ + 13, 21, 5, 3, 8, 11, 15, 36, 25, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 72,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 73, 23, 45, 46, /* CX */ - 47, 74, 49, 27, 37, 30, 40,SYM, 75, 76, 32, 77, 39, 78, 79, 48, /* DX */ - 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 80, 23, 45, 46, /* EX */ - 47, 81, 49, 27, 37, 30, 40,SYM, 82, 83, 32, 84, 39, 85, 86, 50, /* FX */ + SYM,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 35, 24, 33, 19, 44, 50, 43, 20, 38, 22, 28, 46, 67, 23, 48, 55, /* CX */ + 42, 40, 52, 27, 37, 30, 39,SYM, 51, 68, 32, 69, 41, 47, 45, 49, /* DX */ + 35, 24, 33, 19, 44, 50, 43, 20, 38, 22, 28, 46, 70, 23, 48, 55, /* EX */ + 42, 40, 52, 27, 37, 30, 39,SYM, 51, 71, 32, 72, 41, 47, 45, 73, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */ - 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */ - 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 87,SYM,SYM,SYM,SYM,SYM,SYM, 88,SYM, 89,ILL, 90,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 91,SYM, 92,ILL, 93, 50, /* 9X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 34, 12, 9, 6, 2, /* 4X */ + 13, 21, 5, 3, 8, 11, 15, 36, 25, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 34, 12, 9, 6, 2, /* 6X */ + 13, 21, 5, 3, 8, 11, 15, 36, 25, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 74,SYM,SYM,SYM,SYM,SYM,SYM, 53,SYM, 75,ILL, 56,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 53,SYM, 76,ILL, 56, 77, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 94,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44, 95, 23, 45, 46, /* CX */ - 47, 96, 49, 27, 37, 30, 40,SYM, 97, 98, 32, 99, 39,100,101, 48, /* DX */ - 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44,102, 23, 45, 46, /* EX */ - 47,103, 49, 27, 37, 30, 40,SYM,104,105, 32,106, 39,107,108, 50, /* FX */ + SYM,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 35, 24, 33, 19, 44, 50, 43, 20, 38, 22, 28, 46, 78, 23, 48, 55, /* CX */ + 42, 40, 52, 27, 37, 30, 39,SYM, 51, 79, 32, 80, 41, 47, 45, 49, /* DX */ + 35, 24, 33, 19, 44, 50, 43, 20, 38, 22, 28, 46, 81, 23, 48, 55, /* EX */ + 42, 40, 52, 27, 37, 30, 39,SYM, 51, 82, 32, 83, 41, 47, 45, 84, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,18 +131,18 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 4X */ - 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 35, 12, 9, 6, 2, /* 6X */ - 13, 22, 5, 3, 8, 11, 15, 34, 24, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 34, 12, 9, 6, 2, /* 4X */ + 13, 21, 5, 3, 8, 11, 15, 36, 25, 31, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 17, 10, 7, 1, 16, 14, 18, 4, 29, 34, 12, 9, 6, 2, /* 6X */ + 13, 21, 5, 3, 8, 11, 15, 36, 25, 31, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,109,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44,110, 23, 45, 46, /* CX */ - 111,112, 49, 27, 37, 30, 40,SYM,113,114, 32,115, 39,116,117, 48, /* DX */ - 36, 25, 33, 19, 41, 42, 43, 21, 38, 20, 28, 44,118, 23, 45, 46, /* EX */ - 119,120, 49, 27, 37, 30, 40,SYM,121,122, 32,123, 39,124,125, 50, /* FX */ + SYM,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 35, 24, 33, 19, 44, 50, 43, 20, 38, 22, 28, 46, 85, 23, 48, 55, /* CX */ + 86, 40, 52, 27, 37, 30, 39,SYM, 51, 87, 32, 88, 41, 89, 90, 49, /* DX */ + 35, 24, 33, 19, 44, 50, 43, 20, 38, 22, 28, 46, 91, 23, 48, 55, /* EX */ + 92, 40, 52, 27, 37, 30, 39,SYM, 51, 93, 32, 94, 41, 95, 96, 97, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -150,65 +150,66 @@ static const int Unicode_Char_size = 76; static const unsigned int Unicode_CharOrder[] = { 65, 0, 66, 17, 67, 10, 68, 7, 69, 1, 70, 16, 71, 14, 72, 18, - 73, 4, 74, 29, 75, 35, 76, 12, 77, 9, 78, 6, 79, 2, 80, 13, - 81, 22, 82, 5, 83, 3, 84, 8, 85, 11, 86, 15, 87, 34, 88, 24, + 73, 4, 74, 29, 75, 34, 76, 12, 77, 9, 78, 6, 79, 2, 80, 13, + 81, 21, 82, 5, 83, 3, 84, 8, 85, 11, 86, 15, 87, 36, 88, 25, 89, 31, 90, 26, 97, 0, 98, 17, 99, 10, 100, 7, 101, 1,102, 16, - 103, 14, 104, 18, 105, 4, 106, 29, 107, 35, 108, 12, 109, 9,110, 6, - 111, 2, 112, 13, 113, 22, 114, 5, 115, 3, 116, 8, 117, 11,118, 15, - 119, 34, 120, 24, 121, 31, 122, 26, 192, 36, 193, 25, 194, 33,195, 19, - 199, 21, 201, 20, 202, 28, 205, 23, 211, 27, 212, 37, 213, 30,218, 32, - 224, 36, 225, 25, 226, 33, 227, 19, 231, 21, 233, 20, 234, 28,237, 23, + 103, 14, 104, 18, 105, 4, 106, 29, 107, 34, 108, 12, 109, 9,110, 6, + 111, 2, 112, 13, 113, 21, 114, 5, 115, 3, 116, 8, 117, 11,118, 15, + 119, 36, 120, 25, 121, 31, 122, 26, 192, 35, 193, 24, 194, 33,195, 19, + 199, 20, 201, 22, 202, 28, 205, 23, 211, 27, 212, 37, 213, 30,218, 32, + 224, 35, 225, 24, 226, 33, 227, 19, 231, 20, 233, 22, 234, 28,237, 23, 243, 27, 244, 37, 245, 30, 250, 32, }; /* Model Table: - * Total sequences: 929 - * First 512 sequences: 0.9952990712503466 - * Next 512 sequences (512-1024): 0.004700928749653451 - * Rest: -7.806255641895632e-18 + * Total considered sequences: 1057 / 1444 + * - Positive sequences: first 508 (0.9950267193246717) + * - Probable sequences: next 167 (675-508) (0.003973967287456359) + * - Neutral sequences: last 769 (0.0009993133878719584) + * - Negative sequences: 387 (off-ratio) * Negative sequences: TODO */ static const PRUint8 PortugueseLangModel[] = { - 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,0,3,2,0,3,0,3,2,0,2,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,2,3,2,3,2,0,3,2,3,2,2,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,2,3,2,0,3,2,3,0,0,3,3,0,0, - 3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,0,3,2,3,3,0,3,3,2,2,2,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,3,2,3,2,2,2,2,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,3,3,2,3,3,3,0,2, - 3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,2,3,0,3,3,2,2,3,0,3, - 3,3,3,3,3,3,2,2,2,3,2,3,2,2,3,2,2,2,2,3,3,0,2,3,0,3,2,2,3,3,2,3,3,2,3,0,0,2, - 3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,3,3,3,0,0,3,0,3,2,3,3,2,2,3,2,3,2,2,0,3, - 3,3,3,3,3,2,3,2,2,3,2,3,2,3,2,0,2,3,0,3,3,0,2,3,2,3,2,3,2,0,2,3,3,3,2,0,0,3, - 3,3,3,2,3,3,3,2,3,3,3,3,3,2,2,0,2,2,3,3,3,3,3,3,0,3,2,3,3,0,2,3,2,3,0,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,3,2,3,3,3,3,2,3,3,0,2,0,0,2,2,0,0, - 3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,0,3,2,3,3,0,0,3,2,3,2,2,0,3, - 3,3,3,3,3,3,2,2,3,2,2,3,3,3,0,0,2,2,3,2,3,3,2,3,0,3,2,3,2,0,2,2,3,2,0,0,0,2, - 3,3,3,2,3,3,3,2,2,3,2,3,3,2,2,2,2,2,3,2,3,0,0,2,0,3,0,2,3,0,2,3,2,2,2,0,0,0, - 3,3,3,0,3,3,0,2,0,0,0,3,2,0,0,2,0,0,0,2,3,0,0,3,0,3,0,0,2,0,2,0,0,2,0,0,0,2, - 3,3,3,2,3,3,0,2,2,2,2,3,3,2,2,0,3,2,0,0,3,0,0,3,0,2,0,3,3,0,0,2,2,2,0,0,0,2, - 3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,2,3,2,2,2,3,0,0,2,0,2,2,2,2,3,0,2,2,0,2,0,0,0, - 3,3,3,3,3,3,3,2,3,2,0,3,2,0,0,0,2,2,2,3,2,0,0,2,0,3,0,2,0,0,3,3,2,2,2,2,0,0, - 0,2,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,3,0,2,0,0,0,0,0, - 2,0,0,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,3,3,3,3,3,3,0,3,3,3,3,3,3,0,0,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,0,0,2,3,0,3,3,2,3,0,3,2,0,2,2,2,0,0,2,3,2,0,2,0,2,2,0,0,0,0,0,0,2, - 0,0,0,3,0,3,2,2,3,0,3,2,3,3,3,3,3,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,0,3,2,2,0,0,2,2,3,2,2,2,0,0,2,2,2,2,0,0,0,0,2,2,2,2,0,2,2,0,2,2,2,0,0, - 0,0,0,3,3,3,3,3,3,3,3,0,3,3,3,2,2,2,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,3,0,0,3,0,2,3,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,2,0,2,0,0,0,2,3,0,2,0,0,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,2,0,2, + 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,3,0,3,3,1,0,3,0,3,3,0,3,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,3,1,3,3,3,0,3,1,3,2,3,2,0,3,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,1,1,2,3,3,2,0,3,0,3,0,0,3,0,3,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,3,3,3,3,0,1,3,2,1,3,3,1,1,2,1,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,3,3,3,3,3,2,3,1,1,2,3,0,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,1,3,3,1,3,3,3,3,3,0,3,2, + 3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,0,2,3,2,3,0,1,3, + 3,3,3,3,3,3,3,3,2,3,2,3,2,1,2,2,2,2,2,3,0,2,3,3,3,1,1,3,3,2,1,2,3,2,0,0,2,2, + 3,3,3,3,3,3,2,1,3,3,3,3,3,2,1,1,1,1,3,3,0,0,3,3,3,0,2,3,3,1,2,3,2,3,1,0,2,3, + 3,3,3,3,3,2,2,1,1,3,2,3,2,3,1,1,1,3,1,3,0,0,3,3,3,0,0,3,2,0,2,2,3,2,1,0,1,3, + 3,3,3,2,3,3,3,3,3,2,3,3,3,1,1,1,2,2,3,3,3,2,3,3,3,1,1,3,3,2,2,2,2,3,3,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,1,3,2,2,3,3,3,3,0,3,3,0,1,0,1,2,0,1,0, + 3,3,3,3,3,1,2,3,3,3,3,3,3,2,3,3,3,3,3,2,1,3,3,3,3,1,1,3,3,1,1,3,1,3,2,0,1,3, + 3,3,3,3,3,3,2,2,3,1,2,3,3,3,1,1,1,1,3,2,3,1,3,2,3,0,1,3,2,1,3,2,3,2,1,0,1,2, + 3,3,3,2,3,3,3,1,2,3,1,3,3,1,2,1,1,2,3,3,0,0,3,2,3,0,0,2,3,1,1,3,1,3,1,0,2,2, + 3,3,3,1,3,3,0,1,0,0,1,3,1,0,0,1,0,0,0,2,0,0,3,3,3,0,0,1,2,1,1,1,0,2,0,0,1,1, + 3,3,3,1,3,3,1,1,2,1,1,3,3,1,1,1,3,1,0,1,0,0,3,3,3,0,0,3,2,1,0,1,1,1,1,0,1,1, + 3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,1,2,2,2,1,0,1,3,2,3,1,1,3,1,3,1,2,1,1,1,0,0,1, + 3,3,3,2,3,2,3,1,3,2,1,3,2,1,1,1,1,2,1,2,0,1,2,2,3,1,1,2,0,1,3,3,2,1,1,0,1,0, + 0,3,3,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,0,0,0,0,1,0,0,1,0,0,3,0,2,0,0,0,0,0, + 1,0,1,0,1,1,0,0,0,0,0,3,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,2,2,3,2,3,3,3,3,3,3,2,3,3,3,2,2,0,0,0,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0, + 3,1,3,3,0,3,3,3,3,3,3,1,3,3,3,3,3,3,0,0,1,2,0,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0, + 1,0,0,3,1,3,1,2,3,1,3,3,3,3,3,3,3,3,0,0,0,2,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,1,3,0,1,0,3,1,3,3,1,3,0,3,2,1,1,2,0,0,3,2,1,3,0,2,0,0,1,1,0,0,0,0,1,1, + 3,3,3,1,3,1,1,0,0,1,1,2,0,1,0,0,1,1,1,2,0,0,2,1,1,0,1,1,1,0,2,1,1,1,1,0,1,2, + 1,1,0,3,3,3,3,3,3,3,3,1,3,3,3,2,3,2,1,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0,0, + 1,1,0,3,1,1,3,0,1,3,0,3,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,1,1,1,0,0,0,0,0,3,0,1,0,1,0,1,0,1,0,0,1,0,3,0,0,2,0,0,1,0,1,0,1,0,0,1, 0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,2,3,3,2,3,2,3,2,3,2,2,0,2,2,2,0,0,0,0,0,2,0,2,0,0,2,0,0,0,0,2,2,0,0, - 0,0,0,3,0,2,3,3,2,3,3,0,3,2,0,2,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,3,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,2,2,2,3,2,2,2,2,0,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,2,2,0,0, - 3,3,3,2,3,2,2,0,0,3,2,2,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0, - 0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,2,3,2,2,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,1,2,3,2,2,2,2,1,3,2,2,1,1,1,1,0,0,0,1,0,0,2,1,0,0,1,0,0,0,0,1,0,1,0, + 0,0,1,3,0,2,3,3,2,3,3,0,3,2,1,2,1,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0, + 1,0,0,0,0,0,3,0,1,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,2,2,1,1,3,1,2,2,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,1,0,2,0,0,1,0,1,0, + 0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,1,2,1,2,1,1,1,1,0,1,0,0,1,2,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0, + 0,0,1,2,0,1,3,1,1,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -217,7 +218,7 @@ const SequenceModel Iso_8859_15PortugueseModel = Iso_8859_15_CharToOrderMap, PortugueseLangModel, 38, - (float)0.9952990712503466, + (float)0.999000686612128, PR_TRUE, "ISO-8859-15", "pt" @@ -228,7 +229,7 @@ const SequenceModel Iso_8859_1PortugueseModel = Iso_8859_1_CharToOrderMap, PortugueseLangModel, 38, - (float)0.9952990712503466, + (float)0.999000686612128, PR_TRUE, "ISO-8859-1", "pt" @@ -239,7 +240,7 @@ const SequenceModel Windows_1252PortugueseModel = Windows_1252_CharToOrderMap, PortugueseLangModel, 38, - (float)0.9952990712503466, + (float)0.999000686612128, PR_TRUE, "WINDOWS-1252", "pt" @@ -250,7 +251,7 @@ const SequenceModel Iso_8859_9PortugueseModel = Iso_8859_9_CharToOrderMap, PortugueseLangModel, 38, - (float)0.9952990712503466, + (float)0.999000686612128, PR_TRUE, "ISO-8859-9", "pt" @@ -263,5 +264,8 @@ const LanguageModel PortugueseModel = 76, PortugueseLangModel, 38, - (float)0.9952990712503466, + 3, + (float)0.33399315313253364, + 21, + (float)0.034949037152514996, }; diff --git a/src/LangModels/LangRomanianModel.cpp b/src/LangModels/LangRomanianModel.cpp index 430f51d..4b22a2e 100644 --- a/src/LangModels/LangRomanianModel.cpp +++ b/src/LangModels/LangRomanianModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 20:04:01.199893 + * On: 2022-12-14 18:14:02.581406 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */ - 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */ - 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 16, 9, 11, 1, 18, 15, 23, 0, 25, 26, 6, 12, 4, 8, /* 4X */ + 13, 32, 3, 10, 5, 7, 17, 29, 27, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 16, 9, 11, 1, 18, 15, 23, 0, 25, 26, 6, 12, 4, 8, /* 6X */ + 13, 32, 3, 10, 5, 7, 17, 29, 27, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 63,SYM, 45,SYM, 64, 58,SYM,SYM, 34, 65, 66, 67,SYM, 35, 59, /* AX */ - SYM, 68,SYM, 45,SYM, 69, 58,SYM,SYM, 34, 70, 71, 72,SYM, 35, 59, /* BX */ - 73, 30, 24, 14, 36, 74, 37, 42, 33, 31, 75, 54, 50, 39, 21, 76, /* CX */ - 46, 51, 77, 38, 53, 47, 40,SYM, 52, 78, 44, 62, 41, 55, 79, 57, /* DX */ - 80, 30, 24, 14, 36, 81, 37, 42, 33, 31, 82, 54, 50, 39, 21, 83, /* EX */ - 46, 51, 84, 38, 53, 47, 40,SYM, 52, 85, 44, 62, 41, 55, 86,SYM, /* FX */ + SYM, 69,SYM, 43,SYM, 48, 64,SYM,SYM, 33, 73, 72, 68,SYM, 39, 61, /* AX */ + SYM, 69,SYM, 43,SYM, 48, 64,SYM,SYM, 33, 74, 72, 68,SYM, 39, 61, /* BX */ + 75, 31, 24, 14, 41, 76, 40, 44, 34, 30, 67, 56, 54, 36, 21, 60, /* CX */ + 53, 49, 66, 35, 51, 52, 37,SYM, 50, 65, 45, 77, 38, 46, 78, 55, /* DX */ + 79, 31, 24, 14, 41, 80, 40, 44, 34, 30, 67, 56, 54, 36, 21, 60, /* EX */ + 53, 49, 66, 35, 51, 52, 37,SYM, 50, 65, 45, 81, 38, 46, 82,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_16_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */ - 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */ - 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 16, 9, 11, 1, 18, 15, 23, 0, 25, 26, 6, 12, 4, 8, /* 4X */ + 13, 32, 3, 10, 5, 7, 17, 29, 27, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 16, 9, 11, 1, 18, 15, 23, 0, 25, 26, 6, 12, 4, 8, /* 6X */ + 13, 32, 3, 10, 5, 7, 17, 29, 27, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 87, 88, 45,SYM,SYM, 34,SYM, 34,SYM, 19,SYM, 89,SYM, 90, 59, /* AX */ - SYM,SYM, 33, 45, 35,SYM,SYM,SYM, 35, 33, 19,SYM, 91, 92, 93, 59, /* BX */ - 60, 30, 24, 14, 36, 37, 56, 42, 43, 31, 94, 54, 48, 39, 21, 49, /* CX */ - 46, 51, 61, 38, 53, 47, 40, 58, 62, 95, 44, 96, 41, 97, 18, 57, /* DX */ - 60, 30, 24, 14, 36, 37, 56, 42, 43, 31, 98, 54, 48, 39, 21, 49, /* EX */ - 46, 51, 61, 38, 53, 47, 40, 58, 62, 99, 44,100, 41,101, 18,102, /* FX */ + SYM, 69, 69, 43,SYM,SYM, 33,SYM, 33,SYM, 19,SYM, 68,SYM, 68, 61, /* AX */ + SYM,SYM, 34, 43, 39,SYM,SYM,SYM, 39, 34, 19,SYM, 83, 84, 71, 61, /* BX */ + 47, 31, 24, 14, 41, 40, 58, 44, 42, 30, 57, 56, 63, 36, 21, 62, /* CX */ + 53, 49, 59, 35, 51, 52, 37, 64, 85, 86, 45, 70, 38, 67, 20, 55, /* DX */ + 47, 31, 24, 14, 41, 40, 58, 44, 42, 30, 57, 56, 63, 36, 21, 62, /* EX */ + 53, 49, 59, 35, 51, 52, 37, 64, 87, 88, 45, 70, 38, 67, 20, 71, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */ - 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */ - 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 34,SYM, 58,103, 35,104, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 34,SYM, 58,105, 35,106, /* 9X */ - SYM,SYM,SYM, 45,SYM,107,SYM,SYM,SYM,SYM,108,SYM,SYM,SYM,SYM, 59, /* AX */ - SYM,SYM,SYM, 45,SYM,SYM,SYM,SYM,SYM,109,110,SYM,111,SYM,112, 59, /* BX */ - 113, 30, 24, 14, 36,114, 37, 42, 33, 31,115, 54, 50, 39, 21,116, /* CX */ - 46, 51,117, 38, 53, 47, 40,SYM, 52,118, 44, 62, 41, 55,119, 57, /* DX */ - 120, 30, 24, 14, 36,121, 37, 42, 33, 31,122, 54, 50, 39, 21,123, /* EX */ - 46, 51,124, 38, 53, 47, 40,SYM, 52,125, 44, 62, 41, 55,126,SYM, /* FX */ + SYM, 2, 16, 9, 11, 1, 18, 15, 23, 0, 25, 26, 6, 12, 4, 8, /* 4X */ + 13, 32, 3, 10, 5, 7, 17, 29, 27, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 16, 9, 11, 1, 18, 15, 23, 0, 25, 26, 6, 12, 4, 8, /* 6X */ + 13, 32, 3, 10, 5, 7, 17, 29, 27, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 33,SYM, 64, 72, 39, 68, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 33,SYM, 64, 72, 39, 68, /* 9X */ + SYM,SYM,SYM, 43,SYM, 69,SYM,SYM,SYM,SYM, 89,SYM,SYM,SYM,SYM, 61, /* AX */ + SYM,SYM,SYM, 43,SYM,SYM,SYM,SYM,SYM, 69, 90,SYM, 48,SYM, 48, 61, /* BX */ + 91, 31, 24, 14, 41, 92, 40, 44, 34, 30, 67, 56, 54, 36, 21, 60, /* CX */ + 53, 49, 66, 35, 51, 52, 37,SYM, 50, 65, 45, 93, 38, 46, 94, 55, /* DX */ + 95, 31, 24, 14, 41, 96, 40, 44, 34, 30, 67, 56, 54, 36, 21, 60, /* EX */ + 53, 49, 66, 35, 51, 52, 37,SYM, 50, 65, 45, 97, 38, 46, 98,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,78 +131,79 @@ static const unsigned char Ibm852_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 4X */ - 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 17, 9, 11, 0, 16, 15, 23, 1, 26, 27, 6, 12, 4, 8, /* 6X */ - 13, 32, 3, 10, 5, 7, 20, 29, 25, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 42, 41, 31, 24, 36,127, 37, 42, 45, 54, 47, 47, 21,128, 36, 37, /* 8X */ - 31,129,130, 53, 40,131,132, 58, 58, 40, 41,133,134, 45,SYM, 33, /* 9X */ - 30, 39, 38, 44,135,136, 35, 35,137,138,SYM,139, 33,140,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 30, 24, 50,141,SYM,SYM,SYM,SYM, 59, 59,SYM, /* BX */ + SYM, 2, 16, 9, 11, 1, 18, 15, 23, 0, 25, 26, 6, 12, 4, 8, /* 4X */ + 13, 32, 3, 10, 5, 7, 17, 29, 27, 28, 22,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 16, 9, 11, 1, 18, 15, 23, 0, 25, 26, 6, 12, 4, 8, /* 6X */ + 13, 32, 3, 10, 5, 7, 17, 29, 27, 28, 22,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 44, 38, 30, 24, 41, 65, 40, 44, 43, 56, 52, 52, 21, 68, 41, 40, /* 8X */ + 30, 99,100, 51, 37, 48, 48, 64, 64, 37, 38, 72, 72, 43,SYM, 34, /* 9X */ + 31, 36, 35, 45, 69, 69, 39, 39, 67, 67,SYM, 68, 34,101,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 31, 24, 54,102,SYM,SYM,SYM,SYM, 61, 61,SYM, /* BX */ SYM,SYM,SYM,SYM,SYM,SYM, 14, 14,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 46, 46,142, 54,143,144, 39, 21, 50,SYM,SYM,SYM,SYM,145,146,SYM, /* DX */ - 38, 57, 53, 51, 51,147, 34, 34,148, 44,149, 62, 55, 55,150,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 62, 52, 52,SYM,SYM, /* FX */ + 53, 53, 60, 56, 60, 66, 36, 21, 54,SYM,SYM,SYM,SYM,103, 65,SYM, /* DX */ + 35, 55, 51, 49, 49, 66, 33, 33,104, 45,105,106, 46, 46,107,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,108, 50, 50,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 66; static const unsigned int Unicode_CharOrder[] = { - 65, 2, 66, 17, 67, 9, 68, 11, 69, 0, 70, 16, 71, 15, 72, 23, - 73, 1, 74, 26, 75, 27, 76, 6, 77, 12, 78, 4, 79, 8, 80, 13, - 81, 32, 82, 3, 83, 10, 84, 5, 85, 7, 86, 20, 87, 29, 88, 25, - 89, 28, 90, 22, 97, 2, 98, 17, 99, 9, 100, 11, 101, 0,102, 16, - 103, 15, 104, 23, 105, 1, 106, 26, 107, 27, 108, 6, 109, 12,110, 4, - 111, 8, 112, 13, 113, 32, 114, 3, 115, 10, 116, 5, 117, 7,118, 20, - 119, 29, 120, 25, 121, 28, 122, 22, 193, 30, 194, 24, 201, 31,206, 21, - 225, 30, 226, 24, 233, 31, 238, 21, 258, 14, 259, 14, 536, 19,537, 19, - 538, 18, 539, 18, + 65, 2, 66, 16, 67, 9, 68, 11, 69, 1, 70, 18, 71, 15, 72, 23, + 73, 0, 74, 25, 75, 26, 76, 6, 77, 12, 78, 4, 79, 8, 80, 13, + 81, 32, 82, 3, 83, 10, 84, 5, 85, 7, 86, 17, 87, 29, 88, 27, + 89, 28, 90, 22, 97, 2, 98, 16, 99, 9, 100, 11, 101, 1,102, 18, + 103, 15, 104, 23, 105, 0, 106, 25, 107, 26, 108, 6, 109, 12,110, 4, + 111, 8, 112, 13, 113, 32, 114, 3, 115, 10, 116, 5, 117, 7,118, 17, + 119, 29, 120, 27, 121, 28, 122, 22, 193, 31, 194, 24, 201, 30,206, 21, + 225, 31, 226, 24, 233, 30, 238, 21, 258, 14, 259, 14, 536, 19,537, 19, + 538, 20, 539, 20, }; /* Model Table: - * Total sequences: 1066 - * First 512 sequences: 0.9975318123681904 - * Next 512 sequences (512-1024): 0.002424831763747681 - * Rest: 4.3355868061878584e-05 + * Total considered sequences: 1337 / 1089 + * - Positive sequences: first 478 (0.9950281325668789) + * - Probable sequences: next 301 (779-478) (0.003975199459074541) + * - Neutral sequences: last 310 (0.0009966679740465167) + * - Negative sequences: -248 (off-ratio) * Negative sequences: TODO */ static const PRUint8 RomanianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,3,2,3,3,3,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,1,3,3,0,3,3,3,3,3,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,2,3,2,3,3,3,0,2,2,3,3,2,3,0, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,3,0,3,3,3,2,2,2,0, - 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,3,2,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,2,3,2,2,3,3,2,2,3,3,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,3,3,3,0,2,3,3,3,2,2,2, - 3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,0,0,3,0,2,2,3,0,2,2,3,3,3,2,0, - 3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,2,2,2,3,2,2,0,3,2,2,2,0, - 3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,0,0,2,3,0,0,2,0,3,3,0,2,2,2,0,2,2,0, - 2,3,1,3,3,3,3,3,0,3,3,3,3,3,0,3,1,3,3,3,3,2,3,0,0,0,2,2,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,2,3,0,0,3,1,2,3,3,0,1,2,3,2,1,2,0, - 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,3,3,1,0,0,0,0,0,2,3,0,2,1,2,2,2,2,0, - 3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,2,2,3,3,2,2,2,2,2,3,0,2,2,3,2,2,2,0, - 3,3,3,0,0,0,0,3,2,2,2,0,0,0,3,0,0,0,0,0,2,2,0,0,2,0,0,2,0,0,0,0,0, - 3,3,3,0,3,3,2,3,3,3,0,0,2,2,2,0,0,0,0,0,2,0,0,0,2,0,0,2,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,2,2,3,2,1,3,2,0,2,0,0,0,2,2,2,3,0,2,2,2,0,2,0,0, - 0,3,0,2,3,0,3,0,0,2,0,0,3,0,0,2,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,2,2,2,3,3,0,2,3,2,2,3,2,0,3,0,0,3,0,3,2,3,0,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,0,2,3,0,0,2,0,2,0,2,0,2,2,3,2,2,2,0, - 0,3,0,3,3,3,3,3,0,1,2,0,3,0,0,0,0,0,2,3,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,2,3,0,3,3,3,2,2,2,3,3,0,3,2,0,0,3,0,0,0,2,3,0,0,3,2,0,0,0, - 3,3,3,2,2,2,3,3,3,2,2,2,2,0,2,0,0,2,0,0,1,0,2,0,2,0,0,2,0,0,2,0,0, - 3,3,3,3,2,2,3,3,3,2,3,1,3,2,2,2,2,2,1,2,2,0,2,3,2,0,2,2,3,2,2,2,0, - 3,2,3,3,3,2,3,2,3,3,3,3,3,2,0,2,2,2,0,0,2,0,2,2,0,2,2,2,0,2,2,0,0, - 3,3,3,3,3,2,3,2,3,2,3,2,2,2,0,0,2,2,0,0,0,0,0,3,0,0,0,2,2,3,0,0,0, - 2,3,0,2,3,2,2,0,0,2,2,2,2,2,0,2,0,0,0,0,2,0,2,2,0,0,0,2,2,0,0,0,0, - 0,3,2,2,1,2,2,0,2,2,2,2,2,2,0,2,0,2,0,0,2,0,2,0,0,0,2,2,0,0,0,0,0, - 0,2,2,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,3,1,3,3,3,2,2,2,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,1,3,3,0,3,3,3,3,3,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,2,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,3,3,3,1,3,3,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,3,0,0,2,3,3,3,2,2,0,3,2,2,1,0, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,3,3,3,3,1,3,2,2,2,1, + 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,3,3,3,2,2,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,1,3,3,0,3,3,3,3,3,1,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,2,2,3,2,2,3,1,2,3,3,1,3,1,2,1,2,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,1,3,3,3,1,3,0,3,2,2,2,2, + 3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,2,1,0,1,2,2,2,3,2,0,3,3,2,2,0, + 3,3,3,2,3,2,2,3,3,3,3,2,3,3,3,2,3,2,3,0,2,2,1,2,3,1,1,1,3,2,2,2,0, + 3,3,3,3,2,3,3,3,3,2,3,2,2,3,3,1,1,1,2,1,3,1,2,3,3,1,2,0,2,0,2,2,0, + 3,1,1,3,3,3,3,3,1,3,3,3,3,3,0,3,3,3,1,3,3,1,3,0,0,2,1,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,1,2,3,3,1,3,3,2,3,1,0,0,1,1,3,3,1,1,1,3,2,2,1,1, + 3,3,3,3,3,2,3,3,3,3,3,3,2,1,3,2,3,2,2,1,3,2,2,2,2,2,1,0,3,2,2,2,0, + 3,3,3,3,2,1,3,3,3,2,3,2,1,1,3,1,1,1,0,1,0,1,1,1,3,1,2,0,2,0,2,2,0, + 3,3,3,3,1,3,3,3,3,2,2,0,2,0,3,2,1,1,3,0,0,0,2,1,3,1,1,0,2,2,2,1,0, + 3,3,3,1,3,3,1,3,3,3,0,1,2,0,2,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0, + 3,3,3,0,0,0,0,3,2,2,1,0,0,0,3,0,0,1,0,0,0,2,0,1,2,0,2,0,0,0,0,0,0, + 3,0,0,1,3,1,3,1,0,1,1,1,3,1,0,1,1,0,0,3,1,0,1,1,0,1,1,0,0,0,0,0,0, + 3,3,3,1,2,2,2,3,3,1,2,2,2,1,3,2,3,3,0,0,0,1,3,2,3,1,2,0,2,1,1,2,1, + 3,3,3,3,3,3,3,3,3,2,2,2,3,1,2,1,2,2,2,0,0,1,2,1,1,1,2,0,3,2,2,2,0, + 3,0,0,3,3,3,3,3,0,1,2,0,3,1,0,0,1,0,0,3,2,0,1,1,0,0,0,0,0,0,0,0,0, + 3,3,3,2,2,1,3,3,3,1,1,2,2,1,2,0,2,1,1,0,0,0,1,1,1,1,2,0,1,0,1,2,0, + 3,3,3,3,2,2,3,3,3,1,3,1,3,2,1,1,1,2,2,0,0,1,1,3,1,1,2,0,3,2,1,2,0, + 3,3,3,1,1,3,1,3,2,3,0,1,1,3,2,0,1,3,2,0,0,0,0,2,1,0,0,3,2,1,0,0,0, + 2,3,3,3,3,2,3,3,3,2,3,3,3,2,0,2,3,1,1,0,0,0,2,2,0,1,2,1,1,2,1,0,1, + 3,3,3,2,2,1,2,2,3,1,3,1,1,2,0,0,1,0,2,0,0,0,0,3,0,0,2,0,2,2,0,0,0, + 1,2,2,2,2,2,2,1,1,2,2,2,2,2,0,2,1,2,1,0,0,0,1,1,1,1,1,1,1,0,0,0,0, + 0,0,0,2,3,1,2,0,0,2,2,2,2,1,0,1,2,1,0,0,0,0,2,2,0,0,2,1,0,0,0,0,0, + 2,0,2,0,0,1,0,3,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, }; @@ -211,7 +212,7 @@ const SequenceModel Iso_8859_2RomanianModel = Iso_8859_2_CharToOrderMap, RomanianLangModel, 33, - (float)0.9975318123681904, + (float)0.9990033320259535, PR_TRUE, "ISO-8859-2", "ro" @@ -222,7 +223,7 @@ const SequenceModel Iso_8859_16RomanianModel = Iso_8859_16_CharToOrderMap, RomanianLangModel, 33, - (float)0.9975318123681904, + (float)0.9990033320259535, PR_TRUE, "ISO-8859-16", "ro" @@ -233,7 +234,7 @@ const SequenceModel Windows_1250RomanianModel = Windows_1250_CharToOrderMap, RomanianLangModel, 33, - (float)0.9975318123681904, + (float)0.9990033320259535, PR_TRUE, "WINDOWS-1250", "ro" @@ -244,7 +245,7 @@ const SequenceModel Ibm852RomanianModel = Ibm852_CharToOrderMap, RomanianLangModel, 33, - (float)0.9975318123681904, + (float)0.9990033320259535, PR_TRUE, "IBM852", "ro" @@ -257,5 +258,8 @@ const LanguageModel RomanianModel = 66, RomanianLangModel, 33, - (float)0.9975318123681904, + 3, + (float)0.32848477086438155, + 20, + (float)0.03787956408345752, }; diff --git a/src/LangModels/LangSlovakModel.cpp b/src/LangModels/LangSlovakModel.cpp index 45859f0..59a933f 100644 --- a/src/LangModels/LangSlovakModel.cpp +++ b/src/LangModels/LangSlovakModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-21 13:00:32.554155 + * On: 2022-12-14 18:25:49.974197 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 4X */ - 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 6X */ - 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 20, 15, 14, 2, 29, 27, 16, 4, 18, 8, 10, 11, 3, 1, /* 4X */ + 13, 40, 5, 6, 7, 12, 9, 34, 37, 21, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 15, 14, 2, 29, 27, 16, 4, 18, 8, 10, 11, 3, 1, /* 6X */ + 13, 40, 5, 6, 7, 12, 9, 34, 37, 21, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 70,SYM, 50,SYM, 31, 58,SYM,SYM, 29, 54, 33, 71,SYM, 30, 63, /* AX */ - SYM, 72,SYM, 50,SYM, 31, 58,SYM,SYM, 29, 54, 33, 73,SYM, 30, 63, /* BX */ - 48, 16, 66, 51, 36, 46, 47, 49, 24, 25, 56, 41, 44, 23, 61, 39, /* CX */ - 57, 53, 38, 32, 35, 55, 45,SYM, 42, 52, 26, 74, 43, 22, 68, 59, /* DX */ - 48, 16, 66, 51, 36, 46, 47, 49, 24, 25, 56, 41, 44, 23, 61, 39, /* EX */ - 57, 53, 38, 32, 35, 55, 45,SYM, 42, 52, 26, 75, 43, 22, 68,SYM, /* FX */ + SYM, 69,SYM, 47,SYM, 31, 54,SYM,SYM, 28, 63, 33, 70,SYM, 30, 58, /* AX */ + SYM, 71,SYM, 47,SYM, 31, 54,SYM,SYM, 28, 63, 33, 72,SYM, 30, 58, /* BX */ + 46, 17, 66, 52, 36, 42, 49, 50, 24, 25, 57, 41, 45, 23, 59, 39, /* CX */ + 62, 53, 38, 32, 35, 60, 48,SYM, 43, 51, 26, 67, 44, 22, 73, 56, /* DX */ + 46, 17, 66, 52, 36, 42, 49, 50, 24, 25, 57, 41, 45, 23, 59, 39, /* EX */ + 62, 53, 38, 32, 35, 60, 48,SYM, 43, 51, 26, 67, 44, 22, 74,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 4X */ - 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 6X */ - 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 58, 33, 30, 76, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 58, 33, 30, 77, /* 9X */ - SYM,SYM,SYM, 50,SYM, 78,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM, 63, /* AX */ - SYM,SYM,SYM, 50,SYM,SYM,SYM,SYM,SYM, 79, 54,SYM, 31,SYM, 31, 63, /* BX */ - 48, 16, 66, 51, 36, 46, 47, 49, 24, 25, 56, 41, 44, 23, 61, 39, /* CX */ - 57, 53, 38, 32, 35, 55, 45,SYM, 42, 52, 26, 80, 43, 22, 68, 59, /* DX */ - 48, 16, 66, 51, 36, 46, 47, 49, 24, 25, 56, 41, 44, 23, 61, 39, /* EX */ - 57, 53, 38, 32, 35, 55, 45,SYM, 42, 52, 26, 81, 43, 22, 68,SYM, /* FX */ + SYM, 0, 20, 15, 14, 2, 29, 27, 16, 4, 18, 8, 10, 11, 3, 1, /* 4X */ + 13, 40, 5, 6, 7, 12, 9, 34, 37, 21, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 15, 14, 2, 29, 27, 16, 4, 18, 8, 10, 11, 3, 1, /* 6X */ + 13, 40, 5, 6, 7, 12, 9, 34, 37, 21, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 28,SYM, 54, 33, 30, 75, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 28,SYM, 54, 33, 30, 76, /* 9X */ + SYM,SYM,SYM, 47,SYM, 77,SYM,SYM,SYM,SYM, 63,SYM,SYM,SYM,SYM, 58, /* AX */ + SYM,SYM,SYM, 47,SYM,SYM,SYM,SYM,SYM, 78, 63,SYM, 31,SYM, 31, 58, /* BX */ + 46, 17, 66, 52, 36, 42, 49, 50, 24, 25, 57, 41, 45, 23, 59, 39, /* CX */ + 62, 53, 38, 32, 35, 60, 48,SYM, 43, 51, 26, 67, 44, 22, 79, 56, /* DX */ + 46, 17, 66, 52, 36, 42, 49, 50, 24, 25, 57, 41, 45, 23, 59, 39, /* EX */ + 62, 53, 38, 32, 35, 60, 48,SYM, 43, 51, 26, 67, 44, 22, 80,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Ibm852_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 4X */ - 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 6X */ - 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 49, 43, 25, 66, 36, 52, 47, 49, 50, 41, 55, 55, 61, 82, 36, 47, /* 8X */ - 25, 46, 46, 35, 45, 31, 31, 58, 58, 45, 43, 33, 33, 50,SYM, 24, /* 9X */ - 16, 23, 32, 26, 83, 84, 30, 30, 56, 56,SYM, 85, 24, 54,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 16, 66, 44, 54,SYM,SYM,SYM,SYM, 63, 63,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM, 51, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 57, 57, 39, 41, 39, 38, 23, 61, 44,SYM,SYM,SYM,SYM, 68, 52,SYM, /* DX */ - 32, 59, 35, 53, 53, 38, 29, 29, 48, 26, 48, 86, 22, 22, 68,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 87, 42, 42,SYM,SYM, /* FX */ + SYM, 0, 20, 15, 14, 2, 29, 27, 16, 4, 18, 8, 10, 11, 3, 1, /* 4X */ + 13, 40, 5, 6, 7, 12, 9, 34, 37, 21, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 15, 14, 2, 29, 27, 16, 4, 18, 8, 10, 11, 3, 1, /* 6X */ + 13, 40, 5, 6, 7, 12, 9, 34, 37, 21, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 50, 44, 25, 66, 36, 51, 49, 50, 47, 41, 60, 60, 59, 81, 36, 49, /* 8X */ + 25, 42, 42, 35, 48, 31, 31, 54, 54, 48, 44, 33, 33, 47,SYM, 24, /* 9X */ + 17, 23, 32, 26, 82, 83, 30, 30, 57, 57,SYM, 84, 24, 63,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 17, 66, 45, 63,SYM,SYM,SYM,SYM, 58, 58,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM, 52, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 62, 62, 39, 41, 39, 38, 23, 59, 45,SYM,SYM,SYM,SYM, 85, 51,SYM, /* DX */ + 32, 56, 35, 53, 53, 38, 28, 28, 46, 26, 46, 67, 22, 22, 86,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 67, 43, 43,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,148 +131,143 @@ static const unsigned char Mac_Centraleurope_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 4X */ - 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 6X */ - 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 36, 69, 69, 25, 88, 45, 43, 16, 89, 24, 36, 24, 47, 47, 25, 90, /* 8X */ - 91, 39, 23, 39, 67, 67, 92, 32, 93, 35, 45, 60, 26, 44, 44, 43, /* 9X */ - SYM,SYM, 56,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM, 56,SYM,SYM, 94, 95, /* AX */ - 96, 64,SYM,SYM, 64, 97,SYM,SYM, 50, 98, 99, 31, 31, 46, 46,100, /* BX */ - 101, 53,SYM,SYM, 53, 38,SYM,SYM,SYM,SYM,SYM, 38, 55, 60, 55, 62, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 62, 48, 48, 42,SYM,SYM, 42,102, /* DX */ - 103, 29,SYM,SYM, 29, 58, 58, 16, 33, 33, 23, 30, 30, 65, 32, 35, /* EX */ - 65, 52, 26, 52,104,105,106,107, 22, 22,108, 63, 50, 63,109,SYM, /* FX */ + SYM, 0, 20, 15, 14, 2, 29, 27, 16, 4, 18, 8, 10, 11, 3, 1, /* 4X */ + 13, 40, 5, 6, 7, 12, 9, 34, 37, 21, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 20, 15, 14, 2, 29, 27, 16, 4, 18, 8, 10, 11, 3, 1, /* 6X */ + 13, 40, 5, 6, 7, 12, 9, 34, 37, 21, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 36, 61, 61, 25, 87, 48, 44, 17, 88, 24, 36, 24, 49, 49, 25, 89, /* 8X */ + 90, 39, 23, 39, 91, 92, 93, 32, 94, 35, 48, 64, 26, 45, 45, 44, /* 9X */ + SYM,SYM, 57,SYM,SYM,SYM,SYM, 56,SYM,SYM,SYM, 57,SYM,SYM, 95, 96, /* AX */ + 97, 65,SYM,SYM, 65, 98,SYM,SYM, 47, 68, 68, 31, 31, 42, 42, 99, /* BX */ + 100, 53,SYM,SYM, 53, 38,SYM,SYM,SYM,SYM,SYM, 38, 60, 64, 60, 55, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 55, 46, 46, 43,SYM,SYM, 43,101, /* DX */ + 102, 28,SYM,SYM, 28, 54, 54, 17, 33, 33, 23, 30, 30,103, 32, 35, /* EX */ + 104, 51, 26, 51, 67, 67,105,106, 22, 22,107, 58, 47, 58,108,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 98; +static const int Unicode_Char_size = 94; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 19, 67, 15, 68, 12, 69, 2, 70, 28, 71, 27, 72, 17, - 73, 5, 74, 18, 75, 7, 76, 9, 77, 11, 78, 3, 79, 1, 80, 14, - 81, 40, 82, 4, 83, 6, 84, 8, 85, 13, 86, 10, 87, 34, 88, 37, - 89, 21, 90, 20, 97, 0, 98, 19, 99, 15, 100, 12, 101, 2,102, 28, - 103, 27, 104, 17, 105, 5, 106, 18, 107, 7, 108, 9, 109, 11,110, 3, - 111, 1, 112, 14, 113, 40, 114, 4, 115, 6, 116, 8, 117, 13,118, 10, - 119, 34, 120, 37, 121, 21, 122, 20, 193, 16, 196, 36, 201, 25,203, 41, - 205, 23, 211, 32, 212, 35, 214, 45, 218, 26, 220, 43, 221, 22,225, 16, - 228, 36, 233, 25, 235, 41, 237, 23, 243, 32, 244, 35, 246, 45,250, 26, - 252, 43, 253, 22, 262, 47, 263, 47, 268, 24, 269, 24, 270, 39,271, 39, - 282, 44, 283, 44, 313, 46, 314, 46, 317, 31, 318, 31, 327, 38,328, 38, - 340, 48, 341, 48, 344, 42, 345, 42, 352, 29, 353, 29, 356, 33,357, 33, - 381, 30, 382, 30, + 65, 0, 66, 20, 67, 15, 68, 14, 69, 2, 70, 29, 71, 27, 72, 16, + 73, 4, 74, 18, 75, 8, 76, 10, 77, 11, 78, 3, 79, 1, 80, 13, + 81, 40, 82, 5, 83, 6, 84, 7, 85, 12, 86, 9, 87, 34, 88, 37, + 89, 21, 90, 19, 97, 0, 98, 20, 99, 15, 100, 14, 101, 2,102, 29, + 103, 27, 104, 16, 105, 4, 106, 18, 107, 8, 108, 10, 109, 11,110, 3, + 111, 1, 112, 13, 113, 40, 114, 5, 115, 6, 116, 7, 117, 12,118, 9, + 119, 34, 120, 37, 121, 21, 122, 19, 193, 17, 196, 36, 201, 25,203, 41, + 205, 23, 211, 32, 212, 35, 218, 26, 220, 44, 221, 22, 225, 17,228, 36, + 233, 25, 235, 41, 237, 23, 243, 32, 244, 35, 250, 26, 252, 44,253, 22, + 268, 24, 269, 24, 270, 39, 271, 39, 282, 45, 283, 45, 313, 42,314, 42, + 317, 31, 318, 31, 327, 38, 328, 38, 340, 46, 341, 46, 344, 43,345, 43, + 352, 28, 353, 28, 356, 33, 357, 33, 381, 30, 382, 30, }; /* Model Table: - * Total considered sequences: 1410 / 2401 - * - Positive sequences: first 773 (0.9950030300775062) - * - Probable sequences: next 277 (1050-773) (0.003999347913144824) - * - Neutral sequences: last 1351 (0.0009976220093489419) - * - Negative sequences: 991 (off-ratio) + * Total considered sequences: 1383 / 2209 + * - Positive sequences: first 768 (0.9950176374237621) + * - Probable sequences: next 243 (1011-768) (0.0039886852153107055) + * - Neutral sequences: last 1198 (0.0009936773609271476) + * - Negative sequences: 826 (off-ratio) * Negative sequences: TODO */ static const PRUint8 SlovakLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,2, - 3,0,3,3,3,3,3,3,1,3,3,0,0,3,3,3,2,1,2,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,1, - 3,2,0,3,3,3,3,3,0,0,3,0,0,3,3,3,1,1,2,1,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,1, - 3,0,3,3,3,3,3,3,2,3,3,0,0,3,3,3,1,0,1,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3, - 3,3,3,3,3,3,3,0,3,0,3,1,1,1,0,1,1,2,0,1,3,1,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3, - 3,3,3,3,3,3,3,0,3,2,2,3,0,2,2,0,2,2,0,2,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,3, - 3,3,2,3,3,3,3,2,3,3,2,0,1,3,1,0,3,1,3,0,0,0,0,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3, - 2,3,3,3,3,0,1,2,2,3,3,3,1,0,2,0,2,2,0,1,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,1,2,3,3,3, - 2,3,3,1,2,0,1,3,3,0,2,3,1,0,3,0,0,2,2,1,0,2,1,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,3,3,3,1,3,3,3,3,3, - 2,3,3,2,2,2,0,0,3,1,2,1,1,1,2,2,0,2,2,1,2,1,1,0,1, - 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3, - 3,3,3,3,3,3,3,1,3,0,2,2,2,0,2,1,2,0,0,1,0,1,0,0,0, - 3,3,3,3,3,3,3,3,3,3,1,2,3,3,3,3,3,2,2,3,3,3,3,3, - 3,3,3,2,1,3,2,0,1,0,0,3,3,0,2,3,0,1,0,0,2,1,0,0,2, - 3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,1,2,3,2,3,2,3, - 2,3,3,1,2,1,2,0,2,0,1,3,3,1,1,0,0,1,0,2,1,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 2,3,3,3,2,3,3,3,3,0,3,3,1,0,3,0,0,2,2,1,2,1,3,0,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,1, - 3,1,0,3,3,3,3,2,1,2,2,0,0,3,1,3,2,1,1,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,0,1,2,3,3,3,3,3,1,2,0,3,1,3, - 2,3,3,0,2,3,0,1,1,2,0,3,3,0,2,0,0,2,0,1,0,1,2,0,0, - 3,3,3,3,3,3,3,3,3,3,2,1,2,3,1,3,2,3,1,2,3,3,0,3, - 0,3,3,2,2,0,0,0,1,2,0,0,0,0,1,0,3,1,0,0,0,0,0,0,0, - 0,1,1,3,3,1,3,3,3,3,3,3,3,2,3,3,0,3,3,3,3,0,0,0, - 3,0,0,2,1,3,3,3,0,1,0,0,0,0,2,1,0,0,2,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,1,2,2,0,3,3,2, - 2,3,3,1,1,3,0,3,2,0,2,2,2,0,2,0,2,1,0,2,0,2,2,0,2, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,2,3,3,1,0,3, - 3,1,3,0,1,3,2,1,2,1,0,0,1,0,1,0,0,2,0,2,0,1,0,0,0, - 3,3,3,3,3,3,3,3,2,3,2,3,3,3,1,3,3,2,3,3,2,3,3,3, - 3,3,2,2,0,2,1,2,1,0,2,1,0,0,1,0,0,1,2,1,1,1,1,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 2,3,3,1,0,3,0,2,3,3,2,0,0,0,2,0,0,1,0,1,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,1,0,0, - 3,0,2,2,2,3,3,2,0,3,1,0,0,0,3,0,1,0,0,0,0,2,0,0,0, - 0,0,0,3,3,0,3,3,3,2,3,3,2,0,2,3,0,2,1,3,3,0,0,0, - 2,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,2,0,3,3,1,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,0,0,0, - 3,0,0,2,1,3,3,2,0,3,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0, - 3,3,3,3,1,3,3,3,3,3,1,2,0,3,0,2,2,1,0,0,0,0,0,3, - 0,0,1,0,0,3,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 0,2,1,3,3,0,2,3,3,3,2,3,3,1,2,3,0,3,1,1,3,0,0,0, - 2,0,0,2,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 1,2,0,3,3,0,3,3,3,3,3,3,3,0,3,3,0,3,1,3,3,0,0,0, - 3,0,0,0,2,3,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,2,2,2,3,0,0,3,3,2,2,0,3,0,1, - 0,2,2,3,1,0,0,1,3,0,2,0,0,0,0,0,0,1,0,2,0,2,0,0,0, - 3,3,3,2,3,3,2,3,3,3,1,2,1,3,2,1,3,0,1,1,0,3,1,3, - 1,3,1,2,3,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,3,3,3,2,0,3,3,0,2,2,0,1,0,0,0,3, - 2,2,1,0,0,3,0,2,1,3,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0, - 3,3,3,3,1,3,3,3,0,1,0,2,3,3,0,0,2,0,1,3,0,0,0,3, - 0,2,1,0,0,3,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0, - 3,3,2,3,1,0,3,3,2,0,2,3,1,3,0,1,0,1,1,3,0,0,0,0, - 1,0,3,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,3,3,1,2,1,3,2,1,3,3,0,3,2,0,1,2,3,3,0,0,0, - 0,0,0,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,1,2,2,0,1,1,0,0,1,2,2,0,0,1,1,0,1,0,0,0,0, - 1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,1,2,2,0,1,1,1,0,1,0,2,0,1,0,2,0,1, - 0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0, - 0,0,0,0,3,0,3,2,2,3,3,2,3,0,1,2,0,1,2,3,3,0,0,0, - 1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,2,2,1,2,2,3,0,1,0,2,0,0,1,0,1,0,1,3,0,0,0, - 3,0,0,1,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,2,0,3,0,2,3,0,2,1,0,2,3,0,0,0,0,0,0,2,0,1, - 0,0,0,0,2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,0,0,0,1,0,0,0,0,2,1,3,0,0,2,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,0,0,0,2,0,0,0,0,2,0,3,0,0,1,0,0,0,0,0,0,0, - 0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,2,0,0,2,0,0,0,0,1,0,0,3,0,0,0,0,0,1,0,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,1,2,0,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,2,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,2,2,1,1,1,2,2,0,0,1,0,0,2,0,1,0,2,1,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, - 0,0,0,1,2,0,1,3,2,1,0,1,1,0,2,2,0,1,1,0,1,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, - 0,0,0,2,2,0,1,1,2,1,0,2,0,0,1,1,0,1,0,1,0,0,0,0, - 0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,0, - 0,0,0,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 2,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, - 0,0,0,0,0,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,1, + 1,3,1,1,3,3,3,3,3,0,3,3,0,0,3,3,3,2,0,0,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0, + 1,3,1,0,3,3,3,3,3,1,0,3,0,0,3,3,2,1,3,0,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0, + 3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,3,3,1,0,0,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,0,3,0,2,2,0,1,0,1,1,0,0,0,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0, + 3,3,3,1,3,3,3,3,2,3,3,2,0,1,3,2,0,2,0,0,3,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3, + 3,3,3,3,3,3,3,3,0,3,2,2,3,1,1,2,0,2,0,0,0,1,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2, + 3,3,3,3,2,1,3,0,2,1,3,2,2,1,0,1,0,1,0,0,0,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,3,3, + 3,2,3,3,2,2,2,0,1,3,1,2,1,0,0,3,2,0,0,0,1,1,2,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,1,2,1,3,3, + 3,3,3,3,2,1,2,2,2,3,0,1,3,1,1,3,0,0,0,1,1,1,0,1, + 3,3,3,3,3,3,3,3,3,0,3,2,3,3,3,3,3,3,1,3,3,3,3, + 3,3,3,3,2,3,0,2,1,2,0,0,3,3,0,3,3,0,0,1,0,0,2,2, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3, + 3,2,3,3,3,3,3,3,0,3,1,2,2,2,0,2,0,1,1,0,0,1,0,0, + 3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,3,2,2,3,3,2, + 3,2,3,3,2,2,3,2,1,3,0,2,3,3,1,0,0,0,0,0,0,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0, + 1,3,2,0,3,3,2,3,2,1,2,1,0,0,3,1,3,1,1,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,2,3,2,3,3,2,3,3,3,0,1,2,3,1, + 3,2,3,3,1,3,2,0,2,2,1,0,3,3,1,2,0,0,0,2,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3, + 3,2,3,3,2,3,1,3,3,3,0,3,3,0,0,3,0,0,0,3,2,1,1,1, + 3,3,3,3,3,3,2,3,3,2,3,2,3,1,3,3,3,2,1,2,3,3,1, + 3,0,3,3,1,0,2,0,0,1,1,0,1,0,1,1,0,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,1,3,1,0,1,3,3, + 2,2,3,2,1,3,1,0,3,1,0,2,2,1,0,1,0,0,0,2,0,1,0,3, + 0,0,0,3,1,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,0, + 0,3,0,0,2,3,0,3,3,0,1,0,0,0,0,2,1,1,0,0,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,1,0, + 3,3,1,3,2,3,1,1,1,2,0,2,0,1,0,1,0,0,0,0,0,2,1,0, + 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,1,3,3,2,3,1,0,2,3,3,2,0,0,2,2,0,1,0,0,0,1,0,0, + 3,3,3,3,3,3,3,2,3,2,3,3,3,1,3,3,2,3,3,2,3,3,3, + 3,3,3,2,1,2,0,1,3,2,1,1,1,1,2,1,0,0,0,1,1,2,1,0, + 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,1,1, + 0,3,0,2,2,3,1,3,1,0,3,1,0,0,1,3,0,0,0,0,1,0,0,0, + 1,0,1,3,0,3,3,3,3,3,3,3,1,2,3,3,3,0,1,3,3,0,0, + 0,2,0,0,0,3,0,2,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0, + 2,1,0,3,1,3,3,3,3,3,3,3,0,3,3,3,2,0,3,3,2,0,0, + 0,3,0,0,2,3,0,3,1,0,3,0,0,0,0,2,0,0,0,0,1,0,0,0, + 3,3,3,3,3,1,2,3,3,1,3,2,3,0,0,2,1,2,0,0,2,0,0, + 3,0,0,1,0,3,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0, + 1,2,2,3,1,3,2,3,2,2,3,3,1,2,3,3,3,0,1,3,2,0,0, + 0,2,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,3,0,3,3,3,3,3,3,3,0,3,3,3,3,0,0,3,3,0,0, + 0,3,0,0,1,3,1,3,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0, + 3,3,3,3,3,3,2,2,1,2,3,3,3,2,2,0,3,3,0,0,2,3,0, + 2,0,3,2,2,1,1,0,1,3,0,2,0,1,0,0,0,0,0,0,0,2,0,0, + 3,3,3,3,3,1,3,3,3,3,3,2,3,3,0,1,1,2,0,0,1,0,0, + 3,2,1,1,1,3,0,0,2,1,3,0,0,0,0,1,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,2,3,3,0,3,2,3,2,1,1,1,3,2,1,1,3,0, + 3,0,2,2,2,1,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,1,3,0,2,2,3,0,3,1,0,2,0,0,3,0,0, + 3,0,2,1,0,3,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0, + 3,3,1,3,0,2,3,1,3,2,0,3,3,0,1,1,1,0,1,0,3,0,0, + 0,1,0,3,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,3,1,3,1,3,0,0,3,3,0,3,3,2,1,0,1,2,3,0,0, + 0,1,0,0,3,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,0,1,3,1,1,1,0,1,1,2,0,2,0,1,1,1,1,0,0,0, + 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 3,3,3,3,3,3,3,2,1,0,2,1,1,1,2,2,2,0,2,0,0,2,0, + 1,0,0,0,1,0,2,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0, + 0,0,0,1,0,3,3,2,2,3,3,1,0,1,3,2,0,0,2,3,3,0,0, + 0,0,0,0,0,1,0,3,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 0,0,0,2,0,2,2,3,0,0,1,1,0,0,2,1,1,0,0,3,1,1,0, + 0,3,0,0,1,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,0,1,3,2,2,1,1,3,3,1,1,1,0,0,0,1,1,0, + 1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, + 3,3,0,0,0,0,2,1,1,0,0,2,3,0,0,0,1,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,0,0,0,2,1,0,0,0,1,2,0,0,0,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,0,1,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,2,0,0, + 0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, + 1,1,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, + 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,2,2,1,2,1,1,0,0,2,0,0,0,1,2,1,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,2,0,1,2,0,1,1,0,0,1,1,0,0,1,1,0,0,0, + 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, }; @@ -280,8 +275,8 @@ const SequenceModel Iso_8859_2SlovakModel = { Iso_8859_2_CharToOrderMap, SlovakLangModel, - 49, - (float)0.9990023779906511, + 47, + (float)0.9990063226390729, PR_TRUE, "ISO-8859-2", "sk" @@ -291,8 +286,8 @@ const SequenceModel Windows_1250SlovakModel = { Windows_1250_CharToOrderMap, SlovakLangModel, - 49, - (float)0.9990023779906511, + 47, + (float)0.9990063226390729, PR_TRUE, "WINDOWS-1250", "sk" @@ -302,8 +297,8 @@ const SequenceModel Ibm852SlovakModel = { Ibm852_CharToOrderMap, SlovakLangModel, - 49, - (float)0.9990023779906511, + 47, + (float)0.9990063226390729, PR_TRUE, "IBM852", "sk" @@ -313,8 +308,8 @@ const SequenceModel Mac_CentraleuropeSlovakModel = { Mac_Centraleurope_CharToOrderMap, SlovakLangModel, - 49, - (float)0.9990023779906511, + 47, + (float)0.9990063226390729, PR_TRUE, "MAC-CENTRALEUROPE", "sk" @@ -324,8 +319,11 @@ const LanguageModel SlovakModel = { "sk", Unicode_CharOrder, - 98, + 94, SlovakLangModel, - 49, - (float)0.9998118217433309, + 47, + 5, + (float)0.37747196381525266, + 27, + (float)0.034743885663659047, }; diff --git a/src/LangModels/LangSloveneModel.cpp b/src/LangModels/LangSloveneModel.cpp index fb91e7e..0cdefd3 100644 --- a/src/LangModels/LangSloveneModel.cpp +++ b/src/LangModels/LangSloveneModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-21 14:41:34.895906 + * On: 2022-12-14 18:20:53.180941 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 4X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 6X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 4X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 6X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 33,SYM, 34,SYM, 35, 36,SYM,SYM, 20, 37, 38, 39,SYM, 23, 40, /* AX */ - SYM, 41,SYM, 42,SYM, 43, 44,SYM,SYM, 20, 45, 46, 47,SYM, 23, 48, /* BX */ - 7, 2, 49, 50, 51, 52, 53, 54, 19, 1, 55, 56, 57, 3, 58, 59, /* CX */ - 60, 61, 62, 0, 0, 63, 29,SYM, 31, 64, 15, 65, 28, 32, 66, 67, /* DX */ - 7, 2, 68, 69, 70, 71, 72, 73, 19, 1, 74, 75, 76, 3, 77, 78, /* EX */ - 79, 80, 81, 0, 0, 82, 29,SYM, 31, 83, 15, 84, 28, 32, 85,SYM, /* FX */ + SYM, 64,SYM, 41,SYM, 63, 52,SYM,SYM, 22, 49, 60, 65,SYM, 23, 45, /* AX */ + SYM, 66,SYM, 41,SYM, 63, 52,SYM,SYM, 22, 49, 60, 67,SYM, 23, 45, /* BX */ + 5, 1, 51, 46, 32, 68, 31, 33, 19, 2, 62, 48, 34, 3, 54, 69, /* CX */ + 38, 42, 47, 0, 0, 50, 30,SYM, 35, 70, 15, 71, 28, 37, 72, 39, /* DX */ + 5, 1, 51, 46, 32, 73, 31, 33, 19, 2, 62, 48, 34, 3, 54, 74, /* EX */ + 38, 42, 47, 0, 0, 50, 30,SYM, 35, 75, 15, 76, 28, 37, 77,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_16_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 4X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 6X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 4X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 6X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 86, 87, 88,SYM,SYM, 20,SYM, 20,SYM, 89,SYM, 90,SYM, 91, 92, /* AX */ - SYM,SYM, 19, 93, 23,SYM,SYM,SYM, 23, 19, 94,SYM, 95, 96, 97, 98, /* BX */ - 99, 2,100,101,102,103,104,105, 1, 1, 1,106, 3, 3,107,108, /* CX */ - 109,110, 0, 0, 0,111, 29,112,113, 15, 15,114, 28,115,116,117, /* DX */ - 118, 2,119,120,121,122,123,124, 1, 1, 1,125, 3, 3,126,127, /* EX */ - 128,129, 0, 0, 0,130, 29,131,132, 15, 15,133, 28,134,135,136, /* FX */ + SYM, 78, 79, 41,SYM,SYM, 22,SYM, 22,SYM, 80,SYM, 81,SYM, 82, 45, /* AX */ + SYM,SYM, 19, 41, 23,SYM,SYM,SYM, 23, 19, 83,SYM, 84, 85, 86, 45, /* BX */ + 36, 1, 51, 46, 32, 31, 57, 33, 2, 2, 2, 48, 3, 3, 54, 58, /* CX */ + 38, 42, 0, 0, 0, 50, 30, 52, 87, 15, 15, 88, 28, 62, 44, 39, /* DX */ + 36, 1, 51, 46, 32, 31, 57, 33, 2, 2, 2, 48, 3, 3, 54, 58, /* EX */ + 38, 42, 0, 0, 0, 50, 30, 52, 89, 15, 15, 90, 28, 62, 44, 91, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 4X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 6X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 20,SYM,137,138, 23,139, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 20,SYM,140,141, 23,142, /* 9X */ - SYM,SYM,SYM,143,SYM,144,SYM,SYM,SYM,SYM,145,SYM,SYM,SYM,SYM,146, /* AX */ - SYM,SYM,SYM,147,SYM,SYM,SYM,SYM,SYM,148,149,SYM,150,SYM,151,152, /* BX */ - 7, 2,153,154,155,156,157,158, 19, 1,159,160,161, 3,162,163, /* CX */ - 164,165,166, 0, 0,167, 29,SYM, 31,168, 15,169, 28, 32,170,171, /* DX */ - 7, 2,172,173,174,175,176,177, 19, 1,178,179,180, 3,181,182, /* EX */ - 183,184,185, 0, 0,186, 29,SYM, 31,187, 15,188, 28, 32,189,SYM, /* FX */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 4X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 6X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM, 52, 60, 23, 92, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM, 52, 60, 23, 93, /* 9X */ + SYM,SYM,SYM, 41,SYM, 94,SYM,SYM,SYM,SYM, 49,SYM,SYM,SYM,SYM, 45, /* AX */ + SYM,SYM,SYM, 41,SYM,SYM,SYM,SYM,SYM, 95, 49,SYM, 63,SYM, 63, 45, /* BX */ + 5, 1, 51, 46, 32, 96, 31, 33, 19, 2, 62, 48, 34, 3, 54, 97, /* CX */ + 38, 42, 47, 0, 0, 50, 30,SYM, 35, 98, 15, 99, 28, 37,100, 39, /* DX */ + 5, 1, 51, 46, 32,101, 31, 33, 19, 2, 62, 48, 34, 3, 54,102, /* EX */ + 38, 42, 47, 0, 0, 50, 30,SYM, 35,103, 15,104, 28, 37,105,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,18 +131,18 @@ static const unsigned char Ibm852_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 4X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 6X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 190, 28, 1,191,192,193,194,195,196,197,198,199,200,201,202,203, /* 8X */ - 1,204,205, 0, 29,206,207,208,209, 29, 28,210,211,212,SYM, 19, /* 9X */ - 2, 3, 0, 15,213,214, 23, 23,215,216,SYM,217, 19,218,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 2,219,220,221,SYM,SYM,SYM,SYM,222,223,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM,224,225,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 226,227,228,229,230,231, 3,232,233,SYM,SYM,SYM,SYM,234,235,SYM, /* DX */ - 0,236, 0,237,238,239, 20, 20, 7, 15, 7,240, 32, 32,241,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,242, 31, 31,SYM,SYM, /* FX */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 4X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 6X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 33, 28, 2, 51, 32,106, 31, 33, 41, 48, 50, 50, 54,107, 32, 31, /* 8X */ + 2,108,109, 0, 30, 63, 63, 52, 52, 30, 28, 60, 60, 41,SYM, 19, /* 9X */ + 1, 3, 0, 15,110,111, 23, 23, 62, 62,SYM,112, 19, 49,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 1, 51, 34, 49,SYM,SYM,SYM,SYM, 45, 45,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM, 46, 46,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 38, 38,113, 48,114, 47, 3, 54, 34,SYM,SYM,SYM,SYM,115,116,SYM, /* DX */ + 0, 39, 0, 42, 42, 47, 22, 22, 5, 15, 5,117, 37, 37,118,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,119, 35, 35,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -152,73 +152,73 @@ static const unsigned char Mac_Centraleurope_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 4X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 16, 22, 13, 1, 24, 18, 21, 3, 6, 11, 5, 12, 4, 0, /* 6X */ - 14, 30, 7, 8, 9, 15, 10, 26, 25, 27, 17,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 243,244,245, 1,246, 29, 28, 2,247, 19,248, 19,249,249, 1,249, /* 8X */ - 249,249, 3,249,249,249,249, 0,249, 0, 29,249, 15,249,249, 28, /* 9X */ - SYM,SYM,249,SYM,SYM,SYM,SYM,249,SYM,SYM,SYM,249,SYM,SYM,249,249, /* AX */ - 249,249,SYM,SYM,249,249,SYM,SYM,249,249,249,249,249,249,249,249, /* BX */ - 249,249,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,249,249,249,249,249, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249, 7, 7, 31,SYM,SYM, 31,249, /* DX */ - 249, 20,SYM,SYM, 20,249,249, 2,249,249, 3, 23, 23,249, 0, 0, /* EX */ - 249,249, 15,249,249,249,249,249, 32, 32,249,249,249,249,249,SYM, /* FX */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 4X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 2, 24, 18, 20, 3, 10, 11, 7, 14, 4, 0, /* 6X */ + 13, 29, 5, 6, 8, 15, 9, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 32, 43, 43, 2,120, 30, 28, 1,121, 19, 32, 19, 31, 31, 2,122, /* 8X */ + 123,124, 3,125, 56, 56, 55, 0, 55, 0, 30,126, 15, 34, 34, 28, /* 9X */ + SYM,SYM, 62,SYM,SYM,SYM,SYM, 39,SYM,SYM,SYM, 62,SYM,SYM,127,128, /* AX */ + 129, 59,SYM,SYM, 59,130,SYM,SYM, 41,131,132, 63, 63,133,134, 53, /* BX */ + 53, 42,SYM,SYM, 42, 47,SYM,SYM,SYM,SYM,SYM, 47, 50,135, 50, 40, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40, 5, 5, 35,SYM,SYM, 35,136, /* DX */ + 137, 22,SYM,SYM, 22, 52, 52, 1, 60, 60, 3, 23, 23, 61, 0, 0, /* EX */ + 61,138, 15,139,140,141,142,143, 37, 37,144, 45, 41, 45,145,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 81; static const unsigned int Unicode_CharOrder[] = { - 32, 0, 65, 2, 66, 16, 67, 22, 68, 13, 69, 1, 70, 24, 71, 18, - 72, 21, 73, 3, 74, 6, 75, 11, 76, 5, 77, 12, 78, 4, 79, 0, - 80, 14, 82, 7, 83, 8, 84, 9, 85, 15, 86, 10, 90, 17, 97, 2, - 98, 16, 99, 22, 100, 13, 101, 1, 102, 24, 103, 18, 104, 21, 105, 3, - 106, 6, 107, 11, 108, 5, 109, 12, 110, 4, 111, 0, 112, 14, 114, 7, - 115, 8, 116, 9, 117, 15, 118, 10, 122, 17, 225, 2, 232, 1, 233, 1, - 234, 1, 236, 3, 237, 3, 242, 0, 243, 0, 244, 0, 249, 15, 250, 15, - 268, 19, 269, 19, 341, 7, 352, 20, 353, 20, 381, 23, 382, 23, 513, 2, - 515, 2, 517, 1, 519, 1, 521, 3, 523, 3, 525, 0, 527, 0, 531, 7, - 533, 15, 535, 15, 601, 1, 768, 1, 769, 0, 783, 1, 785, 0,7865, 1, + 32, 0, 65, 1, 66, 17, 67, 21, 68, 12, 69, 2, 70, 24, 71, 18, + 72, 20, 73, 3, 74, 10, 75, 11, 76, 7, 77, 14, 78, 4, 79, 0, + 80, 13, 82, 5, 83, 6, 84, 8, 85, 15, 86, 9, 90, 16, 97, 1, + 98, 17, 99, 21, 100, 12, 101, 2, 102, 24, 103, 18, 104, 20, 105, 3, + 106, 10, 107, 11, 108, 7, 109, 14, 110, 4, 111, 0, 112, 13, 114, 5, + 115, 6, 116, 8, 117, 15, 118, 9, 122, 16, 225, 1, 232, 2, 233, 2, + 234, 2, 236, 3, 237, 3, 242, 0, 243, 0, 244, 0, 249, 15, 250, 15, + 268, 19, 269, 19, 341, 5, 352, 22, 353, 22, 381, 23, 382, 23, 513, 1, + 515, 1, 517, 2, 519, 2, 521, 3, 523, 3, 525, 0, 527, 0, 531, 5, + 533, 15, 535, 15, 601, 2, 768, 2, 769, 0, 783, 2, 785, 0,7865, 2, 7885, 0, }; /* Model Table: - * Total considered sequences: 480 / 625 - * - Positive sequences: first 360 (0.9950453499390822) - * - Probable sequences: next 83 (443-360) (0.00397996480303231) - * - Neutral sequences: last 182 (0.0009746852578854659) - * - Negative sequences: 145 (off-ratio) + * Total considered sequences: 912 / 625 + * - Positive sequences: first 420 (0.9950318187902709) + * - Probable sequences: next 144 (564-420) (0.003969660901056105) + * - Neutral sequences: last 61 (0.0009985203086729788) + * - Negative sequences: -287 (off-ratio) * Negative sequences: TODO */ static const PRUint8 SloveneLangModel[] = { 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,2,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2, - 3,3,3,3,0,1,3,0,3,3,0,3,0,3,3,3,2,2,3,3,3,3,3,0,3, - 3,3,3,3,3,3,3,0,3,3,3,3,3,1,3,3,3,2,3,0,1,3,3,1,2, - 3,3,3,3,3,2,0,0,3,2,3,2,3,2,3,3,3,0,0,0,3,1,2,0,0, - 3,3,3,3,3,2,3,0,3,3,3,3,3,3,1,3,3,3,3,2,3,3,3,3,0, - 3,3,3,3,3,3,2,3,0,3,3,3,3,0,3,3,3,0,0,2,0,0,3,0,0, - 3,3,3,3,3,3,3,3,3,2,3,3,2,0,2,3,0,0,1,0,0,2,2,0,0, - 3,3,3,3,3,3,2,3,3,3,0,3,0,0,3,2,3,3,1,2,3,0,3,0,0, - 3,3,3,3,3,3,3,3,2,3,3,2,3,3,1,3,0,0,0,0,0,1,3,0,1, - 3,3,3,3,3,3,0,3,3,0,2,2,1,2,3,3,3,0,0,1,3,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,0,1,2,2,2,0, - 3,3,3,3,3,3,1,3,3,2,0,1,1,0,2,3,0,0,0,0,2,2,0,0,0, - 3,1,3,2,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,1,3,3,2, - 3,3,3,3,3,3,3,3,3,3,2,0,3,3,0,3,2,3,2,3,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,1,3,3,3,3,3,1,3,0,3,3,0,0,0, - 3,3,3,3,2,3,0,3,0,0,0,0,0,0,0,3,2,0,0,0,0,2,0,0,0, - 3,3,3,3,3,2,3,3,0,0,2,3,0,0,0,3,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,0,0,3,0,3,3,0,3,1,0,0,0,3,0,0,2,0,0, - 3,3,3,3,3,2,0,3,0,2,0,2,2,2,0,3,0,0,0,0,0,0,0,0,0, - 3,3,3,3,0,2,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,3,0,0,2, - 3,3,3,3,3,0,3,0,0,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0, - 3,3,3,3,0,0,0,3,3,1,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,2,3,2,3,3,3,2,1,3,3,3,3,3,2,3,3,2,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,1,1,3,3,0,0,2, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,2,2,3, + 3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,2,2,0,3,2,0,0,1, + 3,3,3,3,3,3,3,3,3,1,3,3,3,3,2,3,3,3,3,3,2,3,3,2,1, + 3,3,3,3,3,2,3,2,2,3,1,3,3,3,3,3,2,3,1,1,2,3,3,0,1, + 3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,0,1,0,1,2,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2, + 3,3,3,3,3,3,3,3,3,1,2,2,2,2,1,3,1,1,0,2,3,2,2,0,1, + 3,3,3,3,3,3,3,3,1,2,2,2,2,3,2,3,1,3,1,3,1,3,3,1,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,2,3,2,3,1,0, + 3,3,3,3,3,3,3,3,2,2,3,2,3,1,3,3,2,2,1,3,2,2,2,1,0, + 3,3,3,3,3,3,2,3,2,2,1,1,1,1,2,3,1,1,2,0,2,0,0,0,1, + 3,3,3,3,3,3,1,3,0,2,3,3,1,1,1,3,0,2,0,1,0,1,0,0,1, + 3,3,3,3,3,3,2,2,3,1,1,3,1,1,2,3,1,1,1,2,1,1,0,0,1, + 3,3,3,3,2,2,2,3,3,2,1,3,2,1,1,3,1,1,1,0,3,3,0,0,1, + 3,3,3,3,3,2,0,3,3,3,3,3,1,3,2,2,0,1,0,3,0,1,0,0,0, + 3,3,3,3,3,2,1,1,0,1,3,2,0,0,1,3,0,3,2,0,0,0,0,0,0, + 3,3,3,3,1,3,3,3,2,0,2,1,1,1,1,3,0,1,1,0,1,0,1,0,2, }; @@ -227,7 +227,7 @@ const SequenceModel Iso_8859_2SloveneModel = Iso_8859_2_CharToOrderMap, SloveneLangModel, 25, - (float)0.9990253147421145, + (float)0.999001479691327, PR_FALSE, "ISO-8859-2", "sl" @@ -238,7 +238,7 @@ const SequenceModel Iso_8859_16SloveneModel = Iso_8859_16_CharToOrderMap, SloveneLangModel, 25, - (float)0.9990253147421145, + (float)0.999001479691327, PR_FALSE, "ISO-8859-16", "sl" @@ -249,7 +249,7 @@ const SequenceModel Windows_1250SloveneModel = Windows_1250_CharToOrderMap, SloveneLangModel, 25, - (float)0.9990253147421145, + (float)0.999001479691327, PR_FALSE, "WINDOWS-1250", "sl" @@ -260,7 +260,7 @@ const SequenceModel Ibm852SloveneModel = Ibm852_CharToOrderMap, SloveneLangModel, 25, - (float)0.9990253147421145, + (float)0.999001479691327, PR_FALSE, "IBM852", "sl" @@ -271,7 +271,7 @@ const SequenceModel Mac_CentraleuropeSloveneModel = Mac_Centraleurope_CharToOrderMap, SloveneLangModel, 25, - (float)0.9990253147421145, + (float)0.999001479691327, PR_FALSE, "MAC-CENTRALEUROPE", "sl" @@ -284,5 +284,8 @@ const LanguageModel SloveneModel = 81, SloveneLangModel, 25, - (float)0.9993179075502389, + 2, + (float)0.3170327842079661, + 19, + (float)0.03483655056229821, }; diff --git a/src/LangModels/LangSpanishModel.cpp b/src/LangModels/LangSpanishModel.cpp index 7e41230..61d2dd6 100644 --- a/src/LangModels/LangSpanishModel.cpp +++ b/src/LangModels/LangSpanishModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 11:33:00.157304 + * On: 2022-12-14 18:18:05.348804 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 4X */ - 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 6X */ - 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 14, 9, 8, 0, 18, 15, 20, 5, 25, 30, 7, 12, 3, 2, /* 4X */ + 13, 22, 6, 4, 10, 11, 16, 31, 27, 17, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 14, 9, 8, 0, 18, 15, 20, 5, 25, 30, 7, 12, 3, 2, /* 6X */ + 13, 22, 6, 4, 10, 11, 16, 31, 27, 17, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM, 38,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 51, 54,SYM,SYM, 51,SYM,SYM,SYM, 46, 46, 55,SYM, /* BX */ - 36, 25, 43, 45, 37, 56, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* CX */ - 49, 27, 44, 19, 42, 57, 34,SYM, 58, 59, 30, 60, 32, 40, 50, 61, /* DX */ - 36, 25, 43, 45, 37, 62, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* EX */ - 49, 27, 44, 19, 42, 63, 34,SYM, 64, 65, 30, 66, 32, 40, 50, 67, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 46,SYM, 46,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 59, 52,SYM,SYM, 60,SYM,SYM,SYM, 53, 53, 58,SYM, /* BX */ + 43, 24, 40, 38, 41, 51, 39, 33, 35, 26, 45, 37, 57, 21, 34, 50, /* CX */ + 55, 28, 54, 19, 47, 42, 36,SYM, 44, 49, 29, 48, 32, 61, 62, 56, /* DX */ + 43, 24, 40, 38, 41, 51, 39, 33, 35, 26, 45, 37, 57, 21, 34, 50, /* EX */ + 55, 28, 54, 19, 47, 42, 36,SYM, 44, 49, 29, 48, 32, 63, 64, 58, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 4X */ - 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 6X */ - 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 14, 9, 8, 0, 18, 15, 20, 5, 25, 30, 7, 12, 3, 2, /* 4X */ + 13, 22, 6, 4, 10, 11, 16, 31, 27, 17, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 14, 9, 8, 0, 18, 15, 20, 5, 25, 30, 7, 12, 3, 2, /* 6X */ + 13, 22, 6, 4, 10, 11, 16, 31, 27, 17, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 68,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 36, 25, 43, 45, 37, 69, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* CX */ - 49, 27, 44, 19, 42, 70, 34,SYM, 71, 72, 30, 73, 32, 40, 50, 74, /* DX */ - 36, 25, 43, 45, 37, 75, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* EX */ - 49, 27, 44, 19, 42, 76, 34,SYM, 77, 78, 30, 79, 32, 40, 50, 80, /* FX */ + SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 43, 24, 40, 38, 41, 51, 39, 33, 35, 26, 45, 37, 57, 21, 34, 50, /* CX */ + 55, 28, 54, 19, 47, 42, 36,SYM, 44, 49, 29, 48, 32, 65, 66, 56, /* DX */ + 43, 24, 40, 38, 41, 51, 39, 33, 35, 26, 45, 37, 57, 21, 34, 50, /* EX */ + 55, 28, 54, 19, 47, 42, 36,SYM, 44, 49, 29, 48, 32, 67, 68, 58, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,78 +110,79 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 4X */ - 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 6X */ - 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM, 46,ILL, 51,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM, 46,ILL, 51, 82, /* 9X */ + SYM, 1, 14, 9, 8, 0, 18, 15, 20, 5, 25, 30, 7, 12, 3, 2, /* 4X */ + 13, 22, 6, 4, 10, 11, 16, 31, 27, 17, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 14, 9, 8, 0, 18, 15, 20, 5, 25, 30, 7, 12, 3, 2, /* 6X */ + 13, 22, 6, 4, 10, 11, 16, 31, 27, 17, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 69,SYM,SYM,SYM,SYM,SYM,SYM, 46,SYM, 53,ILL, 70,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 46,SYM, 53,ILL, 71, 58, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 83,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 36, 25, 43, 45, 37, 84, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* CX */ - 49, 27, 44, 19, 42, 85, 34,SYM, 86, 87, 30, 88, 32, 40, 50, 89, /* DX */ - 36, 25, 43, 45, 37, 90, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* EX */ - 49, 27, 44, 19, 42, 91, 34,SYM, 92, 93, 30, 94, 32, 40, 50, 95, /* FX */ + SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 43, 24, 40, 38, 41, 51, 39, 33, 35, 26, 45, 37, 57, 21, 34, 50, /* CX */ + 55, 28, 54, 19, 47, 42, 36,SYM, 44, 49, 29, 48, 32, 72, 73, 56, /* DX */ + 43, 24, 40, 38, 41, 51, 39, 33, 35, 26, 45, 37, 57, 21, 34, 50, /* EX */ + 55, 28, 54, 19, 47, 42, 36,SYM, 44, 49, 29, 48, 32, 74, 75, 58, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 66; static const unsigned int Unicode_CharOrder[] = { - 65, 1, 66, 14, 67, 10, 68, 8, 69, 0, 70, 17, 71, 15, 72, 20, - 73, 4, 74, 24, 75, 29, 76, 7, 77, 12, 78, 3, 79, 2, 80, 13, - 81, 22, 82, 6, 83, 5, 84, 9, 85, 11, 86, 16, 87, 31, 88, 28, - 89, 18, 90, 23, 97, 1, 98, 14, 99, 10, 100, 8, 101, 0,102, 17, - 103, 15, 104, 20, 105, 4, 106, 24, 107, 29, 108, 7, 109, 12,110, 3, - 111, 2, 112, 13, 113, 22, 114, 6, 115, 5, 116, 9, 117, 11,118, 16, - 119, 31, 120, 28, 121, 18, 122, 23, 193, 25, 201, 26, 205, 21,209, 27, - 211, 19, 218, 30, 220, 32, 225, 25, 233, 26, 237, 21, 241, 27,243, 19, - 250, 30, 252, 32, + 65, 1, 66, 14, 67, 9, 68, 8, 69, 0, 70, 18, 71, 15, 72, 20, + 73, 5, 74, 25, 75, 30, 76, 7, 77, 12, 78, 3, 79, 2, 80, 13, + 81, 22, 82, 6, 83, 4, 84, 10, 85, 11, 86, 16, 87, 31, 88, 27, + 89, 17, 90, 23, 97, 1, 98, 14, 99, 9, 100, 8, 101, 0,102, 18, + 103, 15, 104, 20, 105, 5, 106, 25, 107, 30, 108, 7, 109, 12,110, 3, + 111, 2, 112, 13, 113, 22, 114, 6, 115, 4, 116, 10, 117, 11,118, 16, + 119, 31, 120, 27, 121, 17, 122, 23, 193, 24, 201, 26, 205, 21,209, 28, + 211, 19, 218, 29, 220, 32, 225, 24, 233, 26, 237, 21, 241, 28,243, 19, + 250, 29, 252, 32, }; /* Model Table: - * Total sequences: 1002 - * First 512 sequences: 0.9966074680689881 - * Next 512 sequences (512-1024): 0.003392531931011823 - * Rest: 3.209238430557093e-17 + * Total considered sequences: 1131 / 1089 + * - Positive sequences: first 468 (0.9950191343195147) + * - Probable sequences: next 177 (645-468) (0.0039894116732021034) + * - Neutral sequences: last 444 (0.0009914540072831768) + * - Negative sequences: -42 (off-ratio) * Negative sequences: TODO */ static const PRUint8 SpanishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,3,3,3,2,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,3,3,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,2,2,3,3,0,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,3,3,3,3,3,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,0,3,3,0,0,3,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,2,3,3,2,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,2,2,2,2, - 3,3,3,2,3,3,3,2,2,2,2,3,3,2,2,2,2,2,3,3,2,3,3,2,2,3,3,0,2,2,2,2,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,2,3,3,3,3,0,3,2,3,3,0,2,2,3,2,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,3,2,2,0,3,3,0,2,3,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,0,2,0, - 3,3,3,3,3,3,3,2,2,2,3,3,3,3,3,2,2,2,3,3,2,3,0,2,2,3,3,0,2,2,3,2,2, - 3,3,3,2,3,3,3,3,2,3,3,3,2,3,2,2,2,2,2,3,3,3,2,2,0,3,3,0,0,2,3,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,2,3,0,2,3,3,3,0,0,2,2,2,2, - 3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,0,2,3,3,3,3,0,2,0,2,3,0,2,2,3,2,3, - 3,3,3,2,3,2,2,2,2,2,2,3,0,0,2,2,2,0,2,3,2,3,0,0,0,3,3,0,0,2,0,0,0, - 3,3,3,2,3,2,3,3,2,3,2,3,2,2,2,2,0,3,2,3,0,3,0,0,0,2,3,0,0,2,3,0,0, - 3,3,3,3,3,3,2,3,2,2,2,3,2,2,2,2,2,2,2,3,2,0,0,2,2,2,2,0,2,2,2,2,0, - 2,2,0,3,2,3,3,3,3,2,3,0,3,3,2,3,3,2,2,0,2,0,2,0,2,0,0,2,2,2,0,0,0, - 3,3,3,3,3,2,3,3,2,3,2,3,3,2,2,0,0,2,3,2,2,3,2,2,0,2,2,0,2,2,3,2,2, - 2,3,3,3,0,3,3,2,3,3,3,0,3,2,2,3,2,3,0,0,0,0,2,2,2,0,2,2,2,2,0,0,0, - 0,2,0,0,2,2,2,2,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,2,2,2,0,3,3,3,2,2,2,3,2,0,2,3,2,2,3,3,0,3,2,0,0,2,2,2,0, - 3,3,3,2,3,2,2,2,2,2,2,3,0,2,0,0,2,0,2,3,2,2,0,0,0,2,3,0,0,2,2,2,0, - 2,2,0,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,3,0,2,2,2,0,0,2,2,2,0,0,0, - 2,3,2,3,2,3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,2,0,2,2,2,0,0,2,3,0,0,0,0, - 3,3,3,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,0,2,0,0,0,2,2,0,0,0,0,0,0, - 3,3,3,0,3,2,2,2,2,3,3,2,2,3,2,2,3,2,2,2,2,2,2,0,0,2,2,0,3,0,0,2,0, - 3,3,3,2,3,3,3,3,0,2,2,3,3,2,2,2,2,0,3,2,2,2,0,2,0,2,3,0,2,2,2,2,2, - 2,3,2,3,0,3,2,3,2,3,3,0,3,2,3,0,0,2,0,0,2,0,0,2,2,0,0,2,0,0,0,0,0, - 3,3,3,2,3,2,2,2,2,2,2,2,2,2,2,0,0,0,2,2,3,0,0,2,2,2,2,0,0,2,0,2,0, - 3,0,0,2,2,0,0,2,0,2,0,0,2,2,2,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,2,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,1,3,3,3,3,3,3,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,2,3,1,3,2,0,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,3,3,3,3,3,2,3,3,3,3,3,3,1,0,3,3,1,0, + 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,1,3,1,3,1,0,2,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,3,3,3,3,3,3,3,1,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,0,2,3,2,1, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,1,0,2,2,1,1, + 3,3,3,2,2,3,3,2,2,2,2,3,3,1,1,2,2,2,1,3,2,3,2,1,3,2,3,1,0,1,1,2,1, + 3,3,3,3,2,3,3,3,2,3,3,3,2,1,1,1,1,2,1,3,3,3,2,1,3,1,3,1,0,2,3,1,0, + 3,3,3,2,3,3,3,3,1,3,3,3,3,2,3,1,2,2,1,3,3,3,0,3,3,1,3,2,0,3,1,2,1, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,3,2,3,2,3,3,3,3,0,2,2,0, + 3,3,3,3,3,3,2,2,1,2,1,3,3,3,3,1,1,2,1,3,2,3,1,1,3,0,3,1,0,3,1,1,1, + 3,3,3,2,3,3,3,3,1,3,3,3,2,3,1,1,1,1,1,3,3,3,1,1,3,1,3,1,0,3,1,1,0, + 3,3,3,3,3,3,3,3,2,2,3,3,2,2,2,1,2,2,1,3,1,3,1,1,3,3,3,1,0,2,1,1,1, + 3,3,3,3,2,3,3,3,2,1,2,3,2,2,1,2,1,2,1,3,3,3,1,1,2,1,3,1,0,3,1,1,3, + 3,3,3,1,1,3,2,1,1,1,1,3,0,0,1,0,1,1,0,3,1,3,0,0,2,0,3,1,0,0,1,0,0, + 3,3,3,2,3,3,2,2,2,2,2,3,2,2,2,1,1,2,1,3,1,0,1,1,3,0,1,0,0,1,1,1,0, + 3,3,3,1,1,3,3,3,1,1,2,3,2,1,1,2,0,1,2,2,1,3,0,0,2,1,2,0,0,3,1,1,1, + 1,1,1,3,3,1,3,3,3,3,3,0,3,3,2,3,3,1,2,0,0,0,1,1,0,1,0,3,1,0,1,1,0, + 3,3,3,3,2,3,3,2,1,1,3,3,2,1,1,1,1,2,1,2,1,3,1,0,2,1,2,0,0,2,1,1,1, + 3,3,3,3,3,0,3,2,3,3,3,0,3,2,2,3,1,1,3,0,0,0,2,2,0,2,0,0,2,0,1,0,0, + 1,2,1,0,1,2,0,1,1,0,1,3,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,1,3,1,1,1,3,2,3,2,1,1,2,0,1,1,3,2,1,3,2,3,1,1,0,0,2,2,1,1, + 1,0,0,3,3,2,3,3,2,3,3,2,3,3,3,3,3,1,3,0,2,0,2,2,0,2,0,2,1,0,0,0,0, + 3,3,3,1,1,3,1,1,1,1,1,3,1,1,1,1,0,0,0,3,0,2,0,0,2,1,3,0,0,1,1,0,1, + 1,3,1,3,3,2,3,3,3,3,3,2,2,3,2,3,2,1,2,0,0,0,1,2,0,2,0,3,0,0,1,0,0, + 3,3,3,0,1,3,2,1,1,3,3,3,1,3,1,1,3,1,2,1,2,2,1,0,1,0,1,3,0,0,0,1,0, + 2,3,3,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,3,0,0,1,0,2,0,0,0,0,0,0, + 1,3,0,3,3,0,2,3,1,3,3,0,3,1,3,1,0,1,1,0,1,0,0,2,0,1,0,1,1,0,0,0,0, + 3,3,3,1,2,3,2,2,1,2,1,3,3,1,1,1,1,2,0,1,2,2,0,1,1,1,1,0,0,2,1,1,1, + 3,3,3,2,2,3,1,1,1,1,1,1,1,1,1,1,0,1,1,0,2,0,0,1,1,1,0,0,0,0,1,2,1, + 3,1,0,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,1,2,0,1,0,0,1,0,0,0,0,0,0, }; @@ -190,7 +191,7 @@ const SequenceModel Iso_8859_15SpanishModel = Iso_8859_15_CharToOrderMap, SpanishLangModel, 33, - (float)0.9966074680689881, + (float)0.9990085459927168, PR_TRUE, "ISO-8859-15", "es" @@ -201,7 +202,7 @@ const SequenceModel Iso_8859_1SpanishModel = Iso_8859_1_CharToOrderMap, SpanishLangModel, 33, - (float)0.9966074680689881, + (float)0.9990085459927168, PR_TRUE, "ISO-8859-1", "es" @@ -212,7 +213,7 @@ const SequenceModel Windows_1252SpanishModel = Windows_1252_CharToOrderMap, SpanishLangModel, 33, - (float)0.9966074680689881, + (float)0.9990085459927168, PR_TRUE, "WINDOWS-1252", "es" @@ -225,5 +226,8 @@ const LanguageModel SpanishModel = 66, SpanishLangModel, 33, - (float)0.9966074680689881, + 4, + (float)0.39884249341527656, + 20, + (float)0.0335206321334504, }; diff --git a/src/LangModels/LangSwedishModel.cpp b/src/LangModels/LangSwedishModel.cpp index e07efba..68a36d0 100644 --- a/src/LangModels/LangSwedishModel.cpp +++ b/src/LangModels/LangSwedishModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 20:24:13.934277 + * On: 2022-12-14 18:21:28.823388 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 4X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 6X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 49, 33, 50, 51, 16, 19, 37, 40, 32, 28, 42, 52, 53, 38, 43, 54, /* CX */ - 55, 56, 57, 58, 59, 60, 22,SYM, 39, 61, 62, 63, 31, 64, 65, 66, /* DX */ - 67, 33, 68, 69, 16, 19, 37, 40, 32, 28, 42, 70, 71, 38, 43, 72, /* EX */ - 73, 74, 75, 76, 77, 78, 22,SYM, 39, 79, 80, 81, 31, 82, 83, 84, /* FX */ + SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 34, 33, 53, 54, 15, 20, 37, 41, 39, 29, 55, 56, 57, 38, 58, 59, /* CX */ + 60, 61, 62, 35, 42, 45, 22,SYM, 32, 63, 47, 64, 40, 46, 65, 66, /* DX */ + 34, 33, 67, 68, 15, 20, 37, 41, 39, 29, 69, 70, 71, 38, 72, 73, /* EX */ + 74, 75, 76, 35, 42, 45, 22,SYM, 32, 77, 47, 78, 40, 46, 79, 80, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_4_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 4X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 6X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 85, 86, 87,SYM, 88, 89,SYM,SYM, 90, 91, 92, 93,SYM, 94,SYM, /* AX */ - SYM, 95,SYM, 96,SYM, 97, 98,SYM,SYM, 99,100,101,102, 46,103, 46, /* BX */ - 30, 33,104,105, 16, 19, 37,106,107, 28,108,109, 44, 38, 43, 48, /* CX */ - 110,111, 35,112,113,114, 22,SYM, 39, 41,115,116, 31,117, 47,118, /* DX */ - 30, 33,119,120, 16, 19, 37,121,122, 28,123,124, 44, 38, 43, 48, /* EX */ - 125,126, 35,127,128,129, 22,SYM, 39, 41,130,131, 31,132, 47,SYM, /* FX */ + SYM, 81, 82, 83,SYM, 84, 85,SYM,SYM, 44, 50, 86, 87,SYM, 51,SYM, /* AX */ + SYM, 88,SYM, 89,SYM, 90, 91,SYM,SYM, 44, 50, 92, 93, 94, 51, 95, /* BX */ + 30, 33, 96, 97, 15, 20, 37, 98, 48, 29, 99,100,101, 38,102, 36, /* CX */ + 103,104, 31,105, 42, 45, 22,SYM, 32,106, 47,107, 40,108, 43,109, /* DX */ + 30, 33,110,111, 15, 20, 37,112, 48, 29,113,114,115, 38,116, 36, /* EX */ + 117,118, 31,119, 42, 45, 22,SYM, 32,120, 47,121, 40,122, 43,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 4X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 6X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 133, 33,134,135, 16, 19, 37, 40, 32, 28, 42,136,137, 38, 43,138, /* CX */ - 139,140,141,142,143,144, 22,SYM, 39,145,146,147, 31,148,149,150, /* DX */ - 151, 33,152,153, 16, 19, 37, 40, 32, 28, 42,154,155, 38, 43,156, /* EX */ - 157,158,159,160,161,162, 22,SYM, 39,163,164,165, 31, 45,166,167, /* FX */ + SYM,SYM,SYM,SYM,SYM,123,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 34, 33,124,125, 15, 20, 37, 41, 39, 29,126,127,128, 38,129,130, /* CX */ + 131,132,133, 35, 42, 45, 22,SYM, 32,134, 47,135, 40,136,137,138, /* DX */ + 34, 33,139,140, 15, 20, 37, 41, 39, 29,141,142,143, 38,144,145, /* EX */ + 146,147,148, 35, 42, 45, 22,SYM, 32,149, 47,150, 40,151,152,153, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,18 +131,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 4X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 6X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,168,SYM,169,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,170, 34,SYM,SYM,171,SYM,SYM,SYM,172,173,174,SYM, /* BX */ - 175, 33,176,177, 16, 19, 37, 40, 32, 28, 42,178,179, 38, 43,180, /* CX */ - 181,182,183,184,185,186, 22,SYM, 39,187,188,189, 31,190,191,192, /* DX */ - 193, 33,194,195, 16, 19, 37, 40, 32, 28, 42,196,197, 38, 43,198, /* EX */ - 199,200,201,202,203,204, 22,SYM, 39,205,206,207, 31,208,209,210, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 44,SYM, 44,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 51,154,SYM,SYM, 51,SYM,SYM,SYM,155,156,157,SYM, /* BX */ + 34, 33,158,159, 15, 20, 37, 41, 39, 29,160,161,162, 38,163,164, /* CX */ + 165,166,167, 35, 42, 45, 22,SYM, 32,168, 47,169, 40, 46,170,171, /* DX */ + 34, 33,172,173, 15, 20, 37, 41, 39, 29,174,175,176, 38,177,178, /* EX */ + 179,180,181, 35, 42, 45, 22,SYM, 32,182, 47,183, 40, 46,184,185, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -152,74 +152,74 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 4X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 21, 20, 9, 1, 13, 12, 17, 6, 23, 11, 7, 10, 3, 8, /* 6X */ - 18, 29, 2, 5, 4, 15, 14, 26, 25, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 36,SYM,SYM,SYM,SYM,SYM,SYM,211,SYM,212,ILL,213,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,214,SYM,215,ILL,216,217, /* 9X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 4X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 21, 19, 9, 0, 14, 12, 18, 6, 24, 11, 7, 10, 3, 8, /* 6X */ + 17, 28, 2, 5, 4, 16, 13, 26, 25, 23, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 49,SYM,SYM,SYM,SYM,SYM,SYM, 44,SYM,186,ILL, 51,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 44,SYM,187,ILL, 51,188, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 34,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 218, 33,219,220, 16, 19, 37, 40, 32, 28, 42,221,222, 38, 43,223, /* CX */ - 224,225,226,227,228,229, 22,SYM, 39,230,231,232, 31,233,234,235, /* DX */ - 236, 33,237,238, 16, 19, 37, 40, 32, 28, 42,239,240, 38, 43,241, /* EX */ - 242,243,244,245,246,247, 22,SYM, 39,248,249,249, 31,249,249,249, /* FX */ + SYM,SYM,SYM,SYM,SYM,189,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 34, 33,190,191, 15, 20, 37, 41, 39, 29,192,193,194, 38,195,196, /* CX */ + 197,198,199, 35, 42, 45, 22,SYM, 32,200, 47,201, 40, 46,202,203, /* DX */ + 34, 33,204,205, 15, 20, 37, 41, 39, 29,206,207,208, 38,209,210, /* EX */ + 211,212,213, 35, 42, 45, 22,SYM, 32,214, 47,215, 40, 46,216,217, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 60; +static const int Unicode_Char_size = 58; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 21, 67, 20, 68, 9, 69, 1, 70, 13, 71, 12, 72, 17, - 73, 6, 74, 23, 75, 11, 76, 7, 77, 10, 78, 3, 79, 8, 80, 18, - 81, 29, 82, 2, 83, 5, 84, 4, 85, 15, 86, 14, 87, 26, 88, 25, - 89, 24, 90, 27, 97, 0, 98, 21, 99, 20, 100, 9, 101, 1,102, 13, - 103, 12, 104, 17, 105, 6, 106, 23, 107, 11, 108, 7, 109, 10,110, 3, - 111, 8, 112, 18, 113, 29, 114, 2, 115, 5, 116, 4, 117, 15,118, 14, - 119, 26, 120, 25, 121, 24, 122, 27, 196, 16, 197, 19, 201, 28,214, 22, - 228, 16, 229, 19, 233, 28, 246, 22, + 65, 1, 66, 21, 67, 19, 68, 9, 69, 0, 70, 14, 71, 12, 72, 18, + 73, 6, 74, 24, 75, 11, 76, 7, 77, 10, 78, 3, 79, 8, 80, 17, + 81, 28, 82, 2, 83, 5, 84, 4, 85, 16, 86, 13, 87, 26, 88, 25, + 89, 23, 90, 27, 97, 1, 98, 21, 99, 19, 100, 9, 101, 0,102, 14, + 103, 12, 104, 18, 105, 6, 106, 24, 107, 11, 108, 7, 109, 10,110, 3, + 111, 8, 112, 17, 113, 28, 114, 2, 115, 5, 116, 4, 117, 16,118, 13, + 119, 26, 120, 25, 121, 23, 122, 27, 196, 15, 197, 20, 214, 22,228, 15, + 229, 20, 246, 22, }; /* Model Table: - * Total sequences: 752 - * First 512 sequences: 0.996987580875875 - * Next 512 sequences (512-1024): 0.00301241912412493 - * Rest: 4.640385298237959e-17 + * Total considered sequences: 886 / 841 + * - Positive sequences: first 482 (0.9950244403710493) + * - Probable sequences: next 121 (603-482) (0.003978503582736215) + * - Neutral sequences: last 238 (0.0009970560462144729) + * - Negative sequences: -45 (off-ratio) * Negative sequences: TODO */ static const PRUint8 SwedishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,2,3,3,3,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,3,3,3,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,2,2,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,0,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,2,0,0, - 3,3,3,2,3,2,3,3,3,2,2,0,2,3,2,3,3,0,2,3,2,0,3,3,3,0,0,0,2,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,0,3,2,2,0,0,0,2,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,2,3,0,3,3,2,2,0,3,2,2,0,0, - 2,2,3,3,3,3,2,3,0,3,3,3,3,3,3,2,2,0,3,0,3,3,0,0,0,3,0,0,0,0, - 3,3,3,3,3,2,3,3,3,2,2,2,2,2,0,3,3,2,0,3,2,2,3,3,3,0,0,3,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,2,2,3,0,0,2,2,0, - 2,3,3,3,3,3,2,3,0,3,2,3,3,2,3,0,0,2,0,0,0,2,2,2,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,2,2,3,2,2,2,3,0,3,0,0,3,2,0,0,3,0,0,2,2,2, - 3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,3,3,2,3,3,3,3,3,3,3,0,0,2,2,0, - 3,3,3,3,3,3,2,3,2,3,3,3,3,2,3,0,2,2,3,0,3,2,2,3,0,0,0,0,0,0, - 3,3,0,2,2,3,2,3,3,3,0,2,0,2,0,3,3,2,0,0,0,2,3,0,0,0,0,0,0,0, - 3,2,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,2,3,0,3,3,0,2,0,3,2,2,0,0, - 3,3,2,2,3,0,3,3,3,0,2,2,0,2,0,2,0,2,3,0,0,2,0,0,2,2,0,0,0,0, - 3,3,2,2,2,2,3,2,3,2,0,2,0,2,0,2,0,3,0,0,0,0,0,0,2,0,2,2,0,0, - 3,3,0,2,2,0,2,2,3,0,0,2,0,2,0,2,0,2,0,0,0,2,0,0,2,0,0,2,0,0, - 0,3,2,2,0,2,0,2,0,2,0,0,0,0,2,2,0,2,2,0,0,0,2,0,0,0,0,0,0,0, - 0,0,0,2,0,0,2,0,0,0,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,2,3,1,3,3,3,3,2,2, + 3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,1,3,2,3,3,3,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,1,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,1,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,1,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,1,2,3,3,3,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,1,1,1,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,1,3,2,3,3,2,0,2,1,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,1,3,3,3,3,3,2,1,1,1, + 3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,3,1,2,1,3,2,2,2,2,1,0,0,0, + 3,3,3,2,3,3,3,3,3,1,1,1,1,1,3,3,3,1,0,0,3,0,3,3,3,0,0,0,0, + 0,1,3,3,3,3,1,3,0,3,3,3,3,3,3,0,1,3,0,3,0,3,0,0,0,3,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,2,3,1,3,0,3,1,1,1,2,1,1,1, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,3,1,3,2,1,2,2,0,0,1,0, + 3,3,3,2,3,2,3,2,3,2,2,1,1,1,1,3,3,1,1,1,3,2,3,3,3,0,2,1,1, + 3,3,3,2,3,2,3,3,3,2,2,3,1,0,1,0,3,1,3,3,0,1,0,3,0,0,0,2,1, + 3,1,3,3,3,3,1,3,1,3,2,3,3,3,2,0,0,2,1,0,1,2,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,0,1,2,1,1,3,3,2,2,2,3,3,3,3,3,0,1,0,0, + 2,3,3,3,3,3,2,3,1,3,3,3,3,3,2,1,0,3,1,2,0,1,1,0,3,0,0,0,0, + 2,3,3,3,3,3,2,3,3,3,3,3,3,2,3,0,2,3,2,3,0,3,1,0,1,2,1,1,0, + 3,3,1,2,2,2,3,3,3,3,1,2,0,1,2,3,3,0,0,0,0,1,3,2,0,0,0,1,0, + 3,3,1,2,3,1,3,2,3,0,1,1,0,1,2,0,3,3,1,1,0,1,0,1,1,1,0,0,0, + 3,3,1,2,1,2,3,1,2,0,0,1,0,0,1,1,1,0,2,1,0,1,0,1,0,0,2,1,0, + 3,3,0,2,1,1,2,0,3,1,1,1,0,0,0,0,1,0,2,1,0,1,0,1,1,0,1,1,0, + 0,0,1,0,0,0,2,1,0,0,0,0,0,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -227,8 +227,8 @@ const SequenceModel Iso_8859_1SwedishModel = { Iso_8859_1_CharToOrderMap, SwedishLangModel, - 30, - (float)0.996987580875875, + 29, + (float)0.9990029439537855, PR_TRUE, "ISO-8859-1", "sv" @@ -238,8 +238,8 @@ const SequenceModel Iso_8859_4SwedishModel = { Iso_8859_4_CharToOrderMap, SwedishLangModel, - 30, - (float)0.996987580875875, + 29, + (float)0.9990029439537855, PR_TRUE, "ISO-8859-4", "sv" @@ -249,8 +249,8 @@ const SequenceModel Iso_8859_9SwedishModel = { Iso_8859_9_CharToOrderMap, SwedishLangModel, - 30, - (float)0.996987580875875, + 29, + (float)0.9990029439537855, PR_TRUE, "ISO-8859-9", "sv" @@ -260,8 +260,8 @@ const SequenceModel Iso_8859_15SwedishModel = { Iso_8859_15_CharToOrderMap, SwedishLangModel, - 30, - (float)0.996987580875875, + 29, + (float)0.9990029439537855, PR_TRUE, "ISO-8859-15", "sv" @@ -271,8 +271,8 @@ const SequenceModel Windows_1252SwedishModel = { Windows_1252_CharToOrderMap, SwedishLangModel, - 30, - (float)0.996987580875875, + 29, + (float)0.9990029439537855, PR_TRUE, "WINDOWS-1252", "sv" @@ -282,8 +282,11 @@ const LanguageModel SwedishModel = { "sv", Unicode_CharOrder, - 60, + 58, SwedishLangModel, - 30, - (float)0.996987580875875, + 29, + 4, + (float)0.36974030827358995, + 21, + (float)0.030157896788186284, }; diff --git a/src/LangModels/LangThaiModel.cpp b/src/LangModels/LangThaiModel.cpp index aa85238..5c2eb6c 100644 --- a/src/LangModels/LangThaiModel.cpp +++ b/src/LangModels/LangThaiModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-22 17:30:46.517390 + * On: 2022-12-14 18:24:15.382361 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_11_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, /* 4X */ - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115, /* 6X */ - 116,117,118,119,120,121,122,123,124,125,126,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, /* 4X */ + 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110, /* 6X */ + 111,112,113,114,115,116,117,118,119,120,121,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 3, 32,127, 20,128, 60, 7, 27, 52, 28, 40, 66, 48, 58, 57, /* AX */ - 50, 59, 55, 45, 16, 19, 43, 17, 44, 1, 22, 23, 42, 56, 26, 49, /* BX */ - 47, 9, 13, 2, 51, 14,129, 11, 35, 46, 18, 25, 62, 4, 53, 63, /* CX */ - 21,SYM, 0, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ - 5, 24, 37, 29, 31, 67, 54,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,130, /* EX */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,131,132,ILL,ILL,ILL,ILL, /* FX */ + SYM, 3, 30, 68, 21,122, 62, 6, 26, 56, 27, 41, 65, 48, 57, 54, /* AX */ + 49, 59, 58, 47, 19, 17, 45, 18, 44, 1, 23, 20, 46, 53, 28, 50, /* BX */ + 42, 9, 11, 2, 52, 12,123, 13, 32, 43, 16, 25, 60, 4, 51, 64, /* CX */ + 22,SYM, 0, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ + 5, 24, 34, 29, 36, 69, 55,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,124, /* EX */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,125,126,ILL,ILL,ILL,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,197 +89,186 @@ static const unsigned char Tis_620_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147, /* 4X */ - 148,149,150,151,152,153,154,155,156,157,158,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173, /* 6X */ - 174,175,176,177,178,179,180,181,182,183,184,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141, /* 4X */ + 142,143,144,145,146,147,148,149,150,151,152,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167, /* 6X */ + 168,169,170,171,172,173,174,175,176,177,178,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - ILL, 3, 32,185, 20,186, 60, 7, 27, 52, 28, 40, 66, 48, 58, 57, /* AX */ - 50, 59, 55, 45, 16, 19, 43, 17, 44, 1, 22, 23, 42, 56, 26, 49, /* BX */ - 47, 9, 13, 2, 51, 14,187, 11, 35, 46, 18, 25, 62, 4, 53, 63, /* CX */ - 21,SYM, 0, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ - 5, 24, 37, 29, 31, 67, 54,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,188, /* EX */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,189,190,ILL,ILL,ILL,ILL, /* FX */ + ILL, 3, 30, 68, 21,179, 62, 6, 26, 56, 27, 41, 65, 48, 57, 54, /* AX */ + 49, 59, 58, 47, 19, 17, 45, 18, 44, 1, 23, 20, 46, 53, 28, 50, /* BX */ + 42, 9, 11, 2, 52, 12,180, 13, 32, 43, 16, 25, 60, 4, 51, 64, /* CX */ + 22,SYM, 0, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ + 5, 24, 34, 29, 36, 69, 55,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,181, /* EX */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,182,183,ILL,ILL,ILL,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 75; +static const int Unicode_Char_size = 70; static const unsigned int Unicode_CharOrder[] = { - 3585, 3, 3586, 32, 3588, 20, 3590, 60, 3591, 7, 3592, 27, 3593, 52,3594, 28, - 3595, 40, 3596, 66, 3597, 48, 3598, 58, 3599, 57, 3600, 50, 3601, 59,3602, 55, - 3603, 45, 3604, 16, 3605, 19, 3606, 43, 3607, 17, 3608, 44, 3609, 1,3610, 22, - 3611, 23, 3612, 42, 3613, 56, 3614, 26, 3615, 49, 3616, 47, 3617, 9,3618, 13, - 3619, 2, 3620, 51, 3621, 14, 3623, 11, 3624, 35, 3625, 46, 3626, 18,3627, 25, - 3628, 62, 3629, 4, 3630, 53, 3631, 63, 3632, 21, 3633, 8, 3634, 0,3635, 39, - 3636, 15, 3637, 10, 3638, 41, 3639, 34, 3640, 33, 3641, 36, 3642, 64,3648, 5, - 3649, 24, 3650, 37, 3651, 29, 3652, 31, 3653, 67, 3654, 54, 3655, 38,3656, 6, - 3657, 12, 3658, 61, 3659, 65, 3660, 30, 3664, 72, 3666, 69, 3668, 70,3669, 73, - 3670, 68, 3671, 74, 3673, 71, + 3585, 3, 3586, 30, 3587, 68, 3588, 21, 3590, 62, 3591, 6, 3592, 26,3593, 56, + 3594, 27, 3595, 41, 3596, 65, 3597, 48, 3598, 57, 3599, 54, 3600, 49,3601, 59, + 3602, 58, 3603, 47, 3604, 19, 3605, 17, 3606, 45, 3607, 18, 3608, 44,3609, 1, + 3610, 23, 3611, 20, 3612, 46, 3613, 53, 3614, 28, 3615, 50, 3616, 42,3617, 9, + 3618, 11, 3619, 2, 3620, 52, 3621, 12, 3623, 13, 3624, 32, 3625, 43,3626, 16, + 3627, 25, 3628, 60, 3629, 4, 3630, 51, 3631, 64, 3632, 22, 3633, 8,3634, 0, + 3635, 39, 3636, 15, 3637, 10, 3638, 40, 3639, 35, 3640, 33, 3641, 37,3642, 66, + 3648, 5, 3649, 24, 3650, 34, 3651, 29, 3652, 36, 3653, 69, 3654, 55,3655, 38, + 3656, 7, 3657, 14, 3658, 63, 3659, 61, 3660, 31, 3661, 67, }; /* Model Table: - * Total considered sequences: 2422 / 5625 - * - Positive sequences: first 1646 (0.9950175497087397) - * - Probable sequences: next 355 (2001-1646) (0.003986886339010343) - * - Neutral sequences: last 3624 (0.0009955639522499782) - * - Negative sequences: 3203 (off-ratio) + * Total considered sequences: 2476 / 4900 + * - Positive sequences: first 1642 (0.9950041430825017) + * - Probable sequences: next 370 (2012-1642) (0.003999342904699388) + * - Neutral sequences: last 2888 (0.000996514012798877) + * - Negative sequences: 2424 (off-ratio) * Negative sequences: TODO */ static const PRUint8 ThaiLangModel[] = { - 1,3,3,3,3,3,0,3,0,3,0,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,0,3,0, - 3,0,0,3,0,3,3,3,3,3,3,3,3,2,1,2,3,0,0,3,2,1,1,2,0,3,2,0,0,1,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,3,3,3,3,3,3,2,1,3,1,3,3,3,3,2,3,0,3,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,2,3,3,3,3,3,3,3,3,2,3,3,2,3,1,0,1,3,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,2,2,2,0,3,3,3,0,1,3,0,1,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3, - 3,3,3,3,2,3,3,3,0,1,3,2,3,0,1,2,3,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,0,0,3,0,3,0,3,0,3,3,0,3,3,3,3,3,0,3,3,1,3,3,3,3,0,0,0,3,0,0,3,0, - 0,0,0,3,0,3,3,3,3,2,3,0,3,0,0,3,3,0,0,3,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,0,3,0,3,0,3,0,3,3,0,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,3,0,0,3,0, - 3,0,3,3,0,3,3,1,1,0,3,0,2,1,1,1,3,1,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,1,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,2,3,3,1,2,3,1, - 3,0,1,3,0,3,3,3,0,3,3,1,3,1,2,2,2,3,0,3,0,0,0,3,0,0,1,0,0,1,0,0,0,0,0,0,0,0, - 0,3,2,3,1,0,3,3,0,3,0,3,3,3,3,0,3,3,3,3,3,0,3,3,0,0,3,3,3,0,0,0,1,0,0,3,0, - 0,0,0,2,0,1,0,3,3,1,0,3,3,3,0,0,0,0,3,0,1,1,1,1,2,0,0,0,1,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,0,3,1,3,3,3,3,3,3,1,3,1,2,3,2,1,0,2,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,3,3,2,0,3,0,3,3,3,3,0,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,3,0,0,3,0, - 3,0,0,3,0,3,3,2,1,0,3,1,3,2,2,3,2,0,0,2,0,0,0,1,1,3,0,0,2,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2, - 3,3,0,3,0,3,3,0,3,0,3,3,2,0,0,1,3,1,0,3,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,0,3,0,3,0,3,0,3,3,0,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,3,0,0,2,0, - 3,0,3,3,0,3,3,2,0,0,3,1,1,0,1,2,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,2,3,3,3,3,3,2,0,3,2,3,2,0,2,2,2,0,3,0,0,0,1,0,0,1,2,0,1,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3, - 3,3,3,3,3,3,3,1,0,0,2,1,3,2,1,0,1,0,0,2,0,1,1,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,3,3,3,0,3,0,3,3,3,3,0,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0, - 3,0,0,3,0,3,3,3,2,3,3,3,3,3,0,2,2,0,0,2,0,0,1,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3, - 3,3,3,3,3,3,3,2,0,1,3,0,3,2,1,2,2,2,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,3,3,3,3,3,3,3, - 3,3,3,2,3,2,0,3,0,3,2,0,1,0,3,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3, - 3,1,3,3,3,2,3,3,0,0,3,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3, - 3,3,3,3,3,2,3,0,1,1,3,0,2,0,0,0,1,0,0,0,1,0,0,0,2,0,0,1,2,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3, - 3,2,3,3,0,1,0,1,3,0,2,0,1,0,1,2,1,0,0,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 1,3,3,3,3,3,0,3,0,3,0,3,0,3,3,0,3,3,3,3,3,1,3,3,3,3,3,3,3,3,0,3,3,0,0,3,0, - 3,0,0,3,0,3,3,3,2,0,3,1,3,0,1,3,3,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3, - 3,2,3,3,1,3,3,3,0,0,3,2,2,1,2,2,2,1,0,3,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,2,3, - 3,3,0,3,1,1,3,1,2,1,1,2,0,2,0,0,1,0,0,0,3,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,0,0,2,0,3,0,3,0,3,3,0,3,3,3,3,3,0,3,3,0,3,3,3,3,0,0,0,3,0,0,0,0, - 0,0,0,3,0,3,3,0,0,0,0,0,3,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 3,3,3,3,3,1,3,3,3,3,2,3,3,3,3,3,2,1,2,3,0,3,1,3,0,0,2,0,0,0,3,0,0,3,0,0,3, - 2,3,0,0,1,0,1,1,0,0,3,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3, - 3,2,2,1,3,2,2,3,2,0,2,2,1,0,3,0,1,0,0,1,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,3, - 3,3,3,2,3,2,3,1,0,2,0,1,1,0,0,3,1,0,0,0,0,0,0,0,2,0,1,0,1,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,2,3,2,3,3,2,3,3,1,3, - 3,2,3,2,0,1,1,2,1,2,2,3,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0, - 0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,3,1,3,3,2,0,3,0,0,3,0,3,3,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,3,0,1,0,3,0,3,0,3,3,0,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,3,0,0,3,0, - 3,0,0,3,0,3,2,2,1,0,3,2,3,1,1,3,2,0,0,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,0,0,0,0,3,0,3,0,0,3,0,3,3,2,3,3,0,3,3,0,3,3,0,3,0,0,0,3,0,0,1,0, - 0,0,0,3,0,0,1,2,0,0,0,0,3,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,2,3,3,3,3,3,3,2,2,2,3,3,3,2,1,3,3,3,1,1,2,1,2,2,3,3,3,2,1,1, - 2,3,2,2,3,0,1,0,3,0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,2,3,3,3,0,3,0,3,3,3,3,0,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0, - 3,0,0,2,0,3,1,3,3,3,3,3,1,1,0,1,2,0,3,0,0,3,2,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,1,0,3,1,3,0,0,1,0,0,3,3,0,0,2,0,0,0,0,0,3,0,0,1,0,0,3,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,0,0,3,3,3,3,0,3,3,3,2,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3, - 3,0,0,2,3,2,1,0,0,3,2,3,2,0,0,1,1,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,3,3,3,0,3,0,2,3,2,3,0,3,3,3,3,2,0,3,3,3,3,3,3,3,3,0,3,2,0,0,2,0, - 3,0,0,3,0,1,1,1,1,1,1,3,2,0,0,1,3,0,0,2,2,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,0,0,1,0,3,0,2,0,3,3,0,3,3,3,3,3,0,3,3,0,3,3,3,3,0,0,0,3,0,0,1,0, - 0,0,0,3,0,2,1,3,2,0,3,0,3,0,0,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,2,3,3,3,0,3,0,3,0,3,0,3,2,0,3,2,3,3,3,0,3,3,2,1,2,3,1,2,0,3,1,0,0,0,0, - 1,0,0,0,0,0,3,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,3,3,3,3,3,0,3,0,3,0,3,0,2,3,0,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,3,0,0,1,0, - 3,0,0,2,0,2,3,2,1,0,2,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,2,3,3,3,3,3,3,3,2,3,2,3,3,3,2,2,3,2,2,2,1,1,1,3,0,2,0,3,2,0,3,3,0,3, - 3,3,0,2,3,1,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,3,0,0,3,3,0,3,0,0,3,0,1,0,3,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0, - 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,1,1,1,3,2,3,2,0,2,3,3,3,3,2,0,3,0,0,0,1,0,0,0,0,1,2,0,0,0,0,1,2,0,3, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,1,3,2,3,3,3,3,3,3,3,3,2,2,3,1,3,2,3,1,3,2,1,3,3,3,2,3,3,0,3, - 0,0,0,2,3,2,1,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,0,2,3,3,3,2,0,3,1,3,1,2,2,1,1,1,0,1,2,0,1,1,0,0,3,2,2,3,0,2,1, - 3,0,0,1,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,0,0,2,2,3,3,0,2,3,3,1,2,3,2,3,3,2,3,3,3,3,2,3,2,3,2,2,2,0,3,2, - 3,0,0,1,0,0,1,1,1,0,3,0,0,1,0,1,0,0,0,0,0,0,3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,2,3,0,0,3,3,3,2,0,3,1,2,1,3,2,3,2,3,1,2,2,1,1,3,2,2,3,2,2,2,0,1,0, - 2,0,0,1,0,1,0,0,3,0,3,0,1,3,0,0,1,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,1,3,0,2,0,0,0,3,0,3,0,0,0,0,3,0,3,3,1,3,2,0,0,1,0,3,0,2,0,2,0,0,0,0,0,3, - 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,3,2,3,3,0,3,2,3,2,2,2,3,3,2,3,3,2,2,1,1,2,3,3,2,3,3,3,2,2,3,1,0,0,0, - 2,0,0,0,0,1,1,1,0,0,0,3,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,0,3,3,3,1,3,1,3,3,1,3,2,3,3,2,1,1,3,1,0,1,2,1,2,3,2,3,3,0,3, - 3,2,0,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,3,3,3,0,0,0,3,1,2,0,2,0,1,1,2,3,2,2,0,3,3,3,1,1,2,1,3,3,3,2,0,0,3,0, - 2,0,0,2,0,0,0,3,1,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,2,0,1,0,0,0,1,0,0,0,0,0,0,3,3,1,3,2,0,0,0,0,2,1,0,0,0,0,0,0,0,0,3,0, - 0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 3,0,0,1,0,0,1,1,3,1,3,3,2,0,3,3,1,0,0,0,0,2,3,0,0,0,3,0,0,0,0,1,0,3,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,1,3,3,3,1,3,3,2,2,2,2,2,3,3,0,1,2,0,2,0,1,1,0,0,1,0,0,2,0,0,3,0,0,1, - 3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,1,1,0,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,1,0,1,0,1,2,0,0,0,0, - 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,3,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,0,2,0,3,2,3,0,1,1,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,3, - 0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,2,2,2,2,0,0,0,1,1,2,0,1,0,3,0,1,1,2,1,0,2,0,0,1,2,1,1,3,0,1,3,0,0,1,0, - 1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,1,3,2,2,3,0,0,0,0,3,0,0,0,0,0,1,1,0,0,1,0,1,0,1,3,0,1,0,1,1,0,2,0,0,0,0, - 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,1,0,0,0,0,0,0,0,1,0,0,0,3,2,0,0,2,0,0,1,1,0,0,1,0,0,0,0,3,0,0,1,0,0,1, - 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,2,2,0,1,3,3,0,2,0,0,1,0,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,0,2,1,0,0,0,0,0, - 0,0,0,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,3,3,0,0,0,0,0,0,1,0,0,0,0,1,0,2,1,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,1,0,0,0,1,0,0,0,1,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,1,0,1,0,0,0,1,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,0,0,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,3,3,3,3,0,0,3,0,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3, + 0,3,0,0,0,0,3,3,3,3,3,3,3,3,1,3,3,1,3,2,0,2,1,1,1,2,0,2,0,2,1,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,1,3,3,3,0,2,3,3,3,3,3,1,3,2,0,0,0,0,0,1,0,1,2,0,0,1,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,3,1,3,0,0,3,0,1,0,0,0,2,1,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,3,0,0,0,1,1,3,0,1,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3, + 3,3,3,3,3,1,3,3,0,3,3,3,0,2,1,3,3,1,2,0,1,1,0,0,0,0,1,2,2,0,0,0,0,0,0, + 0,3,3,3,3,1,3,0,0,3,0,3,3,3,0,0,3,3,3,3,3,3,0,3,0,3,3,3,3,0,3,0,3,0,0, + 0,0,0,0,0,0,3,3,2,3,3,3,1,0,0,3,3,0,2,1,0,3,0,0,0,0,0,2,0,0,1,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,2,3,3,3,1,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,1,3,2,3, + 2,3,2,1,3,0,3,3,2,3,3,3,0,3,2,3,3,2,3,0,3,2,0,0,0,0,0,1,0,1,1,0,0,0,0, + 3,3,3,3,3,3,3,0,0,3,0,3,3,3,0,0,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,0,3,0,3, + 0,3,0,0,3,0,3,3,0,1,3,3,0,2,1,3,3,0,3,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,3,2,3,1,0,3,3,0,3,0,3,3,3,3,0,3,3,3,3,3,3,0,3,0,0,3,3,3,0,2,0,3,0,0, + 0,0,0,0,0,0,2,0,1,3,0,0,3,3,3,3,0,0,0,1,0,0,1,3,1,0,3,1,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,0,2,3,3,3,2,3,3,3,2,1,3,2,2,3,0,2,3,0,0,0,0,0,1,0,0,0,1,0,0,0, + 0,3,3,3,3,3,2,3,0,3,0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,0,3, + 0,3,0,0,0,0,3,3,0,2,3,3,0,2,2,3,2,3,3,0,0,1,0,0,1,3,2,0,1,1,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,1,3,3,3,0,3,3,3,2,3,1,3,3,1,3,0,2,2,0,0,0,0,0,2,0,0,1,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,2,0,2,2,3,0,1,2,3,3,0,2,0,0,1,2,1,1,0,0,1,1,2,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 1,3,3,3,0,1,3,3,0,0,3,3,3,3,0,3,3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0, + 3,3,3,3,3,3,3,0,0,3,0,3,3,3,0,0,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,2,0,3, + 0,3,0,0,3,0,3,3,0,1,3,3,1,2,0,3,3,1,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,3,3,3,3,3,3,3,0,3,0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3, + 0,3,0,0,0,0,3,3,3,3,3,2,2,3,2,3,2,0,2,0,0,1,1,0,1,3,1,1,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3, + 3,3,3,1,3,3,3,3,0,3,3,2,0,0,0,2,2,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3, + 3,3,3,3,3,3,3,3,0,1,3,3,0,1,0,1,1,1,1,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,3,3,3,3,2,2,3,3,3,3,3,3,3, + 3,3,3,2,3,3,3,2,3,3,1,2,0,0,0,1,1,3,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,0,1,3,2,0,0,1,2,2,1,2,0,2,2,0,0,0,0,0,1,0,0,0,1,0,0,0, + 3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,3,3, + 3,3,2,3,0,1,3,3,0,1,3,1,1,2,3,1,1,0,0,3,0,0,1,0,0,0,2,0,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3, + 3,3,3,2,3,1,3,2,0,2,2,2,3,0,0,1,1,1,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0, + 0,3,3,3,3,3,3,0,0,3,0,3,3,3,0,0,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,0,3, + 0,3,0,0,0,0,3,3,0,3,3,3,3,3,2,3,3,2,3,1,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3, + 3,3,3,3,3,1,3,3,0,3,3,3,0,3,1,3,3,0,3,2,1,2,0,0,0,0,1,0,0,0,0,0,0,0,0, + 0,3,3,3,3,1,2,0,0,3,0,3,3,3,0,0,3,3,3,3,3,3,0,3,0,3,3,3,3,0,3,0,0,0,0, + 0,0,0,0,0,0,3,0,0,0,3,3,1,0,0,3,3,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, + 3,3,3,3,3,1,3,3,3,3,1,3,3,3,3,3,1,3,1,3,3,0,2,1,0,1,0,0,3,0,0,3,1,3,1, + 2,0,3,3,0,0,0,3,0,0,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3, + 3,3,3,3,3,3,3,2,1,1,3,2,1,1,0,2,2,0,2,0,0,3,0,0,0,0,3,0,2,1,0,0,1,0,0, + 3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,1,3,3,2,2,2,3,0,3,3, + 3,3,3,3,2,1,1,1,2,1,1,1,1,3,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,1,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,2,1,3,1,3,0,2,2,1,2,2,0,2,0,3,2,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0, + 0,3,0,3,0,0,0,0,0,0,0,1,1,0,0,0,3,3,0,3,0,2,0,3,0,3,3,3,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,2,3,3,1,3,3,3,3,3,1,3,3,1,1,3,2,2,2,1,0,3,3,2,2,1,0,2,2,3,1,3,1, + 2,2,2,3,1,3,0,3,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,3,3,3,3,3,1,0,0,3,0,3,3,3,0,0,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,0,3, + 0,3,0,0,0,0,3,3,0,2,3,3,1,1,1,3,2,1,2,0,0,2,0,0,0,0,0,2,0,1,1,0,0,0,0, + 3,3,3,3,3,3,0,0,3,3,3,3,3,3,0,3,3,3,3,2,3,3,0,3,3,3,3,3,3,3,3,3,1,3,3, + 0,3,3,0,0,3,2,2,3,0,2,2,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,3,3,3,3,3,0,3,0,3,3,3,3,0,3,3,3,3,3,3,1,3,3,3,3,3,2,2,3,3,2,0,3, + 0,2,0,0,0,0,3,3,3,3,2,3,3,3,0,2,1,0,1,1,0,1,2,3,1,1,1,1,2,0,0,0,0,0,0, + 0,3,3,3,3,0,1,0,0,3,0,3,3,2,0,0,3,3,3,3,3,3,0,3,0,3,3,3,3,0,2,0,2,0,0, + 0,0,0,0,0,0,3,3,0,3,0,1,1,1,0,3,3,0,1,0,0,2,0,1,0,0,0,3,0,0,1,0,0,0,0, + 0,3,1,0,3,0,1,3,0,3,0,2,0,0,3,0,0,0,0,3,0,1,0,3,0,0,0,3,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,3,3,3,3,0,1,0,0,3,0,1,3,3,0,0,3,3,3,3,3,3,0,3,0,3,1,2,3,0,3,0,0,0,0, + 0,0,0,0,0,0,3,0,0,1,1,1,0,1,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,3,2,3,3,3,0,3,0,2,3,3,3,0,3,3,3,3,3,2,0,3,3,3,3,3,2,3,3,0,2,0,3, + 0,3,0,0,0,0,3,1,0,1,1,1,1,3,1,2,2,0,1,2,0,1,0,0,0,0,2,0,1,0,0,0,0,0,0, + 0,3,0,3,3,3,3,0,0,3,0,3,3,3,0,0,3,3,2,3,2,3,0,3,2,1,3,2,2,1,1,0,0,0,0, + 0,3,0,0,0,0,0,0,0,0,3,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,3,3,3,3,3,3,0,0,3,0,3,3,3,0,0,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,2,0,3, + 0,3,0,0,0,0,2,2,0,2,3,1,0,0,0,1,0,0,2,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,2,1,3,0,1,3,3,0,3,0,1,1,0,3,0,2,0,1,3,0,1,0,2,0,0,0,2,0,0,0,0,0,0,1, + 0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,2,3,2,0,1,1,2,2,1,3,0,3,3, + 3,2,3,3,0,3,2,0,0,1,0,1,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,0,2,0,0,0,3,0,2,0,0,1,0,3,2,1,3,1,0,3,0,0,0,0,0,2,3,0,0,2,0,0,0, + 0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,2,2,3,0,0,3,2,3,3,0,2,0,1,2,3,3,1,2,1,3,1,3,2,2,1,0,3,2,3,1,0,0, + 0,2,0,0,0,0,0,3,0,0,1,1,3,0,3,0,1,0,1,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,0,3,3,3,3,1,2,0,3,1,1,2,1,3,2,2,1,2,1,1,1,2,2,3,3,3,3,2, + 0,2,1,0,1,0,0,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,1,3,3,3,3,3,3,2,3,3,2,2,3,3,2,2,0,3,3,2,2,1,3,3,2,2,0,3,1, + 3,3,3,0,0,3,0,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,1,1,2,2,3,3,2,1,3,3,1,3,3,3,0,0,3,0,0,0,1,0,0,1,3,0,0,0,0,0,0,0, + 3,0,3,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,1,0,2,2,3,3,2,2,0,3,3,2,3,2,2,3,3,1,3,3,2,3,2,1,3,3,2,1,2, + 0,1,2,0,0,0,1,3,0,1,1,2,1,1,1,0,0,0,1,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, + 3,2,3,3,3,3,1,3,3,2,3,2,3,2,1,3,3,2,3,2,2,2,1,1,3,3,3,3,3,3,3,1,0,0,2, + 0,2,0,0,0,0,2,1,0,0,1,0,0,3,1,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,3,3,0,0,0,3,1,2,1,2,0,1,3,2,3,1,3,2,0,3,3,3,3,1,2,3,2,3,2,0,3, + 0,3,0,0,0,0,2,0,0,3,1,1,1,0,1,1,1,0,2,0,0,2,0,0,0,0,0,1,0,3,1,0,0,0,0, + 3,3,3,3,3,3,1,1,3,2,3,2,3,2,3,3,3,2,2,1,1,1,1,2,2,1,2,2,1,2,1,3,0,3,2, + 3,2,3,3,0,0,2,0,0,0,1,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,1,3,3,2,3,2,3,2,2,3,2,2,1,2,0,1,2,1,1,0,1,0,0,0,0,2,0,2,2, + 1,0,2,2,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0, + 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,3,3,3,0,1,0,0,0,2,0,1,0,0,0,0,3,0,0, + 0,0,0,0,0,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, + 2,3,3,0,1,0,1,3,3,0,2,0,0,0,2,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, + 1,1,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,1,2,2,2,0,0,0,1,1,0,1,2,0,3,1,1,1,1,1,1,0,0,1,1,0,0,0,3,2,1,1,0,2, + 0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,1,2,2,0,0,0,2,0,0,1,1,0,0,0,0,2,0,1,0,0,0,1,1,1,0,0,2,2,0,0,0,0, + 0,2,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,1,1,1,0,0,1,1,3,1,3,2,3,2,2,3,0,0,0,0,0,0,1,3,0,0,0,0,3,0,0,0,0,3,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,1,1,1,0,0,0,0,3,0,1,0,0,0,1,0,1,0,0,0,0,1,1,3,0,0,1,0,1,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,3,0,0,0,0,0,0,1,0,0,1,0,0,0,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,1,0,1,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0, + 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, + 3,1,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,1,0,0,3,1,2,0,0,1,0,2,0,2,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,1,1,0,1,1,0,3,1,0,0,0,0,1,0,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1, + 0,1,1,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,3,2,0,1,0,0,0,0,0,0,1,0,0,2,0,0,1,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,1,0,0,2,1,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, + 0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,2,1,0,1,0,0,0,1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -287,8 +276,8 @@ const SequenceModel Iso_8859_11ThaiModel = { Iso_8859_11_CharToOrderMap, ThaiLangModel, - 75, - (float)0.99900443604775, + 70, + (float)0.9990034859872011, PR_FALSE, "ISO-8859-11", "th" @@ -298,8 +287,8 @@ const SequenceModel Tis_620ThaiModel = { Tis_620_CharToOrderMap, ThaiLangModel, - 75, - (float)0.99900443604775, + 70, + (float)0.9990034859872011, PR_FALSE, "TIS-620", "th" @@ -309,8 +298,11 @@ const LanguageModel ThaiModel = { "th", Unicode_CharOrder, - 75, + 70, ThaiLangModel, - 75, - (float)1.0, + 70, + 2, + (float)0.0755324836590035, + 43, + (float)0.030663010422595123, }; diff --git a/src/LangModels/LangTurkishModel.cpp b/src/LangModels/LangTurkishModel.cpp index c1b16c1..c5568e3 100644 --- a/src/LangModels/LangTurkishModel.cpp +++ b/src/LangModels/LangTurkishModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 20:34:51.083622 + * On: 2022-12-14 18:24:38.038544 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_3_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 15, 22, 9, 1, 27, 21, 19, 6, 28, 7, 5, 11, 3, 14, /* 4X */ - 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 15, 22, 9, 1, 27, 21, 19, 2, 28, 7, 5, 11, 3, 14, /* 6X */ - 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 15, 24, 8, 1, 27, 19, 20, 6, 28, 7, 4, 11, 3, 14, /* 4X */ + 22, 34, 5, 10, 9, 12, 18, 29, 33, 13, 21,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 15, 24, 8, 1, 27, 19, 20, 2, 28, 7, 4, 11, 3, 14, /* 6X */ + 22, 34, 5, 10, 9, 12, 18, 29, 33, 13, 21,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 54,SYM,SYM,SYM,ILL, 55,SYM,SYM, 2, 17, 26, 56,SYM,ILL, 50, /* AX */ - SYM, 57,SYM,SYM,SYM,SYM, 58,SYM,SYM, 6, 17, 26, 59,SYM,ILL, 50, /* BX */ - 48, 36, 30,ILL, 39, 60, 61, 24, 40, 34, 62, 44, 63, 37, 31, 64, /* CX */ - ILL, 41, 52, 38, 46, 51, 25,SYM, 65, 66, 45, 32, 16, 67, 68, 69, /* DX */ - 48, 36, 30,ILL, 39, 70, 71, 24, 40, 34, 72, 44, 73, 37, 31, 74, /* EX */ - ILL, 41, 52, 38, 46, 51, 25,SYM, 75, 76, 45, 32, 16, 77, 78,SYM, /* FX */ + SYM, 55,SYM,SYM,SYM,ILL, 56,SYM,SYM, 2, 17, 25, 57,SYM,ILL, 48, /* AX */ + SYM, 58,SYM,SYM,SYM,SYM, 59,SYM,SYM, 6, 17, 25, 60,SYM,ILL, 48, /* BX */ + 42, 36, 30,ILL, 43, 61, 62, 23, 44, 35, 63, 51, 54, 38, 31, 50, /* CX */ + ILL, 39, 45, 37, 49, 52, 26,SYM, 64, 65, 40, 32, 16, 53, 66, 67, /* DX */ + 42, 36, 30,ILL, 43, 68, 69, 23, 44, 35, 70, 51, 54, 38, 31, 50, /* EX */ + ILL, 39, 45, 37, 49, 52, 26,SYM, 71, 72, 40, 32, 16, 53, 73,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,78 +89,79 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 15, 22, 9, 1, 27, 21, 19, 6, 28, 7, 5, 11, 3, 14, /* 4X */ - 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 15, 22, 9, 1, 27, 21, 19, 2, 28, 7, 5, 11, 3, 14, /* 6X */ - 23, 35, 4, 10, 8, 12, 18, 29, 33, 13, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 15, 24, 8, 1, 27, 19, 20, 6, 28, 7, 4, 11, 3, 14, /* 4X */ + 22, 34, 5, 10, 9, 12, 18, 29, 33, 13, 21,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 15, 24, 8, 1, 27, 19, 20, 2, 28, 7, 4, 11, 3, 14, /* 6X */ + 22, 34, 5, 10, 9, 12, 18, 29, 33, 13, 21,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 79,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 48, 36, 30, 47, 39, 42, 49, 24, 40, 34, 80, 44, 81, 37, 31, 82, /* CX */ - 26, 41, 52, 38, 46, 83, 25,SYM, 43, 84, 45, 32, 16, 2, 17, 85, /* DX */ - 48, 36, 30, 47, 39, 42, 49, 24, 40, 34, 86, 44, 87, 37, 31, 88, /* EX */ - 26, 41, 52, 38, 46, 89, 25,SYM, 43, 90, 45, 32, 16, 6, 17, 53, /* FX */ + SYM,SYM,SYM,SYM,SYM, 74,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 42, 36, 30, 46, 43, 75, 41, 23, 44, 35, 76, 51, 54, 38, 31, 50, /* CX */ + 25, 39, 45, 37, 49, 77, 26,SYM, 47, 78, 40, 32, 16, 2, 17, 79, /* DX */ + 42, 36, 30, 46, 43, 80, 41, 23, 44, 35, 81, 51, 54, 38, 31, 50, /* EX */ + 25, 39, 45, 37, 49, 82, 26,SYM, 47, 83, 40, 32, 16, 6, 17, 84, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const int Unicode_Char_size = 66; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 15, 67, 22, 68, 9, 69, 1, 70, 27, 71, 21, 72, 19, - 73, 2, 73, 6, 74, 28, 75, 7, 76, 5, 77, 11, 78, 3, 79, 14, - 80, 23, 82, 4, 83, 10, 84, 8, 85, 12, 86, 18, 87, 29, 89, 13, - 90, 20, 97, 0, 98, 15, 99, 22, 100, 9, 101, 1, 102, 27,103, 21, - 104, 19, 105, 2, 106, 28, 107, 7, 108, 5, 109, 11, 110, 3,111, 14, - 112, 23, 114, 4, 115, 10, 116, 8, 117, 12, 118, 18, 119, 29,121, 13, - 122, 20, 194, 30, 199, 24, 206, 31, 214, 25, 219, 32, 220, 16,226, 30, - 231, 24, 238, 31, 246, 25, 251, 32, 252, 16, 286, 26, 287, 26,305, 6, + 65, 0, 66, 15, 67, 24, 68, 8, 69, 1, 70, 27, 71, 19, 72, 20, + 73, 2, 73, 6, 74, 28, 75, 7, 76, 4, 77, 11, 78, 3, 79, 14, + 80, 22, 82, 5, 83, 10, 84, 9, 85, 12, 86, 18, 87, 29, 89, 13, + 90, 21, 97, 0, 98, 15, 99, 24, 100, 8, 101, 1, 102, 27,103, 19, + 104, 20, 105, 2, 106, 28, 107, 7, 108, 4, 109, 11, 110, 3,111, 14, + 112, 22, 114, 5, 115, 10, 116, 9, 117, 12, 118, 18, 119, 29,121, 13, + 122, 21, 194, 30, 199, 23, 206, 31, 214, 26, 219, 32, 220, 16,226, 30, + 231, 23, 238, 31, 246, 26, 251, 32, 252, 16, 286, 25, 287, 25,305, 6, 350, 17, 351, 17, }; /* Model Table: - * Total sequences: 1097 - * First 512 sequences: 0.9923593121944019 - * Next 512 sequences (512-1024): 0.007545326169453709 - * Rest: 9.536163614441446e-05 + * Total considered sequences: 1109 / 1089 + * - Positive sequences: first 553 (0.995033943100518) + * - Probable sequences: next 194 (747-553) (0.0039670697911219355) + * - Neutral sequences: last 342 (0.0009989871083601054) + * - Negative sequences: -20 (off-ratio) * Negative sequences: TODO */ static const PRUint8 TurkishLangModel[] = { - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,2,2,0,0, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,3,3,3,3,0,0,0, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,3,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,2,2,3,1, - 3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,2,3,2,2,3,3,2, - 2,2,0,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,2,2,3,2,3,3,2,2,3,3,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,2,2,2,3,3,0,2,2,2,3,2,3, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,0,3,3,2,2,2,2,3,3,0,2,2,2,2,2,0, - 3,3,3,2,3,3,3,2,2,3,3,2,3,3,3,2,3,0,2,2,2,2,2,2,0,3,0,2,2,2,2,2,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,0,3,3,2,2,3,3,3,3,0,2,1,2,2,2,2, - 3,3,3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,2,2,3,3,2,3,3,2,3,0,2,2,2,2,3,2, - 3,3,3,3,3,3,0,3,3,3,3,3,2,3,2,3,0,3,3,3,3,3,3,3,3,0,3,3,2,0,0,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,2,2,3,3,2,2,3,0,2,2,2,2,0,2, - 2,2,2,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,0,3,3,3,3,0,1,0, - 3,3,3,2,3,3,3,2,2,3,2,3,3,3,3,3,3,2,2,2,2,2,2,0,0,3,0,2,2,2,2,2,2, - 2,2,2,3,3,3,0,3,3,3,3,3,0,3,1,3,0,3,3,2,3,2,3,3,3,2,3,3,2,0,0,0,0, - 3,3,3,2,2,3,3,3,3,2,2,3,3,2,2,3,3,1,2,2,0,3,2,2,3,2,0,2,0,0,2,2,1, - 3,3,3,2,3,3,3,2,2,3,2,2,3,3,3,2,2,2,3,2,2,2,3,0,2,0,0,2,2,0,2,2,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,2,2,2,2,0,2,2,3,2,0,2,2,2,3,2,0, - 3,3,3,2,2,3,3,2,2,3,2,3,3,3,3,2,3,0,2,2,2,3,3,2,1,2,0,1,0,2,2,2,1, - 3,3,3,2,3,3,3,2,2,2,2,2,3,2,3,2,3,0,2,3,2,2,1,2,0,3,0,0,0,2,2,0,0, - 3,3,3,2,3,3,3,3,2,2,2,2,3,2,3,3,3,0,0,3,2,2,2,1,0,2,0,2,0,0,2,0,0, - 3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,2,2,1,0,3,2,2,1,2,3,0,0,2,0,0,2,0,0, - 3,3,3,0,2,3,3,2,3,0,2,3,3,2,3,2,3,0,0,2,0,2,2,0,2,2,0,0,0,0,0,0,0, - 0,0,0,3,3,3,0,3,3,3,3,3,0,3,0,2,0,2,2,2,3,2,2,2,2,0,3,2,0,2,0,0,0, - 3,3,3,2,3,3,3,0,1,3,2,3,3,0,2,2,3,0,2,0,2,2,2,0,0,0,0,1,0,0,0,0,0, - 3,3,3,2,3,3,3,3,3,0,2,2,3,3,3,2,2,1,0,2,2,3,2,0,2,2,0,2,1,0,2,2,0, - 3,3,3,2,2,2,2,2,0,2,2,0,3,2,3,0,2,0,0,0,0,2,2,2,0,2,0,1,0,0,0,0,0, - 3,3,3,2,2,2,0,2,0,2,2,2,2,2,2,0,0,2,0,2,0,0,1,2,0,0,0,2,0,2,0,0,0, - 2,2,2,3,2,3,0,2,2,2,2,2,0,2,0,2,0,2,2,2,2,0,0,0,0,0,2,2,0,0,0,2,0, - 0,0,0,2,2,2,0,2,1,2,2,2,0,2,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,2,0,2,2,2,2,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,1,3,3,3,0,1,0, + 3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,0,0,1, + 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,1,3,3,1,1,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,0,2,3,2,2,2,3,1, + 3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,2,3,3,0,1,3,1,1,3,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,2,3,2, + 2,0,0,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,2,1,2,3,3,3,3,3,0,3,0,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,2,1,3,0,0,3,2,1,1,3,1,3, + 3,3,3,2,3,3,3,2,3,2,3,2,3,3,3,2,3,0,2,2,2,2,1,1,3,1,3,1,2,2,2,3,1, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,2,3,3,2,3,2,0,3,3,0,2,2,1,1, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,3,2,3,3,3,0,3,2,0,2,2,2,2, + 3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,3,2,1,2,3,3,3,1,3,0,3,2,1,2,3,3,2, + 3,3,3,3,3,3,0,3,3,3,3,3,2,3,2,3,0,3,3,3,3,3,3,3,3,3,0,3,2,1,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,2,2,1,3,0,3,2,0,2,2,1,1, + 3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,0,1,0, + 3,3,3,3,3,3,3,2,3,2,3,2,3,3,3,3,3,1,1,0,2,2,0,2,2,1,3,1,1,1,2,2,2, + 0,2,1,3,3,3,0,3,3,3,3,3,0,3,1,3,0,3,3,1,2,3,3,3,3,3,0,3,1,0,0,0,0, + 3,3,3,2,3,2,3,3,1,3,2,3,3,2,2,3,3,1,3,3,2,1,1,3,1,0,2,2,0,0,1,1,1, + 3,3,3,1,3,3,3,2,3,1,3,1,3,3,3,2,2,2,3,2,2,2,0,0,3,0,1,1,0,0,2,2,1, + 3,3,3,3,3,3,3,1,2,2,3,2,3,2,3,1,3,1,0,2,3,1,1,1,1,0,3,1,1,1,2,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,0,1,2,2,3,2,0,2,1,1,2,3,2,1, + 3,3,3,2,3,2,3,2,3,2,1,3,3,3,3,3,3,0,2,3,3,3,0,0,3,0,3,0,1,1,2,2,1, + 3,3,3,2,3,3,3,2,0,3,3,3,3,2,3,1,3,0,0,1,3,1,3,3,1,0,1,0,0,1,0,0,0, + 3,3,3,0,3,2,3,2,1,3,2,3,3,3,3,2,3,0,0,1,2,0,0,1,1,0,3,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,2,3,2,3,3,2,3,2,3,0,0,2,3,1,1,0,3,1,0,1,0,1,2,1,1, + 3,3,3,1,3,3,3,0,3,0,1,3,3,0,3,1,3,0,1,0,0,2,0,0,1,0,0,0,0,0,0,0,0, + 0,1,1,3,3,3,0,3,3,3,3,3,0,3,0,2,0,2,2,1,1,3,3,3,2,3,0,1,0,0,0,0,0, + 3,3,3,1,3,3,3,2,1,3,2,1,3,3,3,1,2,2,0,2,1,0,0,1,2,0,1,3,0,0,2,2,1, + 3,3,3,1,2,1,2,1,2,0,1,1,3,1,3,1,2,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0, + 3,3,3,2,2,2,0,1,1,1,1,1,2,1,3,1,1,0,0,0,2,0,1,0,1,0,0,1,1,1,0,0,0, + 1,1,2,3,3,3,0,2,2,3,2,3,0,2,0,2,0,2,1,1,2,2,0,0,1,1,0,2,0,0,0,1,0, + 0,0,1,2,3,1,0,1,1,1,1,1,0,2,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0, + 0,0,0,1,1,1,0,1,1,0,2,3,0,0,0,1,0,1,0,0,1,0,1,0,1,0,0,2,0,0,0,0,0, }; @@ -169,7 +170,7 @@ const SequenceModel Iso_8859_3TurkishModel = Iso_8859_3_CharToOrderMap, TurkishLangModel, 33, - (float)0.9923593121944019, + (float)0.9990010128916399, PR_FALSE, "ISO-8859-3", "tr" @@ -180,7 +181,7 @@ const SequenceModel Iso_8859_9TurkishModel = Iso_8859_9_CharToOrderMap, TurkishLangModel, 33, - (float)0.9923593121944019, + (float)0.9990010128916399, PR_FALSE, "ISO-8859-9", "tr" @@ -193,5 +194,8 @@ const LanguageModel TurkishModel = 66, TurkishLangModel, 33, - (float)0.9923593121944019, + 4, + (float)0.37622064189380516, + 23, + (float)0.0331922903705142, }; diff --git a/src/LangModels/LangVietnameseModel.cpp b/src/LangModels/LangVietnameseModel.cpp index a66aecf..b3c46fd 100644 --- a/src/LangModels/LangVietnameseModel.cpp +++ b/src/LangModels/LangVietnameseModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-21 15:20:14.350353 + * On: 2022-12-14 18:45:49.354853 **/ /* Character Mapping Table: @@ -68,272 +68,270 @@ static const unsigned char Windows_1258_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 4X */ - 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 6X */ - 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,105,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 97,ILL,ILL,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 97,ILL,ILL,104, /* 9X */ + SYM, 6, 20, 3, 23, 25, 72, 5, 1, 4, 81, 21, 14, 9, 0, 10, /* 4X */ + 17, 31, 12, 18, 2, 7, 13, 73, 51, 19, 84,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 6, 20, 3, 23, 25, 72, 5, 1, 4, 81, 21, 14, 9, 0, 10, /* 6X */ + 17, 31, 12, 18, 2, 7, 13, 73, 51, 19, 84,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,107,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,101,ILL,ILL,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,101,ILL,ILL,104, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,107,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 11, 15, 27, 44,101,106, 95, 92, 90, 73, 28,108,SYM, 39,103,102, /* CX */ - 10,100,SYM, 35, 29, 47, 98,SYM, 96, 62, 61,109, 93, 17,SYM, 99, /* DX */ - 11, 15, 27, 44,101,106, 95, 92, 90, 73, 28,110,SYM, 39,103,102, /* EX */ - 10,100,SYM, 35, 29, 47, 98,SYM, 96, 62, 61,111, 93, 17,112,104, /* FX */ + SYM,SYM,SYM,SYM,SYM,108,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 11, 15, 32, 39, 97,106, 95, 96, 85, 74, 30,105,SYM, 43, 99,102, /* CX */ + 8,103,SYM, 36, 29, 44, 94,SYM, 98, 62, 61,109, 92, 16,SYM,100, /* DX */ + 11, 15, 32, 39, 97,106, 95, 96, 85, 74, 30,105,SYM, 43, 99,102, /* EX */ + 8,103,SYM, 36, 29, 44, 94,SYM, 98, 62, 61,110, 92, 16,111,104, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ static const unsigned char Viscii_CharToOrderMap[] = { - CTR,CTR, 86,CTR,CTR, 91, 77,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR, 82,CTR,CTR,CTR,CTR, 84,CTR,CTR,CTR,CTR, 94,CTR, /* 1X */ + CTR,CTR, 86,CTR,CTR, 91, 78,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR, 79,CTR,CTR,CTR,CTR, 80,CTR,CTR,CTR,CTR, 93,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 4X */ - 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 6, 18, 3, 21, 24, 71, 5, 1, 4, 78, 22, 14, 8, 0, 9, /* 6X */ - 16, 32, 13, 19, 2, 7, 12, 74, 53, 20, 83,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 31, 60, 68, 64, 42, 51, 76, 46, 80, 89, 23, 38, 43, 72, 26, 30, /* 8X */ - 49, 59, 81, 25, 41, 37, 40, 57, 45, 75, 54, 70, 36, 69, 50, 79, /* 9X */ - 85, 60, 68, 64, 42, 51, 76, 46, 80, 89, 23, 38, 43, 72, 26, 30, /* AX */ - 49, 59, 81, 87, 47, 25, 40, 57, 45, 48, 55, 58, 65, 47, 37, 17, /* BX */ - 11, 15, 27, 56, 33, 44, 86, 91, 90, 73, 28, 88, 52, 39, 67, 79, /* CX */ - 10, 55, 63, 35, 29, 31, 82, 58, 65, 62, 61, 84, 94, 66, 87, 17, /* DX */ - 11, 15, 27, 56, 33, 44, 34, 77, 90, 73, 28, 88, 52, 39, 67, 70, /* EX */ - 10, 48, 63, 35, 29, 85, 75, 54, 50, 62, 61, 69, 36, 66, 41, 34, /* FX */ + SYM, 6, 20, 3, 23, 25, 72, 5, 1, 4, 81, 21, 14, 9, 0, 10, /* 4X */ + 17, 31, 12, 18, 2, 7, 13, 73, 51, 19, 84,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 6, 20, 3, 23, 25, 72, 5, 1, 4, 81, 21, 14, 9, 0, 10, /* 6X */ + 17, 31, 12, 18, 2, 7, 13, 73, 51, 19, 84,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 26, 60, 69, 66, 38, 50, 75, 45, 83, 88, 22, 37, 46, 65, 28, 27, /* 8X */ + 56, 58, 82, 24, 41, 35, 40, 54, 42, 77, 53, 71, 34, 67, 57, 76, /* 9X */ + 87, 60, 69, 66, 38, 50, 75, 45, 83, 88, 22, 37, 46, 65, 28, 27, /* AX */ + 56, 58, 82, 90, 44, 24, 40, 54, 42, 47, 52, 59, 64, 44, 35, 16, /* BX */ + 11, 15, 32, 55, 33, 39, 86, 91, 85, 74, 30, 89, 49, 43, 68, 76, /* CX */ + 8, 52, 63, 36, 29, 26, 79, 59, 64, 62, 61, 80, 93, 70, 90, 16, /* DX */ + 11, 15, 32, 55, 33, 39, 48, 78, 85, 74, 30, 89, 49, 43, 68, 71, /* EX */ + 8, 47, 63, 36, 29, 87, 77, 53, 57, 62, 61, 67, 34, 70, 41, 48, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 190; +static const int Unicode_Char_size = 188; static const unsigned int Unicode_CharOrder[] = { - 65, 6, 66, 18, 67, 3, 68, 21, 69, 24, 70, 71, 71, 5, 72, 1, - 73, 4, 74, 78, 75, 22, 76, 14, 77, 8, 78, 0, 79, 9, 80, 16, - 81, 32, 82, 13, 83, 19, 84, 2, 85, 7, 86, 12, 87, 74, 88, 53, - 89, 20, 90, 83, 97, 6, 98, 18, 99, 3, 100, 21, 101, 24, 102, 71, - 103, 5, 104, 1, 105, 4, 106, 78, 107, 22, 108, 14, 109, 8, 110, 0, - 111, 9, 112, 16, 113, 32, 114, 13, 115, 19, 116, 2, 117, 7, 118, 12, - 119, 74, 120, 53, 121, 20, 122, 83, 192, 11, 193, 15, 194, 27, 195, 56, - 199, 92, 200, 90, 201, 73, 202, 28, 204, 52, 205, 39, 210, 63, 211, 35, - 212, 29, 213, 85, 217, 62, 218, 61, 220, 93, 221, 66, 224, 11, 225, 15, - 226, 27, 227, 56, 231, 92, 232, 90, 233, 73, 234, 28, 236, 52, 237, 39, - 242, 63, 243, 35, 244, 29, 245, 85, 249, 62, 250, 61, 252, 93, 253, 66, - 258, 44, 259, 44, 272, 10, 273, 10, 296, 67, 297, 67, 360, 69, 361, 69, - 416, 47, 417, 47, 431, 17, 432, 17, 7840, 31, 7841, 31, 7842, 33,7843, 33, - 7844, 42, 7845, 42, 7846, 51, 7847, 51, 7848, 76, 7849, 76, 7850, 77,7851, 77, - 7852, 46, 7853, 46, 7854, 60, 7855, 60, 7856, 68, 7857, 68, 7858, 86,7859, 86, - 7860, 91, 7861, 91, 7862, 64, 7863, 64, 7864, 89, 7865, 89, 7866, 88,7867, 88, - 7868, 80, 7869, 80, 7870, 23, 7871, 23, 7872, 38, 7873, 38, 7874, 43,7875, 43, - 7876, 72, 7877, 72, 7878, 26, 7879, 26, 7880, 70, 7881, 70, 7882, 45,7883, 45, - 7884, 54, 7885, 54, 7886, 75, 7887, 75, 7888, 30, 7889, 30, 7890, 49,7891, 49, - 7892, 59, 7893, 59, 7894, 81, 7895, 81, 7896, 25, 7897, 25, 7898, 37,7899, 37, - 7900, 40, 7901, 40, 7902, 57, 7903, 57, 7904, 87, 7905, 87, 7906, 41,7907, 41, - 7908, 50, 7909, 50, 7910, 36, 7911, 36, 7912, 55, 7913, 55, 7914, 58,7915, 58, - 7916, 65, 7917, 65, 7918, 34, 7919, 34, 7920, 48, 7921, 48, 7922, 79,7923, 79, - 7924, 94, 7925, 94, 7926, 82, 7927, 82, 7928, 84, 7929, 84, + 65, 6, 66, 20, 67, 3, 68, 23, 69, 25, 70, 72, 71, 5, 72, 1, + 73, 4, 74, 81, 75, 21, 76, 14, 77, 9, 78, 0, 79, 10, 80, 17, + 81, 31, 82, 12, 83, 18, 84, 2, 85, 7, 86, 13, 87, 73, 88, 51, + 89, 19, 90, 84, 97, 6, 98, 20, 99, 3, 100, 23, 101, 25, 102, 72, + 103, 5, 104, 1, 105, 4, 106, 81, 107, 21, 108, 14, 109, 9, 110, 0, + 111, 10, 112, 17, 113, 31, 114, 12, 115, 18, 116, 2, 117, 7, 118, 13, + 119, 73, 120, 51, 121, 19, 122, 84, 192, 11, 193, 15, 194, 32, 195, 55, + 200, 85, 201, 74, 202, 30, 204, 49, 205, 43, 210, 63, 211, 36, 212, 29, + 213, 87, 217, 62, 218, 61, 220, 92, 221, 70, 224, 11, 225, 15, 226, 32, + 227, 55, 232, 85, 233, 74, 234, 30, 236, 49, 237, 43, 242, 63, 243, 36, + 244, 29, 245, 87, 249, 62, 250, 61, 252, 92, 253, 70, 258, 39, 259, 39, + 272, 8, 273, 8, 296, 68, 297, 68, 360, 67, 361, 67, 416, 44, 417, 44, + 431, 16, 432, 16, 7840, 26, 7841, 26, 7842, 33, 7843, 33, 7844, 38,7845, 38, + 7846, 50, 7847, 50, 7848, 75, 7849, 75, 7850, 78, 7851, 78, 7852, 45,7853, 45, + 7854, 60, 7855, 60, 7856, 69, 7857, 69, 7858, 86, 7859, 86, 7860, 91,7861, 91, + 7862, 66, 7863, 66, 7864, 88, 7865, 88, 7866, 89, 7867, 89, 7868, 83,7869, 83, + 7870, 22, 7871, 22, 7872, 37, 7873, 37, 7874, 46, 7875, 46, 7876, 65,7877, 65, + 7878, 28, 7879, 28, 7880, 71, 7881, 71, 7882, 42, 7883, 42, 7884, 53,7885, 53, + 7886, 77, 7887, 77, 7888, 27, 7889, 27, 7890, 56, 7891, 56, 7892, 58,7893, 58, + 7894, 82, 7895, 82, 7896, 24, 7897, 24, 7898, 35, 7899, 35, 7900, 40,7901, 40, + 7902, 54, 7903, 54, 7904, 90, 7905, 90, 7906, 41, 7907, 41, 7908, 57,7909, 57, + 7910, 34, 7911, 34, 7912, 52, 7913, 52, 7914, 59, 7915, 59, 7916, 64,7917, 64, + 7918, 48, 7919, 48, 7920, 47, 7921, 47, 7922, 76, 7923, 76, 7924, 93,7925, 93, + 7926, 79, 7927, 79, 7928, 80, 7929, 80, }; /* Model Table: - * Total considered sequences: 1892 / 9025 - * - Positive sequences: first 1119 (0.9950141222722985) - * - Probable sequences: next 364 (1483-1119) (0.003989870519062855) - * - Neutral sequences: last 7542 (0.0009960072086386829) - * - Negative sequences: 7133 (off-ratio) + * Total considered sequences: 1993 / 8836 + * - Positive sequences: first 1119 (0.9950155124227584) + * - Probable sequences: next 340 (1459-1119) (0.003985367896549574) + * - Neutral sequences: last 7377 (0.0009991196806919955) + * - Negative sequences: 6843 (off-ratio) * Negative sequences: TODO */ static const PRUint8 VietnameseLangModel[] = { - 3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,3,1,3,2,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,3,3,0,2,3,2,0,2,2,0,3,1,2, - 3,0,2,0,2,0,3,2,2,2,1,0,3,3,3,2,2,3,3,0,0,3,0,0,3,0,3,2,0,0,0,3,0,0,3,0,3,0,0,0,1,0,0,2,3,3,1,0, - 3,1,3,2,3,0,3,3,3,3,1,3,1,3,3,3,2,3,1,2,3,3,1,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,1,3,2,0,3,1,3,3,2,0,0,3,3,2,2,0,1,3,2,2,3,3,2,0,0,0, - 3,3,3,3,3,1,3,3,3,3,0,3,3,3,3,3,2,3,1,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,0,3,2,3,2,3,3,0,3,0,3,3,3, - 1,3,3,3,3,3,1,2,3,2,0,3,3,3,3,3,3,2,3,1,3,0,0,3,2,1,3,3,3,2,0,0,1,0,0,3,2,0,1,0,0,1,0,2,0,0,1,1, - 3,3,3,3,3,0,3,3,2,3,3,3,2,3,3,3,3,3,2,3,3,3,3,0,3,3,0,3,0,3,3,3,2,3,1,3,3,1,0,2,3,1,3,0,3,0,3, - 3,3,0,3,3,0,1,1,3,3,1,2,3,3,3,3,3,3,3,0,0,1,3,0,0,0,1,3,2,3,0,1,0,0,2,0,1,0,1,1,2,0,0,1,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,3,2,3,3,3,1,3,3,3,3,0,3,3,0,3,0,3,3,1,0,2, - 1,0,0,1,1,0,3,3,0,2,0,0,0,0,3,1,0,3,0,0,0,0,1,0,3,3,2,0,2,0,2,2,0,1,1,0,3,0,0,0,1,1,1,2,0,1,0,0, - 3,3,2,1,3,2,3,3,2,3,1,3,2,3,3,3,2,3,1,3,3,2,2,2,3,3,0,3,1,3,3,3,0,2,3,3,2,1,0,0,3,3,2,0,3,2,2, - 2,3,3,3,3,3,1,3,0,3,1,3,1,3,0,0,3,3,3,0,0,1,3,0,1,0,2,1,1,1,1,0,0,0,2,0,0,0,3,0,2,0,0,2,0,0,1,0, - 3,3,3,3,3,3,2,3,3,3,2,0,3,3,3,0,3,0,3,3,3,3,3,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 0,0,0,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,3,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,3,1,0, - 3,2,3,3,3,3,3,2,3,3,0,2,2,3,3,3,3,0,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,3,0,0,0,1,1,2,2,0,3,0,1,0,3, - 2,0,3,0,3,0,2,1,0,2,2,0,3,0,0,0,0,2,0,3,0,0,0,0,2,0,2,1,0,3,3,2,3,1,3,3,3,2,0,0,0,1,0,1,0,1,0,2, - 3,1,2,3,3,1,3,3,3,3,0,3,1,3,3,3,3,3,3,3,2,2,1,2,3,3,3,3,2,3,3,3,0,3,0,3,1,3,3,1,3,0,3,0,2,2,3, - 2,3,3,3,3,3,2,3,3,3,3,3,0,3,2,3,2,3,0,0,3,1,3,1,2,0,3,1,2,2,3,1,1,2,3,0,0,3,1,0,0,1,3,1,0,0,0,1, - 3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,3,2,3,3,0,3,0,0,0,0,0,0,3,2,3,0,0,0,0,0,0,0,0,0,0,3,0,0, - 0,0,0,0,0,0,3,0,0,3,0,0,0,3,0,0,0,3,0,0,0,1,0,0,3,0,1,3,0,0,0,2,0,0,0,0,3,0,0,0,0,2,0,2,1,0,0,0, - 0,3,1,0,3,0,3,3,0,3,0,3,0,2,0,3,0,3,1,0,0,0,1,3,3,3,3,3,3,3,3,3,0,3,0,3,3,2,3,3,3,3,3,3,3,3,3, - 3,3,3,2,3,3,0,3,3,3,0,2,3,3,3,2,3,3,0,0,2,2,1,3,0,0,1,0,3,3,1,0,0,0,3,0,0,0,0,3,3,2,3,2,0,0,0,0, - 3,0,0,0,3,0,0,3,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,1,0,3,1,3,3,0,3,0,3,0,2,1,3,1,3,0,2,2,2,2,2,3,1,3,3,2,3,3,3,0,2,3,1,0,3,3,3,1,3,3,0,3,3,3, - 1,3,0,3,3,3,0,3,1,2,3,3,0,3,0,3,3,1,0,0,3,0,3,1,0,0,2,0,2,1,3,1,0,3,2,0,0,2,3,0,2,3,3,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,2,3,3,3,3,2,3,3,2,2,0,0,3,3,3,3,1,3,3,3, - 2,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,1,3,0,0,3,0,1,2,2,3,2,1,1,0,1,0,2,2,0,1,0,3,0,0,3,0,1,0,1,1,0, - 2,3,3,3,3,3,3,3,3,3,0,3,3,1,3,3,3,3,3,3,3,3,2,0,3,3,3,3,3,3,3,3,1,0,3,2,0,3,1,3,3,3,3,0,3,3,3, - 1,3,1,3,3,1,2,3,0,3,0,2,0,3,3,2,3,3,3,3,3,0,2,0,3,3,3,1,2,0,3,1,0,3,3,0,0,0,1,0,0,2,1,1,0,0,1,2, - 3,0,3,3,3,1,0,3,3,3,0,0,1,2,2,0,3,0,2,1,3,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,3,3,1,3,2,3,3,2,3,0,0,0,3,3,2,3,1,1,3,2,2,0,0,3,0,0,1,1,2,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0, - 0,0,1,0,0,0,1,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, - 3,1,3,1,2,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,3,3,0,0,0,0,0, - 3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0, - 3,2,1,3,3,0,3,3,2,3,0,3,1,3,3,3,0,3,3,3,3,2,1,3,3,3,3,3,3,2,3,3,0,3,2,3,0,3,3,3,3,0,3,1,3,3,3, - 2,1,3,2,3,3,1,3,3,3,3,1,3,3,3,3,2,0,2,0,0,3,0,3,1,0,3,1,3,2,0,2,0,1,2,0,0,0,1,0,1,0,1,2,0,0,1,0, - 2,3,3,3,3,2,3,3,3,3,0,3,1,2,3,3,3,3,3,3,3,2,3,0,3,1,0,3,1,3,3,2,2,3,1,3,2,3,0,1,1,3,3,0,1,0,1, - 3,3,2,2,1,1,0,0,3,2,3,0,2,3,3,3,0,1,3,0,3,0,0,0,2,0,3,2,0,0,0,2,0,3,0,0,2,2,0,0,0,2,0,1,3,0,0,0, - 3,0,2,3,2,2,3,3,3,3,0,0,0,3,3,0,3,0,3,3,2,2,2,3,3,0,3,0,3,0,0,0,0,0,0,1,0,0,3,0,0,0,0,3,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,2,1,3,3,3,3,3,2,3,0,3,1,3,3,2,2,3,2,3,2,2,1,0,3,2,2,3,1,0,2,3,0,2,3,2,0,0,2,3,2,0,3,1,0,3,3, - 1,3,2,3,3,2,0,3,3,3,2,2,0,2,0,3,3,3,0,0,3,1,2,0,2,3,2,3,0,0,3,2,0,0,1,0,3,0,2,2,1,2,1,2,0,0,0,0, - 3,3,2,0,3,0,3,3,3,3,0,0,3,3,2,2,0,0,1,3,2,1,1,3,3,0,1,0,3,3,0,1,0,0,0,1,2,0,0,3,0,0,0,3,1,3,0, - 2,0,0,1,1,3,0,0,1,0,0,0,0,0,0,0,0,0,0,3,2,0,0,2,0,0,3,2,0,0,0,0,3,2,0,3,1,3,0,0,0,3,2,3,0,0,0,1, - 3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,0,3,3,3,3,3,0,3,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,3,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0, - 3,0,3,3,3,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,2,0,0,0,3,3,0,0,0,0,0,1,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,2,1,2,0,3,3,0,2,0,0,3,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,2,3,1,0,0,3,0,0,0,0,2,0,0,2,0,1,0,0,1,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,0,3,3,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,0,1,0,1,3,0,2,2,0,0,2,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,3,1,3,0,0,0,0,2,0,0,3,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,1,3,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,1,3,1,3,1,0,0,0,1,1,0,2,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,2,2,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,2,0,0,0,0,3,0,0,0,0,0,1,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,0,3,3,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,1,3,0,0,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,3,2,0,1,0,3,0,0,0,0,0,0,0,3,0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,2,3,2,3,0,3,3,1,3,0,1,3,0,2,3,2,3,0,0,2,1,0,3,3,1,0,3,2,3,0,2,0,3,0,3,0,0,0,3,0,0,3,0,1,1,0, - 1,0,0,0,0,1,3,0,3,3,1,0,0,0,3,1,0,0,3,0,0,0,0,1,2,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0, - 3,0,2,3,3,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,1,0,0,3,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,1,3,3,2,3,2,0,2,3,3,2,3,1,2,1,1,3,1,1, + 0,3,0,1,2,1,1,2,2,2,1,3,0,3,3,1,2,3,0,3,0,1,3,0,1,2,2,3,1,0,1,0,0,0,3,3,0,3,2,0,0,0,0,1,3,1,0, + 3,1,3,2,3,1,3,3,0,3,3,3,3,1,3,3,3,1,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,0,3,2,2,3,3,0,3,2,1,0,1,3,3,1,3,3,1,3,1,1,1,1,0, + 3,3,3,3,3,2,3,3,1,3,3,3,3,3,3,3,3,3,3,3,2,1,3,2,3,3,3,3,3,3,3,1,3,3,2,3,3,2,3,3,3,0,3,3,2,3,3, + 3,0,3,3,1,3,2,0,1,3,3,3,3,3,3,3,3,3,0,3,0,3,1,1,3,2,3,2,2,1,3,1,3,0,0,0,0,3,1,0,0,0,1,0,0,1,2, + 3,3,3,3,3,0,3,3,1,3,3,3,3,1,3,3,3,2,3,3,2,3,0,3,3,3,3,3,0,3,0,2,3,3,3,2,3,0,3,3,3,1,0,0,3,3,0, + 3,1,0,3,1,3,2,1,3,2,3,3,2,3,3,3,3,3,0,2,3,0,1,0,0,2,2,1,3,0,2,0,0,0,2,2,0,1,1,0,2,0,0,2,0,0,0, + 3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,0,3,1,3,3,3,3,3,2,3,0,3,3,3,3,1,3,0,0,0,1,2,3, + 0,3,0,0,3,1,3,0,1,1,1,0,0,0,3,1,1,0,3,3,1,0,1,0,0,3,1,3,0,0,2,1,0,0,2,0,1,3,2,0,0,1,0,0,0,0,0, + 3,3,3,1,3,2,3,3,1,2,3,3,3,1,3,3,3,2,3,3,2,2,1,3,2,3,3,3,0,3,1,0,3,2,2,0,3,0,3,3,3,3,1,1,2,2,0, + 3,3,3,3,1,1,3,2,3,3,3,0,3,3,1,1,3,3,0,3,3,0,1,0,0,1,2,2,1,0,1,2,0,0,1,3,0,1,1,0,2,0,0,2,0,1,0, + 3,3,3,3,3,3,3,3,1,3,3,0,3,3,3,0,0,3,3,3,3,3,0,3,0,3,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,1,0, + 3,3,3,3,3,3,3,1,0,3,2,2,3,2,3,3,0,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,3,0,1,1,1,3,1,0,0,0,1,1,3,0, + 0,0,1,3,3,0,1,2,2,3,0,3,0,0,0,0,0,0,0,2,0,0,0,3,0,2,1,3,3,3,0,3,2,3,2,2,1,2,0,0,0,1,1,0,0,0,2, + 0,1,0,1,3,0,3,3,0,0,3,3,1,0,0,3,3,1,0,0,0,0,3,0,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,0,3,3,0,3,3,0,3,3,2,3,2,3,3,1,3,0,0,3,1,3,2,0,3,0,0,0,3,0,3,1,0,0,0,3,1,0,3,3,0,3,2,3,0,0,0, + 2,2,2,2,3,1,3,3,1,3,3,3,1,1,2,3,3,3,3,3,3,1,2,2,3,3,3,3,3,3,2,0,3,3,0,3,3,3,3,2,3,0,1,2,2,3,0, + 3,0,3,1,1,3,3,3,3,2,3,1,2,3,2,3,2,0,1,3,2,2,0,0,1,2,2,3,1,0,3,3,0,3,1,3,3,0,2,1,1,3,2,2,0,2,0, + 3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,0,3,3,0,0,0,0,2,0,3,0,0,1,0,0,2,0,0,0,0,0,0,0, + 0,0,0,0,3,0,0,0,2,0,0,0,0,2,0,0,1,0,0,3,0,0,1,0,0,3,3,2,0,0,0,0,0,0,2,0,0,2,1,0,0,1,1,0,1,0,0, + 3,1,1,0,3,0,0,3,0,3,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,2,3,3,2,1,1,1,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,2,2,1,2,1,3,0,1,3,2,3,0,0,0,2,0,0,0,2,2,2,2,0,3,0,3,2,0,2,0, + 2,1,2,1,3,1,3,3,2,1,3,3,2,1,2,2,3,1,2,2,1,1,2,2,2,3,3,3,3,3,2,1,3,3,0,3,2,3,3,3,1,3,3,3,1,3,1, + 3,3,3,3,0,1,3,3,2,0,3,0,3,2,2,3,3,0,0,1,3,3,0,0,1,1,0,2,1,0,2,3,0,2,1,1,3,0,1,0,3,2,2,3,0,0,0, + 2,3,3,3,3,2,3,3,0,3,3,3,1,3,3,3,3,3,3,3,3,2,1,3,3,3,3,3,3,3,3,0,3,0,0,3,2,2,3,3,3,3,3,3,1,3,1, + 3,3,1,3,1,1,2,1,3,3,3,1,2,3,3,2,3,3,3,3,3,3,0,3,0,3,2,3,1,0,3,3,0,0,1,3,3,2,1,0,2,1,2,0,1,3,2, + 3,0,3,3,3,0,0,3,0,3,3,0,0,0,2,0,0,3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,2,1,1,0,3,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,3,0,0,0,0,3,3,0,0,3,0,0, + 0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0, + 1,3,3,2,3,0,3,3,0,2,3,0,3,1,3,2,0,3,3,2,2,2,0,2,0,3,0,0,0,2,1,1,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0, + 0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,2,3,3,0,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,2,3,2,3,0,3,1,2,3,3,2,3,3,0,2,2,0,3,0,1,3,2,0, + 3,1,1,2,0,3,2,3,2,1,3,2,1,3,3,2,1,3,0,1,0,3,0,0,1,2,3,3,1,0,1,0,0,2,0,0,3,2,0,0,0,0,2,1,3,1,0, + 3,1,2,3,3,2,3,2,0,3,3,0,2,1,3,0,0,2,3,1,3,2,3,2,0,3,0,0,3,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,3, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, + 3,3,2,3,3,0,3,3,0,1,3,3,3,0,3,3,3,0,3,3,3,0,3,1,3,3,3,3,3,3,3,0,2,3,0,3,3,3,3,3,3,1,3,3,2,3,1, + 0,1,3,3,1,3,3,3,3,3,2,3,1,3,3,3,2,2,0,0,0,1,3,0,3,0,1,3,2,0,3,1,0,0,2,1,1,1,3,0,0,0,1,1,0,1,0, + 2,3,2,1,3,2,3,3,0,3,3,0,3,2,3,3,0,1,3,3,1,1,3,1,0,3,1,0,1,3,3,0,0,0,0,0,1,2,0,0,0,0,3,3,0,0,3, + 0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,3,2,1,1,3,0,3,0,0,3,3,0,0,3,0,3,0,0,1,3,0,0,1,2, + 3,0,3,3,0,0,0,3,0,3,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 2,2,3,3,3,3,3,3,1,2,3,3,3,2,3,2,3,3,3,2,2,1,0,2,2,3,3,2,2,1,1,0,3,2,0,0,1,1,3,1,3,0,3,1,2,3,0, + 3,3,1,3,0,3,3,2,3,2,3,1,2,2,0,3,3,0,3,3,2,2,0,0,0,2,3,3,0,0,0,3,0,0,1,1,0,2,2,1,2,3,2,1,0,1,0, + 3,0,3,3,3,0,0,0,0,2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,1,3,3,0,3,3,3,0,0,3,3,3,3,3,0,3,0,3,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0, + 3,0,3,3,3,0,0,0,0,3,3,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,3,0,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,1,3,3,0,0,0,3,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,2,2,3,1,0,0,1,3,0,0,2,0,2,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,2,2,0,2,0,3,1,3,0,0,2,0,1,0,0,1,2,0,1,1,0,1,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,1,0,1,0,1,3,2,0,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,1,0,0,0,3,0,3,0,0,0,0,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,3,0,0,0,0,3,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,3,0,0,1,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,3,0,3,0,0,3,0,0,1,0,1,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,2,1,0,0,3,0,3,0,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,3,0,0,2,0,2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,0,0,3,3,0,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,0,0,3,1,0,2,1,0,1,0,1,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,1,3,0,0,2,0,3,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,0,0,0,3,0,3,0,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,2,1,0,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,2,2,3,0,3,3,0,0,3,2,0,3,0,3,3,2,0,2,2,1,3,1,1,3,2,1,1,3,2,0,3,3,0,0,3,1,2,1,0,0,1,3,1,0,0, + 0,0,1,0,3,3,0,1,3,0,0,1,0,1,3,2,1,3,0,0,0,0,0,0,2,2,0,3,1,0,0,0,0,0,0,0,0,0,1,0,0,0,2,0,0,0,0, + 3,0,3,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,3,0,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,0,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,1,1,0,3,0,0,0,0,3,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,2,0,2,0,0,3,0,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,3,0,1,0,0,0,0,1,1,0,3,0,2,1,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,0,0,0,2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,3,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,3,3,2,3,1,3,2,1,3,0,0,0,3,2,2,1,1,1,1,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0, - 0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,3,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, - 3,0,0,0,0,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,1,3,2,1,2,1,0,3,3,0,0,2,3,2,0,3,0,2,3,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,1,2,3,0,3,2,1,3,0,0,0,3,2,0,1,0,1,3,3,2,1,0,3,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0, - 1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 3,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,3,1,3,3,1,3,0,0,0,2,0,0,1,0,0,1,1,0,1,0,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,0,1,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,3,0,3,0,0,1,0,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,3,0,3,0,0,3,0,0,0,0,0,0,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,3,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,0,3,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,3,3,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,3,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,3,2,3,2,3,3,0,1,3,0,3,1,3,1,1,1,2,1,1,0,0,1,0,3,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0, + 2,3,2,1,3,1,3,1,0,1,3,0,2,0,2,0,0,0,3,1,2,1,0,0,0,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 3,1,3,3,1,2,1,2,0,3,3,0,3,2,2,0,0,3,2,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,0,3,0,3,3,0,3,0,1,0,0,0,2,0,0,0,1,2,0,1,3,3,0,0,2,0,0,0,2,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0, - 0,0,0,2,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,3,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,2,1,1,3,1,3,3,0,1,3,0,2,1,0,1,0,1,1,1,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0, + 0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0, 3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,1,0,1,0,0,3,3,0,0,2,2,1,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,2,2,1,3,1,3,2,0,1,3,0,1,0,1,1,0,0,1,1,1,1,1,1,0,3,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,1,0, + 3,0,1,1,0,1,0,0,0,3,3,0,2,2,1,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,0,0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,1,1,0,0,0,0,0,0,2,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,1,0,1,0,1,0,0,0,1,0,0,2,0,1,0,0,0,1,1,3,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -341,8 +339,8 @@ const SequenceModel Windows_1258VietnameseModel = { Windows_1258_CharToOrderMap, VietnameseLangModel, - 95, - (float)0.9990039927913613, + 94, + (float)0.999000880319308, PR_FALSE, "WINDOWS-1258", "vi" @@ -352,8 +350,8 @@ const SequenceModel VisciiVietnameseModel = { Viscii_CharToOrderMap, VietnameseLangModel, - 95, - (float)0.9990039927913613, + 94, + (float)0.999000880319308, PR_FALSE, "VISCII", "vi" @@ -363,8 +361,11 @@ const LanguageModel VietnameseModel = { "vi", Unicode_CharOrder, - 190, + 188, VietnameseLangModel, - 95, - (float)0.9999159700479902, + 94, + 4, + (float)0.3451482480607094, + 57, + (float)0.03101094622486187, }; |