summary refs log tree commit diff
diff options
context:
space:
mode:
author Jehan <jehan@girinstud.io> 2022-12-17 22:25:32 +0100
committer Jehan <jehan@girinstud.io> 2022-12-17 22:47:54 +0100
commit d00d4d52b7b389d970685491eccb10228ffccdbd (patch)
tree a6a54ad761057a1b4455f9815569e5d23f68fd0e
parent 41d309e8a28407372317b048342e2bb23d9c8959 (diff)
src, script: add Macedonian support.
For UTF-8, ISO-8859-5, WINDOWS-1251 and IBM855 encodings. Test files' contents come from page 'Хибернација' on Wikipedia in Macedonian.
-rw-r--r-- README.md 5
-rw-r--r-- script/BuildLangModelLogs/LangMacedonianModel.log 248
-rw-r--r-- script/langs/mk.py 58
-rw-r--r-- src/CMakeLists.txt 1
-rw-r--r-- src/LangModels/LangMacedonianModel.cpp 317
-rw-r--r-- src/nsLanguageDetector.h 1
-rw-r--r-- src/nsMBCSGroupProber.cpp 1
-rw-r--r-- src/nsMBCSGroupProber.h 2
-rw-r--r-- src/nsSBCSGroupProber.cpp 4
-rw-r--r-- src/nsSBCSGroupProber.h 2
-rw-r--r-- src/nsSBCharSetProber.h 5
-rw-r--r-- test/mk/ibm855.txt 1
-rw-r--r-- test/mk/iso-8859-5.txt 1
-rw-r--r-- test/mk/utf-8.txt 1
-rw-r--r-- test/mk/windows-1251.txt 1
15 files changed, 646 insertions, 2 deletions
diff --git a/README.md b/README.md
index 07018e0..301c6a3 100644
--- a/README.md
+++ b/README.md
@@ -130,6 +130,11 @@ uchardet started as a C language binding of the original C++ implementation of t
* Maltese
* UTF-8
* ISO-8859-3
+ * Macedonian
+ * UTF-8
+ * ISO-8859-5
+ * WINDOWS-1251
+ * IBM855
* Norwegian
* IBM865
* ISO-8859-1
diff --git a/script/BuildLangModelLogs/LangMacedonianModel.log b/script/BuildLangModelLogs/LangMacedonianModel.log
new file mode 100644
index 0000000..e442bcb
--- /dev/null
+++ b/script/BuildLangModelLogs/LangMacedonianModel.log
@@ -0,0 +1,248 @@
+= Logs of language model for Macedonian (mk) =
+
+- Generated by BuildLangModel.py
+- Started: 2022-12-17 22:01:17.484142
+- Maximum depth: 4
+- Max number of pages: 200
+
+== Parsed pages ==
+
+Хелсинки (revision 4901169)
+Западен Берлин (revision 4609007)
+Средна Европа (revision 4658727)
+Украина (revision 4859969)
+Земји членки на Европската Унија (revision 3925804)
+Кипар (revision 4898295)
+Метрополитенска област (revision 4601372)
+Вајмар (revision 4642566)
+Гернзи (revision 4849858)
+Летни олимписки игри 2008 (revision 4904330)
+Кина (revision 4874294)
+Пекинг (revision 4898517)
+Бразил (revision 4887064)
+Нормативна контрола (revision 4647772)
+Пном Пен (revision 4655657)
+Малта (revision 4859950)
+Паритет на куповна моќ (revision 4885746)
+Обединета нормативна податотека (revision 4624340)
+ФИБА Европа (revision 4888619)
+Белгија (revision 4881095)
+Европски суд за човекови права (revision 4872819)
+GMT (revision 4857360)
+Јохан Волфганг фон Гете (revision 4847592)
+Лондон (revision 4820446)
+САД (revision 4887829)
+Хонгконг (revision 4644474)
+Регион (revision 4440392)
+Шангај (revision 4829926)
+Холандија (revision 4859982)
+Национална библиотека на Австралија (revision 4821571)
+Сантјаго де Компостела (revision 4790447)
+В’лтава (revision 4480493)
+Печ (revision 4836968)
+Литванија (revision 4859985)
+Централна Африка (revision 4880126)
+1808 (revision 4205012)
+Втора светска војна (revision 4878249)
+Сувереност (revision 4847447)
+Општествено уредување (revision 4562058)
+Француска Гвајана (revision 4658818)
+Бразавил (revision 4833032)
+Англија (revision 4831557)
+Сомалија (revision 4826045)
+Собрание (revision 4721533)
+Поштенски број (revision 4890825)
+ISO 4217 (revision 4900097)
+Никозија (revision 4821748)
+Исток (revision 4575999)
+Европа (revision 4898183)
+Азија (revision 4879497)
+Романија (revision 4880087)
+Република Кина (revision 4859932)
+Музејски Остров (revision 4642577)
+Ченгду (revision 4838944)
+Саудиска Арабија (revision 4904971)
+Шри Ланка (revision 4829991)
+Соединети Американски Држави (revision 4887829)
+Питкерн (revision 4879701)
+Берлин (revision 4898023)
+Турција (revision 4898742)
+Зимски олимписки игри 2022 (revision 4879017)
+Страна на возење (revision 4883774)
+Национална библиотека на Франција (revision 4859687)
+Статуа на Исус Христос Искупителот (revision 4781328)
+Грција (revision 4883904)
+Британска Индоокеанска Територија (revision 4847444)
+Германија (revision 4898116)
+Унгарија (revision 4859996)
+Список на земјите по највисок државен домен (revision 4660191)
+Естонија (revision 4904061)
+Ризница (Викимедија) (revision 4605630)
+5 мај (revision 4286017)
+Ријад (revision 4825472)
+Баптизам (revision 4893797)
+Индокина (revision 3860492)
+ГДР (revision 4804373)
+Черкаска Област (revision 4586879)
+Данска (revision 4860001)
+Источен Берлин (revision 4847439)
+Парагвај (revision 4823976)
+Студена војна (revision 4873005)
+Лихтенштајн (revision 4859989)
+Србија (revision 4888612)
+1933 (revision 4205151)
+Монголски јазик (revision 4822543)
+Виртуелна меѓународна нормативна податотека (revision 4063132)
+Европска Унија (revision 4878852)
+Трет Рајх (revision 4873021)
+Шведска (revision 4859974)
+Тириншка Шума (revision 4620246)
+Стрелаштво на Летните олимписки игри - 2008 (revision 4854954)
+Караимски јазик (revision 4578663)
+Бруто-домашен производ (revision 4839401)
+Западна Европа (revision 4795691)
+Туркиски јазици (revision 4811424)
+Зимски олимписки игри 2006 (revision 4279937)
+Список на држави и територии по површина (revision 4880407)
+Венецијанска Република (revision 4872002)
+Монголија (revision 4859944)
+Држава Палестина (revision 4898178)
+Список на земји (revision 4818847)
+1821 (revision 4205027)
+Контролен број на Конгресната библиотека (revision 4500225)
+Јунан (revision 4609778)
+Гибралтар (revision 4849866)
+Тихи Океан (revision 4898720)
+Брисел (revision 4816384)
+Острава (revision 4648232)
+Хрватска (revision 4859986)
+Авганистан (revision 4897935)
+Обединетото Кралство (revision 4878275)
+Везер (revision 4611529)
+Марлен Дитрих (revision 4898384)
+Фарски Острови (revision 4828720)
+Илм (округ) (revision 4622799)
+Пакистан (revision 4893644)
+Пафос (revision 4540073)
+Москва (revision 4836476)
+Океанија (revision 4847323)
+Франција (revision 4859997)
+Брно (revision 4654440)
+Премиер на Кина (revision 4365963)
+Тибетско писмо (revision 4855132)
+Рајхстаг (revision 4748712)
+Географија (revision 4593743)
+Бенгалски календар (revision 4467317)
+Соединетите Држави (revision 4887829)
+Список на држави (revision 4818847)
+Запорошка област (revision 4795592)
+Литвански (revision 4847113)
+ОБСЕ (revision 4751462)
+Молиер (revision 4834680)
+Моравскошлески крај (revision 4203476)
+Португалија (revision 4859979)
+Зимски олимписки игри 1948 (revision 4750285)
+1938 (revision 4444155)
+Град во Парагвај (revision 4530019)
+Норвешка (revision 4859981)
+Државно знаме (revision 3360721)
+Уганда (revision 4828524)
+Калај (revision 4901898)
+Национална библиотека на Чешка (revision 4859689)
+Кувајт (revision 4859952)
+Национална парламентарна библиотека (Јапонија) (revision 4821574)
+Сеул (revision 4837760)
+Авторитаризам (revision 4763980)
+Чисто писмо (revision 4648379)
+УНЕСКО (revision 4768869)
+Кампала (revision 4724511)
+Монголи (revision 4821043)
+Валдовињо (revision 4555459)
+Стреличарство на Летните олимписки игри 2016 (revision 4827288)
+Сеута (revision 4529464)
+Тоуро (revision 4555813)
+1880 (revision 4485297)
+Кинески Тајпеј (revision 4859932)
+Азербејџан (revision 4897943)
+Источен Тимор (revision 4859940)
+Меѓународен олимписки комитет (revision 4585376)
+Обединето Кралство (revision 4878275)
+Ежен Делакроа (revision 4850741)
+Квадратен километар (revision 4177969)
+Бронзено време (revision 4687506)
+
+== End of Parsed pages ==
+
+- Wikipedia parsing ended at: 2022-12-17 22:04:50.749722
+
+71 characters appeared 1512742 times.
+
+Most Frequent characters:
+[ 0] Char а: 13.171909023481861 %
+[ 1] Char о: 10.210531604199527 %
+[ 2] Char и: 9.815156847631652 %
+[ 3] Char е: 7.806089868596231 %
+[ 4] Char н: 7.393263358854318 %
+[ 5] Char т: 6.73862429945093 %
+[ 6] Char р: 5.301366657367879 %
+[ 7] Char с: 4.862098097362273 %
+[ 8] Char в: 4.178372782668823 %
+[ 9] Char к: 3.809307866113323 %
+[10] Char д: 3.5743702495204075 %
+[11] Char л: 3.571263308614423 %
+[12] Char п: 2.6521376414484426 %
+[13] Char м: 2.191384915603586 %
+[14] Char у: 2.13499724341626 %
+[15] Char ј: 1.9410448047320692 %
+[16] Char г: 1.8507452030815565 %
+[17] Char з: 1.656858869523025 %
+[18] Char б: 1.3595180143077934 %
+[19] Char ц: 0.8072757945505579 %
+[20] Char ч: 0.6892120401231671 %
+[21] Char ш: 0.6130589353637302 %
+[22] Char ж: 0.44072287277010885 %
+[23] Char ф: 0.3884998234993145 %
+[24] Char х: 0.27631942525559544 %
+[25] Char њ: 0.24419233418520805 %
+[26] Char e: 0.19844758722901856 %
+[27] Char i: 0.19097770802952518 %
+[28] Char a: 0.17398869073510223 %
+[29] Char ќ: 0.16175924248814405 %
+[30] Char n: 0.14477022519372107 %
+[31] Char ѓ: 0.14113444328246325 %
+[32] Char r: 0.13577992810406533 %
+[33] Char s: 0.12645910538611344 %
+[34] Char t: 0.1238149003597441 %
+[35] Char o: 0.11032945472526048 %
+[36] Char l: 0.08944023501694275 %
+[37] Char c: 0.08203646094310861 %
+[38] Char u: 0.0737072151100452 %
+[39] Char d: 0.06313039500456787 %
+[40] Char m: 0.0618082924913832 %
+[41] Char h: 0.055726620930733724 %
+[42] Char џ: 0.054470623543208294 %
+[43] Char g: 0.051165367260246626 %
+[44] Char b: 0.043232752181138624 %
+[45] Char p: 0.04078686253174699 %
+[46] Char f: 0.030540568054565814 %
+[47] Char k: 0.028160783530833414 %
+[48] Char v: 0.02730141689726338 %
+[49] Char y: 0.025847104132760243 %
+[50] Char w: 0.02201300684452471 %
+[51] Char x: 0.021219745336613912 %
+[52] Char j: 0.009519138094929606 %
+[53] Char z: 0.00786650995344877 %
+[54] Char љ: 0.007403774073834138 %
+[55] Char ѕ: 0.00489177929878327 %
+
+The first 56 characters have an accumulated ratio of 0.9998605181848591.
+The first 4 characters have an accumulated ratio of 0.41003687343909273.
+All characters whose order is over 22 have an accumulated ratio of 0.03216741519703955.
+
+1405 sequences found.
+
+First 613 (typical positive ratio): 0.9950204964819953
+Next 273 (886-613): 0.003979891583654749
+Rest: 0.0009996119343499421
+
+- Processing end: 2022-12-17 22:04:50.898793
diff --git a/script/langs/mk.py b/script/langs/mk.py
new file mode 100644
index 0000000..aee9748
--- /dev/null
+++ b/script/langs/mk.py
@@ -0,0 +1,58 @@
+#!/bin/python3
+# -*- coding: utf-8 -*-
+
+# ##### BEGIN LICENSE BLOCK #####
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Jehan <jehan@girinstud.io>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ##### END LICENSE BLOCK #####
+
+import re
+
+## Mandatory Properties ##
+
+name = 'Macedonian'
+code = 'mk'
+use_ascii = False
+charsets = [ 'WINDOWS-1251', 'IBM855', 'ISO-8859-5' ]
+
+## Optional Properties ##
+
+# Alphabet characters.
+alphabet = 'абвгдѓежзѕијклљмнњопрстќуфхцчџш'
+# A starred page which was rewarded on the main page when I created
+# the data.
+start_pages = ['Хелсинки']
+wikipedia_code = code
+case_mapping = True
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 763c492..cec12ab 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -27,6 +27,7 @@ set(
LangModels/LangItalianModel.cpp
LangModels/LangLithuanianModel.cpp
LangModels/LangLatvianModel.cpp
+ LangModels/LangMacedonianModel.cpp
LangModels/LangMalteseModel.cpp
LangModels/LangPolishModel.cpp
LangModels/LangPortugueseModel.cpp
diff --git a/src/LangModels/LangMacedonianModel.cpp b/src/LangModels/LangMacedonianModel.cpp
new file mode 100644
index 0000000..bae13ad
--- /dev/null
+++ b/src/LangModels/LangMacedonianModel.cpp
@@ -0,0 +1,317 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1998
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "../nsSBCharSetProber.h"
+#include "../nsLanguageDetector.h"
+
+/********* Language model for: Macedonian *********/
+
+/**
+ * Generated by BuildLangModel.py
+ * On: 2022-12-17 22:04:50.749968
+ **/
+
+/* Character Mapping Table:
+ * ILL: illegal character.
+ * CTR: control character specific to the charset.
+ * RET: carriage/return.
+ * SYM: symbol (punctuation) that does not belong to word.
+ * NUM: 0 - 9.
+ *
+ * Other characters are ordered by probabilities
+ * (0 is the most common character in the language).
+ *
+ * Orders are generic to a language. So the codepoint with order X in
+ * CHARSET1 maps to the same character as the codepoint with the same
+ * order X in CHARSET2 for the same language.
+ * As such, it is possible to get missing order. For instance the
+ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
+ * even though they are both used for French. Same for the euro sign.
+ */
+static const unsigned char Windows_1251_CharToOrderMap[] =
+{
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
+ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
+ SYM, 28, 44, 37, 39, 26, 46, 43, 41, 27, 52, 47, 36, 40, 30, 35, /* 4X */
+ 45, 58, 32, 33, 34, 38, 48, 50, 51, 49, 53,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 28, 44, 37, 39, 26, 46, 43, 41, 27, 52, 47, 36, 40, 30, 35, /* 6X */
+ 45, 58, 32, 33, 34, 38, 48, 50, 51, 49, 53,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 63, 31,SYM, 31,SYM,SYM,SYM,SYM,SYM,SYM, 54,SYM, 25, 29, 70, 42, /* 8X */
+ 63,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 54,SYM, 25, 29, 70, 42, /* 9X */
+ SYM, 71, 72, 15,SYM, 73,SYM,SYM, 67,SYM, 68,SYM,SYM,SYM,SYM, 64, /* AX */
+ SYM,SYM, 69, 69, 74,SYM,SYM,SYM, 67,SYM, 68,SYM, 15, 55, 55, 64, /* BX */
+ 0, 18, 8, 16, 10, 3, 22, 17, 2, 57, 9, 11, 13, 4, 1, 12, /* CX */
+ 6, 7, 5, 14, 23, 24, 19, 20, 21, 65, 66, 59, 60, 56, 62, 61, /* DX */
+ 0, 18, 8, 16, 10, 3, 22, 17, 2, 57, 9, 11, 13, 4, 1, 12, /* EX */
+ 6, 7, 5, 14, 23, 24, 19, 20, 21, 65, 66, 59, 60, 56, 62, 61, /* FX */
+};
+/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+
+static const unsigned char Ibm855_CharToOrderMap[] =
+{
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
+ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
+ SYM, 28, 44, 37, 39, 26, 46, 43, 41, 27, 52, 47, 36, 40, 30, 35, /* 4X */
+ 45, 58, 32, 33, 34, 38, 48, 50, 51, 49, 53,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 28, 44, 37, 39, 26, 46, 43, 41, 27, 52, 47, 36, 40, 30, 35, /* 6X */
+ 45, 58, 32, 33, 34, 38, 48, 50, 51, 49, 53,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ 63, 63, 31, 31, 67, 67, 68, 68, 55, 55, 69, 69, 64, 64, 15, 15, /* 8X */
+ 54, 54, 25, 25, 70, 70, 29, 29, 75, 76, 42, 42, 62, 62, 66, 66, /* 9X */
+ 0, 0, 18, 18, 19, 19, 10, 10, 3, 3, 23, 23, 16, 16,SYM,SYM, /* AX */
+ SYM,SYM,SYM,SYM,SYM, 24, 24, 2, 2,SYM,SYM,SYM,SYM, 57, 57,SYM, /* BX */
+ SYM,SYM,SYM,SYM,SYM,SYM, 9, 9,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
+ 11, 11, 13, 13, 4, 4, 1, 1, 12,SYM,SYM,SYM,SYM, 12, 61,SYM, /* DX */
+ 61, 6, 6, 7, 7, 5, 5, 14, 14, 22, 22, 8, 8, 60, 60,SYM, /* EX */
+ SYM, 59, 59, 17, 17, 21, 21, 56, 56, 65, 65, 20, 20,SYM,SYM,SYM, /* FX */
+};
+/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+
+static const unsigned char Iso_8859_5_CharToOrderMap[] =
+{
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
+ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
+ SYM, 28, 44, 37, 39, 26, 46, 43, 41, 27, 52, 47, 36, 40, 30, 35, /* 4X */
+ 45, 58, 32, 33, 34, 38, 48, 50, 51, 49, 53,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 28, 44, 37, 39, 26, 46, 43, 41, 27, 52, 47, 36, 40, 30, 35, /* 6X */
+ 45, 58, 32, 33, 34, 38, 48, 50, 51, 49, 53,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
+ SYM, 67, 63, 31, 68, 55, 69, 64, 15, 54, 25, 70, 29,SYM, 77, 42, /* AX */
+ 0, 18, 8, 16, 10, 3, 22, 17, 2, 57, 9, 11, 13, 4, 1, 12, /* BX */
+ 6, 7, 5, 14, 23, 24, 19, 20, 21, 65, 66, 59, 60, 56, 62, 61, /* CX */
+ 0, 18, 8, 16, 10, 3, 22, 17, 2, 57, 9, 11, 13, 4, 1, 12, /* DX */
+ 6, 7, 5, 14, 23, 24, 19, 20, 21, 65, 66, 59, 60, 56, 62, 61, /* EX */
+ SYM, 67, 63, 31, 68, 55, 69, 64, 15, 54, 25, 70, 29,SYM, 78, 42, /* FX */
+};
+/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+
+static const int Unicode_Char_size = 112;
+static const unsigned int Unicode_CharOrder[] =
+{
+ 65, 28, 66, 44, 67, 37, 68, 39, 69, 26, 70, 46, 71, 43, 72, 41,
+ 73, 27, 74, 52, 75, 47, 76, 36, 77, 40, 78, 30, 79, 35, 80, 45,
+ 82, 32, 83, 33, 84, 34, 85, 38, 86, 48, 87, 50, 88, 51, 89, 49,
+ 90, 53, 97, 28, 98, 44, 99, 37, 100, 39, 101, 26, 102, 46, 103, 43,
+ 104, 41, 105, 27, 106, 52, 107, 47, 108, 36, 109, 40, 110, 30, 111, 35,
+ 112, 45, 114, 32, 115, 33, 116, 34, 117, 38, 118, 48, 119, 50, 120, 51,
+ 121, 49, 122, 53, 1027, 31, 1029, 55, 1032, 15, 1033, 54, 1034, 25,1036, 29,
+ 1039, 42, 1040, 0, 1041, 18, 1042, 8, 1043, 16, 1044, 10, 1045, 3,1046, 22,
+ 1047, 17, 1048, 2, 1050, 9, 1051, 11, 1052, 13, 1053, 4, 1054, 1,1055, 12,
+ 1056, 6, 1057, 7, 1058, 5, 1059, 14, 1060, 23, 1061, 24, 1062, 19,1063, 20,
+ 1064, 21, 1072, 0, 1073, 18, 1074, 8, 1075, 16, 1076, 10, 1077, 3,1078, 22,
+ 1079, 17, 1080, 2, 1082, 9, 1083, 11, 1084, 13, 1085, 4, 1086, 1,1087, 12,
+ 1088, 6, 1089, 7, 1090, 5, 1091, 14, 1092, 23, 1093, 24, 1094, 19,1095, 20,
+ 1096, 21, 1107, 31, 1109, 55, 1112, 15, 1113, 54, 1114, 25, 1116, 29,1119, 42,
+};
+
+
+/* Model Table:
+ * Total considered sequences: 1405 / 3136
+ * - Positive sequences: first 613 (0.9950204964819953)
+ * - Probable sequences: next 273 (886-613) (0.003979891583654749)
+ * - Neutral sequences: last 2250 (0.0009996119343499421)
+ * - Negative sequences: 1731 (off-ratio)
+ * Negative sequences: TODO
+ */
+static const PRUint8 MacedonianLangModel[] =
+{
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 0,3,0,3,0,0,1,1,1,1,0,1,0,0,3,0,0,0,0,0,1,0,0,0,1,0,2,1,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,0,
+ 0,3,0,3,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,
+ 0,3,0,1,0,0,1,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,2,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 0,3,0,3,0,0,0,0,1,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,2,0,
+ 3,3,3,3,1,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,3,3,2,3,3,0,1,0,
+ 1,1,0,2,0,0,0,1,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,1,2,2,2,1,1,3,3,0,0,0,
+ 1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,0,
+ 1,0,0,2,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,1,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,3,2,1,0,3,3,1,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,1,3,3,3,3,2,3,3,3,1,2,3,2,1,1,0,1,0,1,0,
+ 1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
+ 3,3,3,3,3,3,3,3,3,2,1,3,2,3,3,2,1,1,1,3,1,2,1,2,2,1,0,0,
+ 1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+ 3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,2,3,3,3,2,1,0,1,1,2,0,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,1,3,3,3,3,2,3,3,3,3,3,2,3,3,2,3,3,3,3,0,0,0,
+ 1,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,1,3,1,3,2,1,3,2,1,1,1,3,3,3,0,3,2,0,1,0,
+ 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,3,2,3,3,2,3,3,2,2,3,2,2,2,0,3,1,0,1,0,
+ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,1,0,0,
+ 0,2,0,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,2,3,0,0,0,
+ 0,3,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,2,3,2,3,2,3,3,1,2,3,1,1,2,2,1,1,2,1,1,2,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,1,3,1,3,2,3,3,1,3,3,3,3,1,3,1,0,0,0,0,0,0,0,0,
+ 1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,2,3,3,3,1,3,3,1,1,3,3,1,2,1,1,0,1,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
+ 3,3,3,3,3,1,3,0,3,2,2,1,0,1,3,0,0,0,2,1,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,1,1,1,2,3,1,3,1,1,3,3,0,0,0,0,0,0,1,0,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,2,1,3,3,1,3,3,2,3,2,1,0,0,1,2,0,0,0,0,1,1,0,
+ 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,1,2,0,1,0,3,2,0,1,3,3,1,0,3,0,1,0,0,0,0,1,0,0,
+ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,2,3,3,3,1,1,1,3,1,1,3,2,2,0,1,1,0,1,0,1,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,2,1,2,2,0,3,3,2,1,1,1,0,2,1,0,1,0,0,0,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,2,2,3,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,3,3,
+ 3,0,3,0,3,3,3,2,3,3,3,3,3,2,0,3,2,3,2,2,2,2,2,2,1,2,0,0,
+ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,3,0,3,3,3,3,3,3,2,3,3,1,0,3,3,2,2,3,3,2,2,3,2,2,0,0,
+ 0,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,2,3,
+ 2,0,3,0,3,3,3,1,3,3,3,3,3,2,0,3,3,2,2,2,3,2,2,1,2,2,0,0,
+ 3,1,3,3,3,2,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,3,0,1,3,3,3,2,3,2,3,2,2,0,3,2,1,2,2,1,2,1,0,1,2,0,0,
+ 3,2,2,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+ 0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3,3,
+ 3,0,3,0,2,3,3,3,3,2,3,2,2,1,0,3,2,2,1,3,2,2,1,1,1,2,0,0,
+ 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,3,3,
+ 3,0,3,0,2,3,3,3,2,3,2,1,2,3,0,1,3,2,1,2,2,2,2,0,1,1,0,0,
+ 0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,1,0,3,3,2,3,2,3,3,1,2,3,0,1,1,1,1,1,2,3,1,0,1,2,0,0,
+ 0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,
+ 1,0,3,0,3,3,3,2,3,3,3,2,3,2,0,2,2,3,3,2,2,1,2,2,1,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,1,0,1,3,3,3,3,2,2,3,2,2,0,2,2,1,1,2,2,2,0,1,1,1,0,0,
+ 0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,2,0,2,2,3,3,3,2,3,2,1,3,0,1,2,0,1,2,1,2,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,
+ 3,0,3,0,3,3,3,1,3,3,2,2,3,1,0,2,3,2,1,2,2,1,1,1,0,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,1,0,2,2,1,3,1,1,2,2,2,0,0,2,2,1,1,1,1,1,1,0,1,1,0,0,
+ 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,1,0,1,2,2,3,2,1,2,2,3,1,0,1,2,2,1,1,0,2,2,1,0,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,2,0,2,2,2,3,2,1,3,0,1,1,0,0,1,0,0,1,1,2,1,0,1,0,0,0,
+ 3,3,3,3,1,0,1,1,0,1,2,1,0,1,3,0,0,1,1,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,2,0,3,2,1,3,2,0,3,2,2,2,0,1,1,1,1,1,0,2,1,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,3,0,3,2,1,2,3,2,3,1,1,1,0,0,2,1,0,1,1,2,0,0,0,0,0,0,
+ 0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,
+ 3,0,1,0,3,2,2,3,2,1,2,1,1,2,0,1,1,2,1,1,0,1,0,0,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,
+ 2,0,0,0,3,1,2,3,1,0,2,0,1,1,0,2,1,0,2,1,0,1,0,0,0,0,0,0,
+ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,1,0,2,2,1,3,1,0,2,1,2,1,0,1,0,1,1,2,1,1,1,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,1,0,2,1,0,2,1,1,1,1,0,1,0,1,1,2,0,1,1,1,0,0,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,
+ 2,0,2,0,1,2,1,2,2,1,1,1,2,0,0,1,1,2,1,0,0,1,0,0,0,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 3,0,1,0,1,2,1,2,1,0,1,1,1,2,0,0,0,1,0,1,0,0,2,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3,
+ 1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,1,1,0,3,0,0,3,0,0,0,0,
+ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,1,
+ 3,0,1,0,1,0,0,2,1,0,2,0,0,1,0,1,0,1,0,1,1,1,0,0,0,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,
+ 2,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,
+ 2,0,1,2,0,0,0,1,0,1,1,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,3,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+};
+
+
+const SequenceModel Windows_1251MacedonianModel =
+{
+ Windows_1251_CharToOrderMap,
+ MacedonianLangModel,
+ 56,
+ (float)0.9990003880656501,
+ PR_FALSE,
+ "WINDOWS-1251",
+ "mk"
+};
+
+const SequenceModel Ibm855MacedonianModel =
+{
+ Ibm855_CharToOrderMap,
+ MacedonianLangModel,
+ 56,
+ (float)0.9990003880656501,
+ PR_FALSE,
+ "IBM855",
+ "mk"
+};
+
+const SequenceModel Iso_8859_5MacedonianModel =
+{
+ Iso_8859_5_CharToOrderMap,
+ MacedonianLangModel,
+ 56,
+ (float)0.9990003880656501,
+ PR_FALSE,
+ "ISO-8859-5",
+ "mk"
+};
+
+const LanguageModel MacedonianModel =
+{
+ "mk",
+ Unicode_CharOrder,
+ 112,
+ MacedonianLangModel,
+ 56,
+ 4,
+ (float)0.41003687343909273,
+ 22,
+ (float)0.03216741519703955,
+};
diff --git a/src/nsLanguageDetector.h b/src/nsLanguageDetector.h
index 01eedfb..01bba14 100644
--- a/src/nsLanguageDetector.h
+++ b/src/nsLanguageDetector.h
@@ -145,6 +145,7 @@ extern const LanguageModel IrishModel;
extern const LanguageModel ItalianModel;
extern const LanguageModel LatvianModel;
extern const LanguageModel LithuanianModel;
+extern const LanguageModel MacedonianModel;
extern const LanguageModel MalteseModel;
extern const LanguageModel NorwegianModel;
extern const LanguageModel PolishModel;
diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp
index 04c628d..9a0680a 100644
--- a/src/nsMBCSGroupProber.cpp
+++ b/src/nsMBCSGroupProber.cpp
@@ -112,6 +112,7 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
langDetectors[i][j++] = new nsLanguageDetector(&ItalianModel);
langDetectors[i][j++] = new nsLanguageDetector(&LatvianModel);
langDetectors[i][j++] = new nsLanguageDetector(&LithuanianModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&MacedonianModel);
langDetectors[i][j++] = new nsLanguageDetector(&MalteseModel);
langDetectors[i][j++] = new nsLanguageDetector(&NorwegianModel);
langDetectors[i][j++] = new nsLanguageDetector(&PolishModel);
diff --git a/src/nsMBCSGroupProber.h b/src/nsMBCSGroupProber.h
index 17c3f66..1374786 100644
--- a/src/nsMBCSGroupProber.h
+++ b/src/nsMBCSGroupProber.h
@@ -49,7 +49,7 @@
#include "nsEUCTWProber.h"
#define NUM_OF_PROBERS 8
-#define NUM_OF_LANGUAGES 35
+#define NUM_OF_LANGUAGES 36
class nsMBCSGroupProber: public nsCharSetProber {
public:
diff --git a/src/nsSBCSGroupProber.cpp b/src/nsSBCSGroupProber.cpp
index 7d474ca..93dac13 100644
--- a/src/nsSBCSGroupProber.cpp
+++ b/src/nsSBCSGroupProber.cpp
@@ -219,6 +219,10 @@ nsSBCSGroupProber::nsSBCSGroupProber()
mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1251UkrainianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1251MacedonianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Ibm855MacedonianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_5MacedonianModel);
+
Reset();
}
diff --git a/src/nsSBCSGroupProber.h b/src/nsSBCSGroupProber.h
index 2ae0f85..2a25e1f 100644
--- a/src/nsSBCSGroupProber.h
+++ b/src/nsSBCSGroupProber.h
@@ -40,7 +40,7 @@
#define nsSBCSGroupProber_h__
-#define NUM_OF_SBCS_PROBERS 112
+#define NUM_OF_SBCS_PROBERS 115
class nsCharSetProber;
class nsSBCSGroupProber: public nsCharSetProber {
diff --git a/src/nsSBCharSetProber.h b/src/nsSBCharSetProber.h
index 3ab5830..942d3ec 100644
--- a/src/nsSBCharSetProber.h
+++ b/src/nsSBCharSetProber.h
@@ -273,4 +273,9 @@ extern const SequenceModel Ibm865NorwegianModel;
extern const SequenceModel Windows_1251UkrainianModel;
+extern const SequenceModel Windows_1251MacedonianModel;
+extern const SequenceModel Ibm855MacedonianModel;
+extern const SequenceModel Iso_8859_5MacedonianModel;
+
+
#endif /* nsSingleByteCharSetProber_h__ */
diff --git a/test/mk/ibm855.txt b/test/mk/ibm855.txt
new file mode 100644
index 0000000..b11df79
--- /dev/null
+++ b/test/mk/ibm855.txt
@@ -0,0 +1 @@
+Ԡ з Ʒ ֎ Ԡ 렒 Ԡ Ҩ堢з ԷƠ ШԠ ب Ơ ֦ᨦԷ Է Ҩ Ԡ Ҡ. Ԡ ؠ ֬Ш ֎ ԨԷ Է Ҡ ֎ . ՠ Ơ ҨƠ Ҡ ԷԠ ШԠ ب 禠 Ш Ԩ Ҩ堠 Ʒ Ԡ. 먖 Ԡ ֨Ơ Է Рᠠ Ԡ Է (. Ơ) з Է Ҡ ( Ҡ Ʒ). 淨 禠 먖ؠ Ҩ Ԡ Ҡ. ѠԷ Է ᠠ Էᠠ Ơ ԠԠ ب ֦ Ơ Ԡ 렒. Ԡ Ԡ Ԡ Ҩ Ԡ Ш 려.
diff --git a/test/mk/iso-8859-5.txt b/test/mk/iso-8859-5.txt
new file mode 100644
index 0000000..1f4d357
--- /dev/null
+++ b/test/mk/iso-8859-5.txt
@@ -0,0 +1 @@
+ . . . (. ) ( ). . . .
diff --git a/test/mk/utf-8.txt b/test/mk/utf-8.txt
new file mode 100644
index 0000000..3208b2d
--- /dev/null
+++ b/test/mk/utf-8.txt
@@ -0,0 +1 @@
+Хибернација или зимски сон е состојба на успорување на метаболизмот и ниска телесна температура кај одредени животни за време на зимата. Во хибернатори спаѓаат поголем број крзнени животни и мал број цицачи. На цицачите како мечката само малку им е снижена телесната температура се будат лесно и не се сметаат за вистински хибернатори. Повеќето хибернатори доека се активни складираат храна во засолништата (пр. верверичка) или резервни масти во телото (во масното ткиво). Тие можат да се разбудат повеќепати за време на зимата. Ладнокрвните животни мораат да хибернираат таму каде што надворешната температура се спушта под точка на смрзнување. Еднакво на хибернација за време на лето е естивација.
diff --git a/test/mk/windows-1251.txt b/test/mk/windows-1251.txt
new file mode 100644
index 0000000..1f4120d
--- /dev/null
+++ b/test/mk/windows-1251.txt
@@ -0,0 +1 @@
+ . . . (. ) ( ). . . .