summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJehan <jehan@girinstud.io>2022-12-20 14:23:24 +0100
committerJehan <jehan@girinstud.io>2022-12-20 14:28:29 +0100
commit7875272a8c61fdccba1db6b3c29ce248cc5fd65f (patch)
treeb51d6fca83d3a9978b89ca38de834767cf50665b /src
parentc843d23a17eebaa69be56565c5963471d5f1295f (diff)
script, src, test: new Georgian support.
For charsets UTF-8, GEORGIAN-ACADEMY and GEORGIAN-PS. The 2 GEORGIAN-* sets were generated thanks to the new create-table.py script. Test text comes from the 'ვირზაზუნა' page of Wikipedia in Georgian.
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt1
-rw-r--r--src/LangModels/LangGeorgianModel.cpp288
-rw-r--r--src/nsLanguageDetector-generated.h3
-rw-r--r--src/nsMBCSGroupProber.cpp1
-rw-r--r--src/nsSBCSGroupProber.cpp3
-rw-r--r--src/nsSBCharSetProber-generated.h5
6 files changed, 299 insertions, 2 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 17fd980..1226a4f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -20,6 +20,7 @@ set(
LangModels/LangFrenchModel.cpp
LangModels/LangDanishModel.cpp
LangModels/LangGermanModel.cpp
+ LangModels/LangGeorgianModel.cpp
LangModels/LangGreekModel.cpp
LangModels/LangHungarianModel.cpp
LangModels/LangHebrewModel.cpp
diff --git a/src/LangModels/LangGeorgianModel.cpp b/src/LangModels/LangGeorgianModel.cpp
new file mode 100644
index 0000000..7da31ee
--- /dev/null
+++ b/src/LangModels/LangGeorgianModel.cpp
@@ -0,0 +1,288 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1998
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "../nsSBCharSetProber.h"
+#include "../nsSBCharSetProber-generated.h"
+#include "../nsLanguageDetector.h"
+
+#include "../nsLanguageDetector-generated.h"
+
+/********* Language model for: Georgian *********/
+
+/**
+ * Generated by BuildLangModel.py
+ * On: 2022-12-20 12:56:27.859568
+ **/
+
+ /* Character Mapping Table:
+ * ILL: illegal character.
+ * CTR: control character specific to the charset.
+ * RET: carriage return or line feed (bytes 0x0D and 0x0A).
+ * SYM: symbol (punctuation) that does not belong to word.
+ * NUM: 0 - 9.
+ *
+ * Other characters are ordered by probabilities
+ * (0 is the most common character in the language).
+ *
+ * Orders are generic to a language. So the codepoint with order X in
+ * CHARSET1 maps to the same character as the codepoint with the same
+ * order X in CHARSET2 for the same language.
+ * As such, some orders may be missing from a charset. For instance the
+ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
+ * even though they are both used for French. Same for the euro sign.
+ */
+static const unsigned char Georgian_Academy_CharToOrderMap[] = /* byte value -> character order (or CTR/RET/SYM/NUM class) for GEORGIAN-ACADEMY */
+{ /* 16 rows x 16 columns: the row label gives the high nibble, the column footer the low nibble */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
+ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
+ SYM, 29, 45, 41, 42, 27, 51, 47, 40, 26, 57, 54, 38, 44, 31, 32, /* 4X */
+ 46, 59, 34, 33, 35, 43, 50, 52, 53, 49, 56,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 29, 45, 41, 42, 27, 51, 47, 40, 26, 57, 54, 38, 44, 31, 32, /* 6X */
+ 46, 59, 34, 33, 35, 43, 50, 52, 53, 49, 56,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,SYM, 77,SYM,SYM,SYM,SYM, 78,SYM, 79,SYM, 68,CTR,CTR,CTR, /* 8X */
+ CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 80,SYM, 68,CTR,CTR, 81, /* 9X */
+ CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 82,SYM,SYM,CTR,SYM,SYM, /* AX */
+ SYM,SYM,NUM,NUM,SYM, 83,SYM,SYM,SYM,NUM, 84,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 0, 10, 14, 9, 2, 11, 22, 13, 1, 16, 7, 6, 8, 5, 20, 55, /* CX */
+ 4, 3, 15, 12, 24, 23, 28, 25, 17, 36, 19, 30, 21, 48, 18, 39, /* DX */
+ 37, 85, 86, 87, 88, 89, 76, 67, 71, 61, 74, 90, 73, 66, 72, 91, /* EX */
+ 65, 92, 93, 64, 94, 75, 60,SYM, 70, 95, 69, 96, 58, 97, 63, 98, /* FX */
+};
+/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+
+static const unsigned char Georgian_Ps_CharToOrderMap[] = /* byte value -> character order (or CTR/RET/SYM/NUM class) for GEORGIAN-PS */
+{ /* 16 rows x 16 columns: the row label gives the high nibble, the column footer the low nibble */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
+ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
+ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
+ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
+ SYM, 29, 45, 41, 42, 27, 51, 47, 40, 26, 57, 54, 38, 44, 31, 32, /* 4X */
+ 46, 59, 34, 33, 35, 43, 50, 52, 53, 49, 56,SYM,SYM,SYM,SYM,SYM, /* 5X */
+ SYM, 29, 45, 41, 42, 27, 51, 47, 40, 26, 57, 54, 38, 44, 31, 32, /* 6X */
+ 46, 59, 34, 33, 35, 43, 50, 52, 53, 49, 56,SYM,SYM,SYM,SYM,CTR, /* 7X */
+ CTR,CTR,SYM, 99,SYM,SYM,SYM,SYM,100,SYM,101,SYM, 68,CTR,CTR,CTR, /* 8X */
+ CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,102,SYM, 68,CTR,CTR,103, /* 9X */
+ CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,104,SYM,SYM,CTR,SYM,SYM, /* AX */
+ SYM,SYM,NUM,NUM,SYM,105,SYM,SYM,SYM,NUM,106,SYM,SYM,SYM,SYM,SYM, /* BX */
+ 0, 10, 14, 9, 2, 11, 22,107, 13, 1, 16, 7, 6, 8,108, 5, /* CX */
+ 20, 55, 4, 3, 15,109, 12, 24, 23, 28, 25, 17, 36, 19, 30, 21, /* DX */
+ 48, 18,110, 39, 37,111, 62, 67, 71, 61, 74,112, 73, 66, 72,113, /* EX */
+ 65,114,115, 64,116, 75, 60,SYM, 70,117, 69,118, 58,119, 63,120, /* FX */
+};
+/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
+
+static const int Unicode_Char_size = 112; /* number of (code point, order) pairs in Unicode_CharOrder */
+static const unsigned int Unicode_CharOrder[] = /* flat pair list: a Unicode code point followed by its order */
+{ /* ASCII letters (65.., 97..) first, then code points 4304.. (U+10D0..) and 7312.. (U+1C90..), which carry the same orders */
+ 65, 29, 66, 45, 67, 41, 68, 42, 69, 27, 70, 51, 71, 47, 72, 40,
+ 73, 26, 75, 54, 76, 38, 77, 44, 78, 31, 79, 32, 80, 46, 82, 34,
+ 83, 33, 84, 35, 85, 43, 86, 50, 87, 52, 88, 53, 89, 49, 97, 29,
+ 98, 45, 99, 41, 100, 42, 101, 27, 102, 51, 103, 47, 104, 40, 105, 26,
+ 107, 54, 108, 38, 109, 44, 110, 31, 111, 32, 112, 46, 114, 34, 115, 33,
+ 116, 35, 117, 43, 118, 50, 119, 52, 120, 53, 121, 49, 4304, 0,4305, 10,
+ 4306, 14, 4307, 9, 4308, 2, 4309, 11, 4310, 22, 4311, 13, 4312, 1,4313, 16,
+ 4314, 7, 4315, 6, 4316, 8, 4317, 5, 4318, 20, 4319, 55, 4320, 4,4321, 3,
+ 4322, 15, 4323, 12, 4324, 24, 4325, 23, 4326, 28, 4327, 25, 4328, 17,4329, 36,
+ 4330, 19, 4331, 30, 4332, 21, 4333, 48, 4334, 18, 4335, 39, 4336, 37,7312, 0,
+ 7313, 10, 7314, 14, 7315, 9, 7316, 2, 7317, 11, 7318, 22, 7319, 13,7320, 1,
+ 7321, 16, 7322, 7, 7323, 6, 7324, 8, 7325, 5, 7326, 20, 7327, 55,7328, 4,
+ 7329, 3, 7330, 15, 7331, 12, 7332, 24, 7333, 23, 7334, 28, 7335, 25,7336, 17,
+ 7337, 36, 7338, 19, 7339, 30, 7340, 21, 7341, 48, 7342, 18, 7343, 39,7344, 37,
+};
+
+
+ /* Model Table:
+ * Total considered sequences: 1485 / 3136
+ * - Positive sequences: first 819 (0.9950126614517769)
+ * - Probable sequences: next 240 (1059-819) (0.003988409500368384)
+ * - Neutral sequences: last 2077 (0.000998929047854702)
+ * - Negative sequences: 1651 (off-ratio)
+ * Negative sequences: TODO
+ */
+static const PRUint8 GeorgianLangModel[] = /* 56 x 56 sequence table (3136 entries, values 0..3); each 56-entry row spans two source lines */
+{ /* NOTE(review): presumably 3/2/1/0 = positive/probable/neutral/negative as listed in the Model Table comment -- confirm */
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 3,0,3,0,0,0,0,0,3,3,0,3,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 3,0,3,0,0,0,0,0,3,2,0,3,0,0,0,0,0,0,0,0,3,0,0,0,1,0,0,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 3,0,3,0,1,0,0,0,3,3,0,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,3,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,
+ 1,0,3,0,0,0,0,0,1,2,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+ 3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 3,0,3,0,0,0,0,0,3,2,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 3,0,3,0,0,0,0,0,3,3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
+ 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 3,0,3,0,0,0,0,0,2,1,0,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,3,
+ 3,3,3,3,2,3,3,1,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,0,0,
+ 2,0,1,0,0,0,0,0,2,3,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,
+ 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,0,
+ 2,0,3,0,0,0,0,0,2,3,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,
+ 3,3,3,3,3,3,3,3,3,3,1,3,3,2,3,2,1,3,2,0,0,2,2,1,0,1,0,0,
+ 3,0,1,0,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+ 3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,1,3,0,0,1,0,3,1,0,0,0,0,
+ 1,0,0,0,0,0,0,0,1,3,0,1,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,3,3,3,2,1,1,3,1,2,2,0,0,
+ 2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,
+ 3,0,3,0,0,0,0,0,3,3,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,3,3,1,2,2,3,3,0,0,0,0,
+ 1,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,2,3,3,2,1,3,1,3,0,1,1,1,3,1,2,0,0,0,
+ 0,0,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,2,3,3,1,1,1,2,3,0,2,3,0,0,
+ 0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,2,2,3,3,2,1,1,1,3,1,0,1,0,2,1,2,0,0,0,
+ 0,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,2,3,3,3,3,3,0,0,3,3,3,0,3,3,3,2,0,3,0,1,3,2,0,0,0,
+ 0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,1,3,1,1,0,1,3,0,0,2,0,0,
+ 0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+ 3,3,3,2,3,3,3,3,3,3,2,3,3,2,1,0,2,2,3,0,0,1,2,0,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,2,3,1,0,1,1,3,1,0,3,1,1,1,1,1,0,2,0,1,3,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,0,3,3,3,3,1,0,0,3,3,0,0,0,1,0,0,0,0,3,0,1,1,3,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,1,3,2,2,3,3,1,2,1,0,3,0,0,0,0,2,0,0,0,0,0,
+ 3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,3,2,3,1,0,3,3,2,1,3,0,2,0,3,1,0,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,3,3,3,2,3,3,2,1,1,3,2,2,2,2,2,3,3,0,0,0,1,1,0,0,0,
+ 1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,1,3,3,3,3,3,2,1,3,3,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,3,3,3,3,3,0,0,3,0,2,3,3,2,3,3,2,3,0,2,3,3,1,3,3,0,
+ 1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,3,3,
+ 0,3,0,3,3,3,3,3,0,0,3,0,2,3,3,3,3,3,3,3,0,3,3,2,3,3,2,0,
+ 3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,1,1,2,0,0,0,3,2,1,2,0,0,0,
+ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+ 0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,
+ 0,2,1,3,1,3,3,3,0,0,3,0,2,3,3,3,3,3,3,3,0,3,3,3,2,2,2,0,
+ 3,3,3,1,3,3,3,3,3,0,0,3,3,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,
+ 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,3,3,3,2,3,0,0,2,0,2,3,3,3,1,3,1,3,0,2,1,2,1,1,2,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,2,0,3,3,3,3,3,0,0,3,0,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,2,3,3,2,3,0,0,3,0,3,3,2,2,3,3,3,2,0,3,1,1,2,0,3,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,3,3,3,3,3,0,0,3,0,2,3,3,3,3,3,2,3,0,3,2,2,2,0,3,0,
+ 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,1,3,3,3,3,0,0,3,1,3,2,1,3,2,1,1,1,0,3,1,2,2,0,1,0,
+ 3,3,3,1,3,3,1,1,3,1,1,3,3,1,0,0,1,1,2,0,1,0,0,2,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 3,3,3,0,1,3,3,2,1,1,1,1,3,1,2,0,2,0,0,1,2,0,1,3,1,3,0,0,
+ 0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,1,3,3,1,3,0,0,3,0,2,2,3,3,2,2,2,2,0,3,1,2,1,0,2,0,
+ 3,3,3,2,2,3,3,1,2,2,0,3,3,1,3,0,0,2,0,0,0,0,2,0,0,0,0,0,
+ 1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,3,3,2,3,3,0,0,2,0,1,0,2,3,2,1,1,2,0,3,1,1,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,0,3,2,3,3,0,0,3,0,3,2,2,3,1,2,1,1,0,3,0,1,1,0,3,0,
+ 0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,2,3,3,2,1,0,0,2,0,1,1,2,3,2,1,1,2,0,2,1,2,1,0,0,0,
+ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,3,2,3,3,3,0,0,3,0,1,3,3,1,3,3,2,3,0,0,1,2,1,1,2,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,1,3,2,0,2,0,0,2,0,1,2,0,3,3,3,3,0,0,2,0,1,0,0,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,3,3,2,3,1,0,0,3,0,1,2,0,3,0,2,1,0,0,3,0,1,0,0,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,2,3,2,3,2,0,0,3,0,3,0,2,3,1,1,3,0,0,2,0,1,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,2,3,3,3,1,0,0,3,0,3,0,1,3,0,1,0,2,0,2,0,1,0,0,0,0,
+ 3,3,3,1,3,3,2,1,1,3,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,3,
+ 0,2,0,2,3,3,2,2,0,0,2,0,0,3,1,1,2,3,2,0,0,1,1,1,1,0,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,0,3,1,2,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,1,3,0,3,2,0,0,2,0,1,0,0,2,1,1,1,1,0,0,0,2,1,0,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,2,3,2,2,1,0,0,1,0,3,0,0,1,1,0,1,0,0,0,1,1,2,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1,
+ 0,2,0,0,1,0,0,2,0,0,0,0,1,1,0,1,0,0,2,0,0,1,3,2,0,3,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,
+ 0,3,0,2,2,3,2,1,0,0,2,0,1,0,0,2,1,1,0,1,0,2,1,1,1,0,1,0,
+ 3,3,3,1,0,3,1,1,2,0,0,0,3,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,
+ 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+};
+
+
+const SequenceModel Georgian_AcademyGeorgianModel = /* single-byte sequence model: Georgian text in the GEORGIAN-ACADEMY charset */
+{
+ Georgian_Academy_CharToOrderMap, /* byte -> order table for this charset */
+ GeorgianLangModel, /* sequence table, shared by all three Georgian models in this file */
+ 56, /* dimension of the sequence table (56 * 56 = 3136 sequences) */
+ (float)0.9990010709521453, /* sum of the positive and probable ratios given in the Model Table comment */
+ PR_FALSE, /* NOTE(review): presumably the keep-English-letters flag -- confirm against the SequenceModel declaration */
+ "GEORGIAN-ACADEMY", /* charset name */
+ "ka" /* language code, same as in GeorgianModel */
+};
+
+const SequenceModel Georgian_PsGeorgianModel = /* single-byte sequence model: Georgian text in the GEORGIAN-PS charset */
+{
+ Georgian_Ps_CharToOrderMap, /* byte -> order table for this charset */
+ GeorgianLangModel, /* sequence table, shared by all three Georgian models in this file */
+ 56, /* dimension of the sequence table (56 * 56 = 3136 sequences) */
+ (float)0.9990010709521453, /* sum of the positive and probable ratios given in the Model Table comment */
+ PR_FALSE, /* NOTE(review): presumably the keep-English-letters flag -- confirm against the SequenceModel declaration */
+ "GEORGIAN-PS", /* charset name */
+ "ka" /* language code, same as in GeorgianModel */
+};
+
+const LanguageModel GeorgianModel = /* language model for Georgian, keyed by Unicode code point rather than by charset byte */
+{
+ "ka", /* language code */
+ Unicode_CharOrder, /* flat list of (code point, order) pairs */
+ 112, /* number of pairs in that list; same value as Unicode_Char_size */
+ GeorgianLangModel, /* sequence table, shared with the two single-byte models */
+ 56, /* dimension of the sequence table (56 * 56 = 3136 sequences) */
+ 4, /* NOTE(review): presumably the number of "very frequent" characters -- confirm against the LanguageModel declaration */
+ (float)0.4034647649351511, /* NOTE(review): presumably the share of text covered by those characters -- confirm */
+ 33, /* NOTE(review): presumably the order from which characters count as low-frequency -- confirm */
+ (float)0.03062631944282519, /* NOTE(review): presumably the share of text covered by low-frequency characters -- confirm */
+};
diff --git a/src/nsLanguageDetector-generated.h b/src/nsLanguageDetector-generated.h
index 39e0936..64054fb 100644
--- a/src/nsLanguageDetector-generated.h
+++ b/src/nsLanguageDetector-generated.h
@@ -38,7 +38,7 @@
#ifndef nsLanguageDetector_h_generated_h__
#define nsLanguageDetector_h_generated_h__
-#define NUM_OF_LANGUAGE_MODELS 37
+#define NUM_OF_LANGUAGE_MODELS 38
extern const LanguageModel ArabicModel;
extern const LanguageModel BelarusianModel;
@@ -60,6 +60,7 @@ extern const LanguageModel HindiModel;
extern const LanguageModel CroatianModel;
extern const LanguageModel HungarianModel;
extern const LanguageModel ItalianModel;
+extern const LanguageModel GeorgianModel;
extern const LanguageModel LithuanianModel;
extern const LanguageModel LatvianModel;
extern const LanguageModel MacedonianModel;
diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp
index e9d7548..9512f3a 100644
--- a/src/nsMBCSGroupProber.cpp
+++ b/src/nsMBCSGroupProber.cpp
@@ -105,6 +105,7 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
langDetectors[i][j++] = new nsLanguageDetector(&FinnishModel);
langDetectors[i][j++] = new nsLanguageDetector(&FrenchModel);
langDetectors[i][j++] = new nsLanguageDetector(&GermanModel);
+ langDetectors[i][j++] = new nsLanguageDetector(&GeorgianModel);
langDetectors[i][j++] = new nsLanguageDetector(&GreekModel);
langDetectors[i][j++] = new nsLanguageDetector(&HebrewModel);
langDetectors[i][j++] = new nsLanguageDetector(&HindiModel);
diff --git a/src/nsSBCSGroupProber.cpp b/src/nsSBCSGroupProber.cpp
index ba054c8..74340e3 100644
--- a/src/nsSBCSGroupProber.cpp
+++ b/src/nsSBCSGroupProber.cpp
@@ -240,6 +240,9 @@ nsSBCSGroupProber::nsSBCSGroupProber()
mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1CatalanModel);
mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252CatalanModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Georgian_AcademyGeorgianModel);
+ mProbers[n++] = new nsSingleByteCharSetProber(&Georgian_PsGeorgianModel);
+
assert (n_sbcs_probers == n);
Reset();
diff --git a/src/nsSBCharSetProber-generated.h b/src/nsSBCharSetProber-generated.h
index e110f08..ef15b75 100644
--- a/src/nsSBCharSetProber-generated.h
+++ b/src/nsSBCharSetProber-generated.h
@@ -38,7 +38,7 @@
#ifndef nsSingleByteCharSetProber_generated_h__
#define nsSingleByteCharSetProber_generated_h__
-#define NUM_OF_SEQUENCE_MODELS 118
+#define NUM_OF_SEQUENCE_MODELS 120
extern const SequenceModel Iso_8859_6ArabicModel;
extern const SequenceModel Windows_1256ArabicModel;
@@ -121,6 +121,9 @@ extern const SequenceModel Iso_8859_9ItalianModel;
extern const SequenceModel Iso_8859_15ItalianModel;
extern const SequenceModel Windows_1252ItalianModel;
+extern const SequenceModel Georgian_AcademyGeorgianModel;
+extern const SequenceModel Georgian_PsGeorgianModel;
+
extern const SequenceModel Iso_8859_4LithuanianModel;
extern const SequenceModel Iso_8859_10LithuanianModel;
extern const SequenceModel Iso_8859_13LithuanianModel;