diff options
author | Jehan <jehan@girinstud.io> | 2022-12-20 14:23:24 +0100 |
---|---|---|
committer | Jehan <jehan@girinstud.io> | 2022-12-20 14:28:29 +0100 |
commit | 7875272a8c61fdccba1db6b3c29ce248cc5fd65f (patch) | |
tree | b51d6fca83d3a9978b89ca38de834767cf50665b /src | |
parent | c843d23a17eebaa69be56565c5963471d5f1295f (diff) |
script, src, test: new Georgian support.
For charsets UTF-8, GEORGIAN-ACADEMY and GEORGIAN-PS. The 2 GEORGIAN-*
sets were generated thanks to the new create-table.py script.
Test text comes from the 'ვირზაზუნა' page of Wikipedia in Georgian.
Diffstat (limited to 'src')
-rw-r--r-- | src/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/LangModels/LangGeorgianModel.cpp | 288 | ||||
-rw-r--r-- | src/nsLanguageDetector-generated.h | 3 | ||||
-rw-r--r-- | src/nsMBCSGroupProber.cpp | 1 | ||||
-rw-r--r-- | src/nsSBCSGroupProber.cpp | 3 | ||||
-rw-r--r-- | src/nsSBCharSetProber-generated.h | 5 |
6 files changed, 299 insertions, 2 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 17fd980..1226a4f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -20,6 +20,7 @@ set( LangModels/LangFrenchModel.cpp LangModels/LangDanishModel.cpp LangModels/LangGermanModel.cpp + LangModels/LangGeorgianModel.cpp LangModels/LangGreekModel.cpp LangModels/LangHungarianModel.cpp LangModels/LangHebrewModel.cpp diff --git a/src/LangModels/LangGeorgianModel.cpp b/src/LangModels/LangGeorgianModel.cpp new file mode 100644 index 0000000..7da31ee --- /dev/null +++ b/src/LangModels/LangGeorgianModel.cpp @@ -0,0 +1,288 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "../nsSBCharSetProber.h" +#include "../nsSBCharSetProber-generated.h" +#include "../nsLanguageDetector.h" + +#include "../nsLanguageDetector-generated.h" + +/********* Language model for: Georgian *********/ + +/** + * Generated by BuildLangModel.py + * On: 2022-12-20 12:56:27.859568 + **/ + + /* Character Mapping Table: + * ILL: illegal character. + * CTR: control character specific to the charset. + * RET: carriage/return. + * SYM: symbol (punctuation) that does not belong to word. + * NUM: 0 - 9. + * + * Other characters are ordered by probabilities + * (0 is the most common character in the language). + * + * Orders are generic to a language. So the codepoint with order X in + * CHARSET1 maps to the same character as the codepoint with the same + * order X in CHARSET2 for the same language. + * As such, it is possible to get missing order. For instance the + * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 + * even though they are both used for French. Same for the euro sign. 
+ */ + static const unsigned char Georgian_Academy_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 29, 45, 41, 42, 27, 51, 47, 40, 26, 57, 54, 38, 44, 31, 32, /* 4X */ + 46, 59, 34, 33, 35, 43, 50, 52, 53, 49, 56,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 29, 45, 41, 42, 27, 51, 47, 40, 26, 57, 54, 38, 44, 31, 32, /* 6X */ + 46, 59, 34, 33, 35, 43, 50, 52, 53, 49, 56,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,SYM, 77,SYM,SYM,SYM,SYM, 78,SYM, 79,SYM, 68,CTR,CTR,CTR, /* 8X */ + CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 80,SYM, 68,CTR,CTR, 81, /* 9X */ + CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 82,SYM,SYM,CTR,SYM,SYM, /* AX */ + SYM,SYM,NUM,NUM,SYM, 83,SYM,SYM,SYM,NUM, 84,SYM,SYM,SYM,SYM,SYM, /* BX */ + 0, 10, 14, 9, 2, 11, 22, 13, 1, 16, 7, 6, 8, 5, 20, 55, /* CX */ + 4, 3, 15, 12, 24, 23, 28, 25, 17, 36, 19, 30, 21, 48, 18, 39, /* DX */ + 37, 85, 86, 87, 88, 89, 76, 67, 71, 61, 74, 90, 73, 66, 72, 91, /* EX */ + 65, 92, 93, 64, 94, 75, 60,SYM, 70, 95, 69, 96, 58, 97, 63, 98, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + +static const unsigned char Georgian_Ps_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 29, 45, 41, 42, 27, 51, 47, 40, 26, 57, 54, 38, 44, 31, 32, /* 4X */ + 46, 59, 34, 33, 35, 43, 50, 52, 53, 49, 56,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 29, 45, 41, 42, 27, 51, 47, 40, 26, 57, 54, 38, 44, 31, 32, /* 6X */ + 46, 59, 34, 33, 35, 43, 50, 52, 53, 49, 56,SYM,SYM,SYM,SYM,CTR, /* 7X 
*/ + CTR,CTR,SYM, 99,SYM,SYM,SYM,SYM,100,SYM,101,SYM, 68,CTR,CTR,CTR, /* 8X */ + CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,102,SYM, 68,CTR,CTR,103, /* 9X */ + CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,104,SYM,SYM,CTR,SYM,SYM, /* AX */ + SYM,SYM,NUM,NUM,SYM,105,SYM,SYM,SYM,NUM,106,SYM,SYM,SYM,SYM,SYM, /* BX */ + 0, 10, 14, 9, 2, 11, 22,107, 13, 1, 16, 7, 6, 8,108, 5, /* CX */ + 20, 55, 4, 3, 15,109, 12, 24, 23, 28, 25, 17, 36, 19, 30, 21, /* DX */ + 48, 18,110, 39, 37,111, 62, 67, 71, 61, 74,112, 73, 66, 72,113, /* EX */ + 65,114,115, 64,116, 75, 60,SYM, 70,117, 69,118, 58,119, 63,120, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + +static const int Unicode_Char_size = 112; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 29, 66, 45, 67, 41, 68, 42, 69, 27, 70, 51, 71, 47, 72, 40, + 73, 26, 75, 54, 76, 38, 77, 44, 78, 31, 79, 32, 80, 46, 82, 34, + 83, 33, 84, 35, 85, 43, 86, 50, 87, 52, 88, 53, 89, 49, 97, 29, + 98, 45, 99, 41, 100, 42, 101, 27, 102, 51, 103, 47, 104, 40, 105, 26, + 107, 54, 108, 38, 109, 44, 110, 31, 111, 32, 112, 46, 114, 34, 115, 33, + 116, 35, 117, 43, 118, 50, 119, 52, 120, 53, 121, 49, 4304, 0,4305, 10, + 4306, 14, 4307, 9, 4308, 2, 4309, 11, 4310, 22, 4311, 13, 4312, 1,4313, 16, + 4314, 7, 4315, 6, 4316, 8, 4317, 5, 4318, 20, 4319, 55, 4320, 4,4321, 3, + 4322, 15, 4323, 12, 4324, 24, 4325, 23, 4326, 28, 4327, 25, 4328, 17,4329, 36, + 4330, 19, 4331, 30, 4332, 21, 4333, 48, 4334, 18, 4335, 39, 4336, 37,7312, 0, + 7313, 10, 7314, 14, 7315, 9, 7316, 2, 7317, 11, 7318, 22, 7319, 13,7320, 1, + 7321, 16, 7322, 7, 7323, 6, 7324, 8, 7325, 5, 7326, 20, 7327, 55,7328, 4, + 7329, 3, 7330, 15, 7331, 12, 7332, 24, 7333, 23, 7334, 28, 7335, 25,7336, 17, + 7337, 36, 7338, 19, 7339, 30, 7340, 21, 7341, 48, 7342, 18, 7343, 39,7344, 37, +}; + + + /* Model Table: + * Total considered sequences: 1485 / 3136 + * - Positive sequences: first 819 (0.9950126614517769) + * - Probable sequences: next 240 (1059-819) (0.003988409500368384) + * 
- Neutral sequences: last 2077 (0.000998929047854702) + * - Negative sequences: 1651 (off-ratio) + * Negative sequences: TODO + */ +static const PRUint8 GeorgianLangModel[] = +{ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0, + 3,0,3,0,0,0,0,0,3,3,0,3,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0, + 3,0,3,0,0,0,0,0,3,2,0,3,0,0,0,0,0,0,0,0,3,0,0,0,1,0,0,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0, + 3,0,3,0,1,0,0,0,3,3,0,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,3, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0, + 1,0,3,0,0,0,0,0,1,2,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, + 3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0, + 3,0,3,0,0,0,0,0,3,2,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0, + 3,0,3,0,0,0,0,0,3,3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0, + 3,0,3,0,0,0,0,0,2,1,0,3,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,3, + 3,3,3,3,2,3,3,1,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,0,0, + 2,0,1,0,0,0,0,0,2,3,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,0, + 2,0,3,0,0,0,0,0,2,3,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3, + 3,3,3,3,3,3,3,3,3,3,1,3,3,2,3,2,1,3,2,0,0,2,2,1,0,1,0,0, + 3,0,1,0,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, + 3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,1,3,0,0,1,0,3,1,0,0,0,0, + 1,0,0,0,0,0,0,0,1,3,0,1,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,3,3,3,2,1,1,3,1,2,2,0,0, + 2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0, + 3,0,3,0,0,0,0,0,3,3,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,3,3,1,2,2,3,3,0,0,0,0, + 1,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,2,1,3,1,3,0,1,1,1,3,1,2,0,0,0, + 0,0,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,2,3,3,1,1,1,2,3,0,2,3,0,0, + 0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,2,3,3,2,1,1,1,3,1,0,1,0,2,1,2,0,0,0, + 0,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,0,0,3,3,3,0,3,3,3,2,0,3,0,1,3,2,0,0,0, + 0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,1,3,1,1,0,1,3,0,0,2,0,0, + 0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,3,2,3,3,2,1,0,2,2,3,0,0,1,2,0,1,0,0,0, + 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,1,0,1,1,3,1,0,3,1,1,1,1,1,0,2,0,1,3,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,0,3,3,3,3,1,0,0,3,3,0,0,0,1,0,0,0,0,3,0,1,1,3,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,1,3,2,2,3,3,1,2,1,0,3,0,0,0,0,2,0,0,0,0,0, + 3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,1,0,3,3,2,1,3,0,2,0,3,1,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,3,2,1,1,3,2,2,2,2,2,3,3,0,0,0,1,1,0,0,0, + 1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,1,3,3,3,3,3,2,1,3,3,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,3,3,3,3,3,0,0,3,0,2,3,3,2,3,3,2,3,0,2,3,3,1,3,3,0, + 1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,3,3, + 0,3,0,3,3,3,3,3,0,0,3,0,2,3,3,3,3,3,3,3,0,3,3,2,3,3,2,0, + 3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,1,1,2,0,0,0,3,2,1,2,0,0,0, + 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, + 0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2, + 0,2,1,3,1,3,3,3,0,0,3,0,2,3,3,3,3,3,3,3,0,3,3,3,2,2,2,0, + 3,3,3,1,3,3,3,3,3,0,0,3,3,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0, + 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 
0,3,0,3,3,3,2,3,0,0,2,0,2,3,3,3,1,3,1,3,0,2,1,2,1,1,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,2,0,3,3,3,3,3,0,0,3,0,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,2,3,3,2,3,0,0,3,0,3,3,2,2,3,3,3,2,0,3,1,1,2,0,3,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,3,3,3,3,3,0,0,3,0,2,3,3,3,3,3,2,3,0,3,2,2,2,0,3,0, + 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,1,3,3,3,3,0,0,3,1,3,2,1,3,2,1,1,1,0,3,1,2,2,0,1,0, + 3,3,3,1,3,3,1,1,3,1,1,3,3,1,0,0,1,1,2,0,1,0,0,2,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,0,1,3,3,2,1,1,1,1,3,1,2,0,2,0,0,1,2,0,1,3,1,3,0,0, + 0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,1,3,3,1,3,0,0,3,0,2,2,3,3,2,2,2,2,0,3,1,2,1,0,2,0, + 3,3,3,2,2,3,3,1,2,2,0,3,3,1,3,0,0,2,0,0,0,0,2,0,0,0,0,0, + 1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,3,3,2,3,3,0,0,2,0,1,0,2,3,2,1,1,2,0,3,1,1,1,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,0,3,2,3,3,0,0,3,0,3,2,2,3,1,2,1,1,0,3,0,1,1,0,3,0, + 0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,2,3,3,2,1,0,0,2,0,1,1,2,3,2,1,1,2,0,2,1,2,1,0,0,0, + 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,3,2,3,3,3,0,0,3,0,1,3,3,1,3,3,2,3,0,0,1,2,1,1,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,1,3,2,0,2,0,0,2,0,1,2,0,3,3,3,3,0,0,2,0,1,0,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,3,3,2,3,1,0,0,3,0,1,2,0,3,0,2,1,0,0,3,0,1,0,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,2,3,2,3,2,0,0,3,0,3,0,2,3,1,1,3,0,0,2,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,2,3,3,3,1,0,0,3,0,3,0,1,3,0,1,0,2,0,2,0,1,0,0,0,0, + 
3,3,3,1,3,3,2,1,1,3,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,3, + 0,2,0,2,3,3,2,2,0,0,2,0,0,3,1,1,2,3,2,0,0,1,1,1,1,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,0,3,1,2,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,1,3,0,3,2,0,0,2,0,1,0,0,2,1,1,1,1,0,0,0,2,1,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,2,3,2,2,1,0,0,1,0,3,0,0,1,1,0,1,0,0,0,1,1,2,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1, + 0,2,0,0,1,0,0,2,0,0,0,0,1,1,0,1,0,0,2,0,0,1,3,2,0,3,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3, + 0,3,0,2,2,3,2,1,0,0,2,0,1,0,0,2,1,1,0,1,0,2,1,1,1,0,1,0, + 3,3,3,1,0,3,1,1,2,0,0,0,3,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + + +const SequenceModel Georgian_AcademyGeorgianModel = +{ + Georgian_Academy_CharToOrderMap, + GeorgianLangModel, + 56, + (float)0.9990010709521453, + PR_FALSE, + "GEORGIAN-ACADEMY", + "ka" +}; + +const SequenceModel Georgian_PsGeorgianModel = +{ + Georgian_Ps_CharToOrderMap, + GeorgianLangModel, + 56, + (float)0.9990010709521453, + PR_FALSE, + "GEORGIAN-PS", + "ka" +}; + +const LanguageModel GeorgianModel = +{ + "ka", + Unicode_CharOrder, + 112, + GeorgianLangModel, + 56, + 4, + (float)0.4034647649351511, + 33, + (float)0.03062631944282519, +}; diff --git a/src/nsLanguageDetector-generated.h b/src/nsLanguageDetector-generated.h index 39e0936..64054fb 100644 --- a/src/nsLanguageDetector-generated.h +++ b/src/nsLanguageDetector-generated.h @@ -38,7 +38,7 @@ #ifndef nsLanguageDetector_h_generated_h__ #define nsLanguageDetector_h_generated_h__ -#define NUM_OF_LANGUAGE_MODELS 37 +#define NUM_OF_LANGUAGE_MODELS 38 extern const LanguageModel ArabicModel; extern const LanguageModel BelarusianModel; @@ -60,6 +60,7 @@ extern const LanguageModel 
HindiModel; extern const LanguageModel CroatianModel; extern const LanguageModel HungarianModel; extern const LanguageModel ItalianModel; +extern const LanguageModel GeorgianModel; extern const LanguageModel LithuanianModel; extern const LanguageModel LatvianModel; extern const LanguageModel MacedonianModel; diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp index e9d7548..9512f3a 100644 --- a/src/nsMBCSGroupProber.cpp +++ b/src/nsMBCSGroupProber.cpp @@ -105,6 +105,7 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter) langDetectors[i][j++] = new nsLanguageDetector(&FinnishModel); langDetectors[i][j++] = new nsLanguageDetector(&FrenchModel); langDetectors[i][j++] = new nsLanguageDetector(&GermanModel); + langDetectors[i][j++] = new nsLanguageDetector(&GeorgianModel); langDetectors[i][j++] = new nsLanguageDetector(&GreekModel); langDetectors[i][j++] = new nsLanguageDetector(&HebrewModel); langDetectors[i][j++] = new nsLanguageDetector(&HindiModel); diff --git a/src/nsSBCSGroupProber.cpp b/src/nsSBCSGroupProber.cpp index ba054c8..74340e3 100644 --- a/src/nsSBCSGroupProber.cpp +++ b/src/nsSBCSGroupProber.cpp @@ -240,6 +240,9 @@ nsSBCSGroupProber::nsSBCSGroupProber() mProbers[n++] = new nsSingleByteCharSetProber(&Iso_8859_1CatalanModel); mProbers[n++] = new nsSingleByteCharSetProber(&Windows_1252CatalanModel); + mProbers[n++] = new nsSingleByteCharSetProber(&Georgian_AcademyGeorgianModel); + mProbers[n++] = new nsSingleByteCharSetProber(&Georgian_PsGeorgianModel); + assert (n_sbcs_probers == n); Reset(); diff --git a/src/nsSBCharSetProber-generated.h b/src/nsSBCharSetProber-generated.h index e110f08..ef15b75 100644 --- a/src/nsSBCharSetProber-generated.h +++ b/src/nsSBCharSetProber-generated.h @@ -38,7 +38,7 @@ #ifndef nsSingleByteCharSetProber_generated_h__ #define nsSingleByteCharSetProber_generated_h__ -#define NUM_OF_SEQUENCE_MODELS 118 +#define NUM_OF_SEQUENCE_MODELS 120 extern const SequenceModel Iso_8859_6ArabicModel; extern const 
SequenceModel Windows_1256ArabicModel; @@ -121,6 +121,9 @@ extern const SequenceModel Iso_8859_9ItalianModel; extern const SequenceModel Iso_8859_15ItalianModel; extern const SequenceModel Windows_1252ItalianModel; +extern const SequenceModel Georgian_AcademyGeorgianModel; +extern const SequenceModel Georgian_PsGeorgianModel; + extern const SequenceModel Iso_8859_4LithuanianModel; extern const SequenceModel Iso_8859_10LithuanianModel; extern const SequenceModel Iso_8859_13LithuanianModel; |