diff options
author | Luo Jinghua <sunmoon1997@gmail.com> | 2010-01-07 16:27:41 +0800 |
---|---|---|
committer | Luo Jinghua <sunmoon1997@gmail.com> | 2010-01-07 16:27:41 +0800 |
commit | 16001b547a8f27aceb1691efb3e07ab9c3af0030 (patch) | |
tree | 9f804667053a6a4ea3e10aac11ae6c9f2cb5af87 /charsetalias.c |
Initial commit of uniconv
Diffstat (limited to 'charsetalias.c')
-rw-r--r-- | charsetalias.c | 499 |
1 files changed, 499 insertions, 0 deletions
/*
 * Fold an ASCII upper-case letter to lower case; every other value is
 * returned unchanged.  Deliberately independent of the current locale so
 * that charset names compare the same way everywhere.
 */
static int
charset_ascii_lower(int c)
{
    return (c >= 'A' && c <= 'Z') ? c - 'A' + 'a' : c;
}

/*
 * Return non-zero when the two NUL-terminated names are equal, ignoring
 * ASCII case only.  Both arguments must be non-NULL.
 */
static int
charset_name_equal(const char *lhs, const char *rhs)
{
    const unsigned char *p = (const unsigned char *)lhs;
    const unsigned char *q = (const unsigned char *)rhs;

    while (*p != '\0' && charset_ascii_lower(*p) == charset_ascii_lower(*q)) {
        p++;
        q++;
    }

    /* Equal only if both strings ended at the same position. */
    return charset_ascii_lower(*p) == charset_ascii_lower(*q);
}

/*
 * Map a charset alias to its canonical name.
 *
 * The lookup is a linear scan over a static table of (alias, canonical)
 * pairs and ignores ASCII case.  The first matching alias wins.
 *
 * charset: NUL-terminated charset name, or NULL.
 *
 * Returns a pointer to a statically allocated canonical name when the
 * alias is known; otherwise returns the `charset` argument itself
 * (including NULL when NULL was passed).  The caller must not free the
 * result.
 *
 * NOTE(review): almost every key uses '_' as the separator (the exception
 * is "cyrillic-asian"), so names such as "iso-8859-1" are returned
 * unchanged.  Presumably callers normalise separators first -- confirm.
 */
const char*
get_canonical_charset(const char *charset)
{
    /* Flat list of pairs: aliases[2k] is an alias, aliases[2k + 1] is its
     * canonical name.  Terminated by a NULL pair. */
    static const char *const aliases[] = {
        /* ascii codec */
        "646"                , "ascii",
        "ansi_x3.4_1968"     , "ascii",
        "ansi_x3_4_1968"     , "ascii", /* some email headers use this non-standard name */
        "ansi_x3.4_1986"     , "ascii",
        "cp367"              , "ascii",
        "csascii"            , "ascii",
        "ibm367"             , "ascii",
        "iso646_us"          , "ascii",
        "iso_646.irv_1991"   , "ascii",
        "iso_ir_6"           , "ascii",
        "us"                 , "ascii",
        "us_ascii"           , "ascii",

        /* big5 codec */
        "big5_tw"            , "big5",
        "csbig5"             , "big5",

        /* big5hkscs codec */
        "big5_hkscs"         , "big5hkscs",
        "hkscs"              , "big5hkscs",

        /* cp037 codec */
        "037"                , "cp037",
        "csibm037"           , "cp037",
        "ebcdic_cp_ca"       , "cp037",
        "ebcdic_cp_nl"       , "cp037",
        "ebcdic_cp_us"       , "cp037",
        "ebcdic_cp_wt"       , "cp037",
        "ibm037"             , "cp037",
        "ibm039"             , "cp037",

        /* cp1026 codec */
        "1026"               , "cp1026",
        "csibm1026"          , "cp1026",
        "ibm1026"            , "cp1026",

        /* cp1140 codec */
        "1140"               , "cp1140",
        "ibm1140"            , "cp1140",

        /* cp1250 codec */
        "1250"               , "cp1250",
        "windows_1250"       , "cp1250",

        /* cp1251 codec */
        "1251"               , "cp1251",
        "windows_1251"       , "cp1251",

        /* cp1252 codec */
        "1252"               , "cp1252",
        "windows_1252"       , "cp1252",

        /* cp1253 codec */
        "1253"               , "cp1253",
        "windows_1253"       , "cp1253",

        /* cp1254 codec */
        "1254"               , "cp1254",
        "windows_1254"       , "cp1254",

        /* cp1255 codec */
        "1255"               , "cp1255",
        "windows_1255"       , "cp1255",

        /* cp1256 codec */
        "1256"               , "cp1256",
        "windows_1256"       , "cp1256",

        /* cp1257 codec */
        "1257"               , "cp1257",
        "windows_1257"       , "cp1257",

        /* cp1258 codec */
        "1258"               , "cp1258",
        "windows_1258"       , "cp1258",

        /* cp424 codec */
        "424"                , "cp424",
        "csibm424"           , "cp424",
        "ebcdic_cp_he"       , "cp424",
        "ibm424"             , "cp424",

        /* cp437 codec */
        "437"                , "cp437",
        "cspc8codepage437"   , "cp437",
        "ibm437"             , "cp437",

        /* cp500 codec */
        "500"                , "cp500",
        "csibm500"           , "cp500",
        "ebcdic_cp_be"       , "cp500",
        "ebcdic_cp_ch"       , "cp500",
        "ibm500"             , "cp500",

        /* cp775 codec */
        "775"                , "cp775",
        "cspc775baltic"      , "cp775",
        "ibm775"             , "cp775",

        /* cp850 codec */
        "850"                , "cp850",
        "cspc850multilingual" , "cp850",
        "ibm850"             , "cp850",

        /* cp852 codec */
        "852"                , "cp852",
        "cspcp852"           , "cp852",
        "ibm852"             , "cp852",

        /* cp855 codec */
        "855"                , "cp855",
        "csibm855"           , "cp855",
        "ibm855"             , "cp855",

        /* cp857 codec */
        "857"                , "cp857",
        "csibm857"           , "cp857",
        "ibm857"             , "cp857",

        /* cp860 codec */
        "860"                , "cp860",
        "csibm860"           , "cp860",
        "ibm860"             , "cp860",

        /* cp861 codec */
        "861"                , "cp861",
        "cp_is"              , "cp861",
        "csibm861"           , "cp861",
        "ibm861"             , "cp861",

        /* cp862 codec */
        "862"                , "cp862",
        "cspc862latinhebrew" , "cp862",
        "ibm862"             , "cp862",

        /* cp863 codec */
        "863"                , "cp863",
        "csibm863"           , "cp863",
        "ibm863"             , "cp863",

        /* cp864 codec */
        "864"                , "cp864",
        "csibm864"           , "cp864",
        "ibm864"             , "cp864",

        /* cp865 codec */
        "865"                , "cp865",
        "csibm865"           , "cp865",
        "ibm865"             , "cp865",

        /* cp866 codec */
        "866"                , "cp866",
        "csibm866"           , "cp866",
        "ibm866"             , "cp866",

        /* cp869 codec */
        "869"                , "cp869",
        "cp_gr"              , "cp869",
        "csibm869"           , "cp869",
        "ibm869"             , "cp869",

        /* cp932 codec */
        "932"                , "cp932",
        "ms932"              , "cp932",
        "mskanji"            , "cp932",
        "ms_kanji"           , "cp932",

        /* cp949 codec */
        "949"                , "cp949",
        "ms949"              , "cp949",
        "uhc"                , "cp949",

        /* cp950 codec */
        "950"                , "cp950",
        "ms950"              , "cp950",

        /* euc_jis_2004 codec */
        "jisx0213"           , "euc_jis_2004",
        "eucjis2004"         , "euc_jis_2004",
        "euc_jis2004"        , "euc_jis_2004",

        /* euc_jisx0213 codec */
        "eucjisx0213"        , "euc_jisx0213",

        /* euc_jp codec */
        "eucjp"              , "euc_jp",
        "ujis"               , "euc_jp",
        "u_jis"              , "euc_jp",

        /* euc_kr codec */
        "euckr"              , "euc_kr",
        "korean"             , "euc_kr",
        "ksc5601"            , "euc_kr",
        "ks_c_5601"          , "euc_kr",
        "ks_c_5601_1987"     , "euc_kr",
        "ksx1001"            , "euc_kr",
        "ks_x_1001"          , "euc_kr",

        /* gb18030 codec */
        "gb18030_2000"       , "gb18030",

        /* gb2312 codec */
        "chinese"            , "gb2312",
        "csiso58gb231280"    , "gb2312",
        "euc_cn"             , "gb2312",
        "euccn"              , "gb2312",
        "eucgb2312_cn"       , "gb2312",
        "gb2312_1980"        , "gb2312",
        "gb2312_80"          , "gb2312",
        "iso_ir_58"          , "gb2312",

        /* gbk codec */
        "936"                , "gbk",
        "cp936"              , "gbk",
        "ms936"              , "gbk",

        /* hp_roman8 codec */
        "roman8"             , "hp_roman8",
        "r8"                 , "hp_roman8",
        "csHPRoman8"         , "hp_roman8",

        /* hz codec */
        "hzgb"               , "hz",
        "hz_gb"              , "hz",
        "hz_gb_2312"         , "hz",

        /* iso2022_jp codec */
        "csiso2022jp"        , "iso2022_jp",
        "iso2022jp"          , "iso2022_jp",
        "iso_2022_jp"        , "iso2022_jp",

        /* iso2022_jp_1 codec */
        "iso2022jp_1"        , "iso2022_jp_1",
        "iso_2022_jp_1"      , "iso2022_jp_1",

        /* iso2022_jp_2 codec */
        "iso2022jp_2"        , "iso2022_jp_2",
        "iso_2022_jp_2"      , "iso2022_jp_2",

        /* iso2022_jp_2004 codec */
        "iso_2022_jp_2004"   , "iso2022_jp_2004",
        "iso2022jp_2004"     , "iso2022_jp_2004",

        /* iso2022_jp_3 codec */
        "iso2022jp_3"        , "iso2022_jp_3",
        "iso_2022_jp_3"      , "iso2022_jp_3",

        /* iso2022_jp_ext codec */
        "iso2022jp_ext"      , "iso2022_jp_ext",
        "iso_2022_jp_ext"    , "iso2022_jp_ext",

        /* iso2022_kr codec */
        "csiso2022kr"        , "iso2022_kr",
        "iso2022kr"          , "iso2022_kr",
        "iso_2022_kr"        , "iso2022_kr",

        /* iso8859_10 codec */
        "csisolatin6"        , "iso8859_10",
        "iso_8859_10"        , "iso8859_10",
        "iso_8859_10_1992"   , "iso8859_10",
        "iso_ir_157"         , "iso8859_10",
        "l6"                 , "iso8859_10",
        "latin6"             , "iso8859_10",

        /* iso8859_11 codec */
        "thai"               , "iso8859_11",
        "iso_8859_11"        , "iso8859_11",
        "iso_8859_11_2001"   , "iso8859_11",

        /* iso8859_13 codec */
        "iso_8859_13"        , "iso8859_13",
        "l7"                 , "iso8859_13",
        "latin7"             , "iso8859_13",

        /* iso8859_14 codec */
        "iso_8859_14"        , "iso8859_14",
        "iso_8859_14_1998"   , "iso8859_14",
        "iso_celtic"         , "iso8859_14",
        "iso_ir_199"         , "iso8859_14",
        "l8"                 , "iso8859_14",
        "latin8"             , "iso8859_14",

        /* iso8859_15 codec */
        "iso_8859_15"        , "iso8859_15",
        "l9"                 , "iso8859_15",
        "latin9"             , "iso8859_15",

        /* iso8859_16 codec */
        "iso_8859_16"        , "iso8859_16",
        "iso_8859_16_2001"   , "iso8859_16",
        "iso_ir_226"         , "iso8859_16",
        "l10"                , "iso8859_16",
        "latin10"            , "iso8859_16",

        /* iso8859_2 codec */
        "csisolatin2"        , "iso8859_2",
        "iso_8859_2"         , "iso8859_2",
        "iso_8859_2_1987"    , "iso8859_2",
        "iso_ir_101"         , "iso8859_2",
        "l2"                 , "iso8859_2",
        "latin2"             , "iso8859_2",

        /* iso8859_3 codec */
        "csisolatin3"        , "iso8859_3",
        "iso_8859_3"         , "iso8859_3",
        "iso_8859_3_1988"    , "iso8859_3",
        "iso_ir_109"         , "iso8859_3",
        "l3"                 , "iso8859_3",
        "latin3"             , "iso8859_3",

        /* iso8859_4 codec */
        "csisolatin4"        , "iso8859_4",
        "iso_8859_4"         , "iso8859_4",
        "iso_8859_4_1988"    , "iso8859_4",
        "iso_ir_110"         , "iso8859_4",
        "l4"                 , "iso8859_4",
        "latin4"             , "iso8859_4",

        /* iso8859_5 codec */
        "csisolatincyrillic" , "iso8859_5",
        "cyrillic"           , "iso8859_5",
        "iso_8859_5"         , "iso8859_5",
        "iso_8859_5_1988"    , "iso8859_5",
        "iso_ir_144"         , "iso8859_5",

        /* iso8859_6 codec */
        "arabic"             , "iso8859_6",
        "asmo_708"           , "iso8859_6",
        "csisolatinarabic"   , "iso8859_6",
        "ecma_114"           , "iso8859_6",
        "iso_8859_6"         , "iso8859_6",
        "iso_8859_6_1987"    , "iso8859_6",
        "iso_ir_127"         , "iso8859_6",

        /* iso8859_7 codec */
        "csisolatingreek"    , "iso8859_7",
        "ecma_118"           , "iso8859_7",
        "elot_928"           , "iso8859_7",
        "greek"              , "iso8859_7",
        "greek8"             , "iso8859_7",
        "iso_8859_7"         , "iso8859_7",
        "iso_8859_7_1987"    , "iso8859_7",
        "iso_ir_126"         , "iso8859_7",

        /* iso8859_8 codec */
        "csisolatinhebrew"   , "iso8859_8",
        "hebrew"             , "iso8859_8",
        "iso_8859_8"         , "iso8859_8",
        "iso_8859_8_1988"    , "iso8859_8",
        "iso_ir_138"         , "iso8859_8",

        /* iso8859_9 codec */
        "csisolatin5"        , "iso8859_9",
        "iso_8859_9"         , "iso8859_9",
        "iso_8859_9_1989"    , "iso8859_9",
        "iso_ir_148"         , "iso8859_9",
        "l5"                 , "iso8859_9",
        "latin5"             , "iso8859_9",

        /* johab codec */
        "cp1361"             , "johab",
        "ms1361"             , "johab",

        /* koi8_r codec */
        "cskoi8r"            , "koi8_r",

        /* iso8859_1 (Latin-1) codec */
        /* All Latin-1 spellings, including "iso8859_1" itself, resolve to */
        /* the canonical name "iso8859_1".                                 */
        "8859"               , "iso8859_1",
        "cp819"              , "iso8859_1",
        "csisolatin1"        , "iso8859_1",
        "ibm819"             , "iso8859_1",
        "iso8859"            , "iso8859_1",
        "iso8859_1"          , "iso8859_1",
        "iso_8859_1"         , "iso8859_1",
        "iso_8859_1_1987"    , "iso8859_1",
        "iso_ir_100"         , "iso8859_1",
        "l1"                 , "iso8859_1",
        "latin"              , "iso8859_1",
        "latin1"             , "iso8859_1",

        /* mac_cyrillic codec */
        "maccyrillic"        , "mac_cyrillic",

        /* mac_greek codec */
        "macgreek"           , "mac_greek",

        /* mac_iceland codec */
        "maciceland"         , "mac_iceland",

        /* mac_latin2 codec */
        "maccentraleurope"   , "mac_latin2",
        "maclatin2"          , "mac_latin2",

        /* mac_roman codec */
        "macroman"           , "mac_roman",

        /* mac_turkish codec */
        "macturkish"         , "mac_turkish",

        /* ptcp154 codec */
        "csptcp154"          , "ptcp154",
        "pt154"              , "ptcp154",
        "cp154"              , "ptcp154",
        "cyrillic-asian"     , "ptcp154",

        /* quopri_codec codec */
        "quopri"             , "quopri_codec",
        "quoted_printable"   , "quopri_codec",
        "quotedprintable"    , "quopri_codec",

        /* shift_jis codec */
        "csshiftjis"         , "shift_jis",
        "shiftjis"           , "shift_jis",
        "sjis"               , "shift_jis",
        "s_jis"              , "shift_jis",

        /* shift_jis_2004 codec */
        "shiftjis2004"       , "shift_jis_2004",
        "sjis_2004"          , "shift_jis_2004",
        "s_jis_2004"         , "shift_jis_2004",

        /* shift_jisx0213 codec */
        "shiftjisx0213"      , "shift_jisx0213",
        "sjisx0213"          , "shift_jisx0213",
        "s_jisx0213"         , "shift_jisx0213",

        /* tactis codec */
        "tis260"             , "tactis",

        /* tis_620 codec */
        "tis620"             , "tis_620",
        "tis_620_0"          , "tis_620",
        "tis_620_2529_0"     , "tis_620",
        "tis_620_2529_1"     , "tis_620",
        "iso_ir_166"         , "tis_620",

        /* utf_16 codec */
        "u16"                , "utf_16",
        "utf16"              , "utf_16",

        /* utf_16_be codec */
        "unicodebigunmarked" , "utf_16_be",
        "utf_16be"           , "utf_16_be",

        /* utf_16_le codec */
        "unicodelittleunmarked" , "utf_16_le",
        "utf_16le"           , "utf_16_le",

        /* utf_32 codec */
        "u32"                , "utf_32",
        "utf32"              , "utf_32",
        "unicode"            , "utf_32",

        /* utf_32_be codec */
        "utf_32be"           , "utf_32_be",

        /* utf_32_le codec */
        "utf_32le"           , "utf_32_le",

        /* utf_7 codec */
        "u7"                 , "utf_7",
        "utf7"               , "utf_7",
        "unicode_1_1_utf_7"  , "utf_7",

        /* utf_8 codec */
        "u8"                 , "utf_8",
        "utf"                , "utf_8",
        "utf8"               , "utf_8",
        "utf8_ucs2"          , "utf_8",
        "utf8_ucs4"          , "utf_8",
        NULL, NULL
    };
    const char *const *entry;

    /* Nothing to look up; hand the NULL back instead of dereferencing it. */
    if (charset == NULL) {
        return NULL;
    }

    /* Step two slots at a time: entry[0] is the alias, entry[1] its target. */
    for (entry = aliases; entry[0] != NULL; entry += 2) {
        if (charset_name_equal(charset, entry[0])) {
            return entry[1];
        }
    }

    /* Unknown name: pass the caller's string through untouched. */
    return charset;
}