diff options
author | pfaedit <pfaedit> | 2007-05-28 14:13:18 +0000 |
---|---|---|
committer | pfaedit <pfaedit> | 2007-05-28 14:13:18 +0000 |
commit | d6691effa993ea2c8afef2579124b9216bafb0fe (patch) | |
tree | 3099154a9332927a065e9a388b9bdf8a0850571f /Unicode | |
parent | d1aae3c0ebdd90f5c00953d43e1427ee4b38df8f (diff) |
Switch from UCS2 to UCS4 internally.
Diffstat (limited to 'Unicode')
-rw-r--r-- | Unicode/Makefile.dynamic.in | 4 | ||||
-rw-r--r-- | Unicode/Makefile.in | 4 | ||||
-rw-r--r-- | Unicode/backtrns.c | 26 | ||||
-rw-r--r-- | Unicode/cjk.c | 32 | ||||
-rw-r--r-- | Unicode/dump.c | 26 | ||||
-rw-r--r-- | Unicode/gwwiconv.c | 510 | ||||
-rw-r--r-- | Unicode/unialt.c | 12 | ||||
-rw-r--r-- | Unicode/ustring.c | 44 |
8 files changed, 583 insertions, 75 deletions
diff --git a/Unicode/Makefile.dynamic.in b/Unicode/Makefile.dynamic.in index 499d52e4..ec2966c3 100644 --- a/Unicode/Makefile.dynamic.in +++ b/Unicode/Makefile.dynamic.in @@ -7,8 +7,8 @@ libdir = @libdir@ VPATH = @srcdir@ bindir = @bindir@ -GU_VERSION=2 -GU_REVISION=3 +GU_VERSION=3 +GU_REVISION=0 GU_AGE=0 LIBTOOL = @LIBTOOL@ diff --git a/Unicode/Makefile.in b/Unicode/Makefile.in index 499d52e4..ec2966c3 100644 --- a/Unicode/Makefile.in +++ b/Unicode/Makefile.in @@ -7,8 +7,8 @@ libdir = @libdir@ VPATH = @srcdir@ bindir = @bindir@ -GU_VERSION=2 -GU_REVISION=3 +GU_VERSION=3 +GU_REVISION=0 GU_AGE=0 LIBTOOL = @LIBTOOL@ diff --git a/Unicode/backtrns.c b/Unicode/backtrns.c index 1c1a2b96..f9402da5 100644 --- a/Unicode/backtrns.c +++ b/Unicode/backtrns.c @@ -20,18 +20,18 @@ static const unsigned long unicode_backtrans_0[] = { 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, - 0x037fff, 0x8c32101, 0x2032981, 0x20339c5, 0x9c109af, 0x203a181, 0x4309c1, 0xfa33bdf, - 0xda301cf, 0x4779c1, 0x8c32101, 0x88329c1, 0x0729c1, 0x033bff, 0x473981, 0x6432309, - 0xfa76bcf, 0xfa729c1, 0x88369c5, 0x88329c5, 0x8a3018f, 0x032985, 0x8a33981, 0x60369c5, - 0x8c3018f, 0x8832981, 0x8c32101, 0x88329c1, 0x8830981, 0x88309c5, 0x8830981, 0x8c32101, + 0x037fff, 0x8c32101, 0x2032981, 0x20339c5, 0x9c109af, 0x203a181, 0x4109c1, 0xfa33bdf, + 0xda301cf, 0x4779c1, 0x8c32101, 0x88329c1, 0x0729c1, 0x013bff, 0x473981, 0x6432309, + 0xfa76bcf, 0xfa729c1, 0x88169c5, 0x88129c5, 0x8a3018f, 0x032985, 0x8a33981, 0x60369c5, + 0x8c3018f, 0x8812981, 0x8c32101, 0x88329c1, 0x8810981, 0x88109c5, 0x8810981, 0x8c32101, 0x4433105, 0x443330f, 0x43330f, 0x433309, 0x433b0f, 0x433b09, 0x8c33b09, 0x433107, 0x4433105, 0x4433b0f, 0x4433105, 0x43330f, 0x433105, 0x43330f, 0x43330f, 0x433305, - 0x8832201, 0x433105, 0x4433105, 0x4433b07, 0x43330f, 
0x433b09, 0x433b0f, 0xfa7298f, - 0x8c33b09, 0x433105, 0x433b0f, 0x433b0d, 0x433b0f, 0x433203, 0x8c32201, 0x8c33b0f, + 0x8812201, 0x433105, 0x4433105, 0x4433b07, 0x43330f, 0x433b09, 0x433b0f, 0xfa5298f, + 0x8c33b09, 0x433105, 0x433b0f, 0x433b0d, 0x433b0f, 0x413203, 0x8c12201, 0x8c33b0f, 0x5433105, 0x543330f, 0x43330f, 0x433309, 0x433b0f, 0x433b09, 0x8c33b09, 0x433107, 0x5433105, 0x5433b0f, 0x5433105, 0x43330f, 0x5433105, 0x543330f, 0x43330f, 0x433305, - 0x8c32201, 0x433105, 0x5433105, 0x5433b07, 0x43330f, 0x433b09, 0x433b0f, 0xfa7698f, - 0xcc33b09, 0x5433105, 0x5433b0f, 0x433b0d, 0x5433b0f, 0x433203, 0x8c32201, 0x433101 + 0x8c12201, 0x433105, 0x5433105, 0x5433b07, 0x43330f, 0x433b09, 0x433b0f, 0xfa7698f, + 0xcc33b09, 0x5433105, 0x5433b0f, 0x433b0d, 0x5433b0f, 0x413203, 0x8c12201, 0x433101 }; static const unsigned long unicode_backtrans_1[] = { @@ -43,14 +43,14 @@ static const unsigned long unicode_backtrans_1[] = { 0x400208, 0x400208, 0x400a08, 0x5400a08, 0x000000, 0x000000, 0x400a08, 0x400208, 0x400104, 0x8c20104, 0x8c00000, 0x8c00000, 0x400004, 0x400004, 0x400a08, 0x400a08, 0x8c00208, 0x400002, 0x400002, 0x400a08, 0x400a08, 0x400002, 0x400002, 0x8c00000, - 0x8c00000, 0x8c20802, 0x8c20802, 0x400802, 0x1400802, 0x400a08, 0x400a08, 0x400002, + 0x8c00000, 0x8c00802, 0x8c00802, 0x400802, 0x1400802, 0x400a08, 0x400a08, 0x400002, 0x1400002, 0x8c00000, 0x8c00208, 0xcc00208, 0x4400a08, 0x5400a08, 0x000000, 0x000000, 0x400002, 0x400002, 0x8c32000, 0xcc32000, 0x400002, 0x400002, 0x400808, 0x400808, 0x400002, 0x400002, 0x400002, 0x400002, 0x400004, 0x400004, 0x400106, 0x400106, - 0x432a0a, 0x432a0a, 0x400002, 0x400002, 0x400002, 0x400002, 0x8c00208, 0x8c00208, + 0x412a0a, 0x412a0a, 0x400002, 0x400002, 0x400002, 0x400002, 0x8c00208, 0x8c00208, 0x400208, 0x400208, 0x400208, 0x5400208, 0x400004, 0x400004, 0x400002, 0x400002, 0x400002, 0x400002, 0x400a08, 0x400a08, 0x401000, 0x401000, 0x401000, 0x401000, - 0x433000, 0x400802, 0x400802, 0x400806, 0x400806, 0x422a0a, 0x422a0a, 
0x000000, + 0x433000, 0x400802, 0x400802, 0x400806, 0x400806, 0x402a0a, 0x402a0a, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x070000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, @@ -126,7 +126,7 @@ static const unsigned long unicode_backtrans_3[] = { 0x400040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0x000000, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, - 0xfa40040, 0xfa40040, 0x400040, 0x400040, 0x400040, 0x400040, 0x400040, 0x400040, + 0xfa40040, 0xfa60040, 0x400040, 0x400040, 0x400040, 0x400040, 0x400040, 0x400040, 0x400040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa60040, 0xfa40040, 0x440040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, @@ -354,7 +354,7 @@ static const unsigned long unicode_backtrans_21[] = { 0x000000, 0xe800000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x040000, 0x000000, 0x8800000, 0x000000, 0x000000, 0xdc00010, 0x000000, 0x040000, 0x000000, 0x000000, 0x000000, 0x040000, 0x000000, 0x000000, 0x000000, - 0x000000, 0xc800000, 0x8c70000, 0x000000, 0x000000, 0x000000, 0x8820000, 0x000000, + 0x000000, 0xc800000, 0x8c70000, 0x000000, 0x000000, 0x000000, 0x8800000, 0x000000, 0x000000, 0x000000, 0x000000, 0x8a00000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x040000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, diff --git a/Unicode/cjk.c b/Unicode/cjk.c index 963545c9..84bdf121 100644 --- a/Unicode/cjk.c +++ b/Unicode/cjk.c @@ -1107,8 +1107,8 @@ const unichar_t unicode_from_jis208[] 
= { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x500d, 0x4204, 0x0a14, 0x4213, - 0x9fa5, 0x0000, 0x1c2a, 0x0000, 0xea08, 0xbfff, 0xf04f, 0x4204 + 0x0000, 0x0000, 0x0000, 0x0000, 0x9fa5, 0x1c2a, 0xbfffe018, 0x4204f04f, + 0x804ef70, 0x0000, 0x2288, 0x804f240, 0x42130a14, 0x40015360, 0xbfffe038, 0x804bcc9 }; const unichar_t unicode_from_jis212[] = { @@ -2216,8 +2216,8 @@ const unichar_t unicode_from_jis212[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x2003, 0x3001, 0x3002, 0xff0c, 0xff0e, 0x30fb, 0xff1a, 0xff1b + 0x0000, 0x0000, 0x0000, 0x0000, 0x2003, 0x3001, 0x3002, 0xff0c, + 0xff0e, 0x30fb, 0xff1a, 0xff1b, 0xff1f, 0xff01, 0x309b, 0x309c }; static const unsigned short jis_from_unicode_0[] = { @@ -5804,7 +5804,7 @@ static const unsigned short * const jis_from_unicode_[] = { jis_from_unicode_ff }; -struct charmap2 jis_from_unicode = { 0, 255, (unsigned short **) jis_from_unicode_, (unsigned short *) unicode_from_jis212 }; +struct charmap2 jis_from_unicode = { 0, 255, (unsigned short **) jis_from_unicode_, (unichar_t *) unicode_from_jis212 }; const unichar_t unicode_from_big5[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -8879,7 +8879,7 @@ const unichar_t unicode_from_big5[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0xfe4f, 0x0000, 0xffff, 0xffff, 0x00ff, 0x0000, 0x0000, 0x0000 + 0xfe4f, 0xffffffff, 0x00ff, 0x0000, 0x0100, 0x0002, 0x6000, 0x804f240 }; static const unsigned short 
big5_from_unicode_0[] = { @@ -12711,7 +12711,7 @@ static const unsigned short * const big5_from_unicode_[] = { big5_from_unicode_ff }; -struct charmap2 big5_from_unicode = { 0, 255, (unsigned short **) big5_from_unicode_, (unsigned short *) unicode_from_big5 }; +struct charmap2 big5_from_unicode = { 0, 255, (unsigned short **) big5_from_unicode_, (unichar_t *) unicode_from_big5 }; const unichar_t unicode_from_big5hkscs[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -16810,7 +16810,7 @@ const unichar_t unicode_from_big5hkscs[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x79d4, 0x0000, 0xfefe, 0x0000, 0x00ff, 0x0000, 0x0000, 0x0000 + 0x79d4, 0xfefe, 0x00ff, 0x0000, 0x0100, 0x0005, 0x8000, 0x804f240 }; static const unsigned short big5hkscs_from_unicode_0[] = { @@ -22532,7 +22532,7 @@ static const unsigned short * const big5hkscs_from_unicode_[] = { big5hkscs_from_unicode_ff }; -struct charmap2 big5hkscs_from_unicode = { 0, 255, (unsigned short **) big5hkscs_from_unicode_, (unsigned short *) unicode_from_big5hkscs }; +struct charmap2 big5hkscs_from_unicode = { 0, 255, (unsigned short **) big5hkscs_from_unicode_, (unichar_t *) unicode_from_big5hkscs }; const unichar_t unicode_from_ksc5601[] = { 0x3164, 0x3001, 0x3002, 0x30fb, 0x2025, 0x22ef, 0x00a8, 0x3003, @@ -23639,8 +23639,8 @@ const unichar_t unicode_from_ksc5601[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffffffff, + 0xffffffff, 0xffffffff, 0x0000, 0x0000, 0x79d4, 0x0004, 0x2288, 0x804f240 }; static 
unsigned short ksc5601_from_unicode_0[] = { @@ -28907,7 +28907,7 @@ static const unsigned short * const ksc5601_from_unicode_[] = { ksc5601_from_unicode_ff }; -struct charmap2 ksc5601_from_unicode = { 0, 255, (unsigned short **) ksc5601_from_unicode_, (unsigned short *) unicode_from_ksc5601 }; +struct charmap2 ksc5601_from_unicode = { 0, 255, (unsigned short **) ksc5601_from_unicode_, (unichar_t *) unicode_from_ksc5601 }; const unichar_t unicode_from_johab[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -38145,7 +38145,7 @@ static const unsigned short * const johab_from_unicode_[] = { johab_from_unicode_ff }; -struct charmap2 johab_from_unicode = { 0, 255, (unsigned short **) johab_from_unicode_, (unsigned short *) unicode_from_johab }; +struct charmap2 johab_from_unicode = { 0, 255, (unsigned short **) johab_from_unicode_, (unichar_t *) unicode_from_johab }; const unichar_t unicode_from_gb2312[] = { 0x3000, 0x3001, 0x3002, 0x30fb, 0x02c9, 0x02c7, 0x00a8, 0x3003, @@ -39252,8 +39252,8 @@ const unichar_t unicode_from_gb2312[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, - 0x9f44, 0x0000, 0xffff, 0xffff, 0x00ff, 0x0000, 0x0000, 0x0000 + 0x0000, 0x0000, 0x0000, 0x0000, 0x9f44, 0xffffffff, 0x00ff, 0x0000, + 0x0100, 0x0003, 0x2288, 0x804f240, 0x0100, 0x0005, 0x00f8, 0x42130a14 }; static unsigned short gb2312_from_unicode_0[] = { @@ -43015,5 +43015,5 @@ static const unsigned short * const gb2312_from_unicode_[] = { gb2312_from_unicode_ff }; -struct charmap2 gb2312_from_unicode = { 0, 255, (unsigned short **) gb2312_from_unicode_, (unsigned short *) unicode_from_gb2312 }; +struct charmap2 gb2312_from_unicode = { 0, 255, (unsigned short **) gb2312_from_unicode_, (unichar_t *) unicode_from_gb2312 }; diff --git a/Unicode/dump.c 
b/Unicode/dump.c index e2d9808d..5bd76ddb 100644 --- a/Unicode/dump.c +++ b/Unicode/dump.c @@ -367,7 +367,7 @@ static void dumpjis(FILE *output,FILE *header) { continue; } if ( table[_unicode>>8]==NULL ) - table[_unicode>>8] = calloc(256,2); + table[_unicode>>8] = calloc(256,sizeof(unichar_t)); table[_unicode>>8][_unicode&0xff] = _orig; _orig -= 0x2121; _orig = (_orig>>8)*94 + (_orig&0xff); @@ -406,7 +406,7 @@ static void dumpjis(FILE *output,FILE *header) { continue; } if ( table[_unicode>>8]==NULL ) - table[_unicode>>8] = calloc(256,2); + table[_unicode>>8] = calloc(256,sizeof(unichar_t)); if ( table[_unicode>>8][_unicode&0xff]==0 ) table[_unicode>>8][_unicode&0xff] = _orig|0x8000; else @@ -471,7 +471,7 @@ static void dumpjis(FILE *output,FILE *header) { fprintf( output, " u_allzeros,\n" ); fprintf( output, "};\n\n" ); fprintf( header, "extern struct charmap2 jis_from_unicode;\n" ); - fprintf( output, "struct charmap2 jis_from_unicode = { %d, %d, (unsigned short **) jis_from_unicode_, (unsigned short *) unicode_from_%s };\n\n", + fprintf( output, "struct charmap2 jis_from_unicode = { %d, %d, (unsigned short **) jis_from_unicode_, (unichar_t *) unicode_from_%s };\n\n", first, last, cjknames[j]); for ( k=first; k<=last; ++k ) @@ -528,7 +528,7 @@ static void dumpbig5(FILE *output,FILE *header) { } unicode[_orig-0xa100] = _unicode; if ( table[_unicode>>8]==NULL ) - table[_unicode>>8] = calloc(256,2); + table[_unicode>>8] = calloc(256,sizeof(unichar_t)); table[_unicode>>8][_unicode&0xff] = _orig; if ( used[_unicode>>8]==NULL ) { used[_unicode>>8] = calloc(256,sizeof(long)); @@ -572,7 +572,7 @@ static void dumpbig5(FILE *output,FILE *header) { fprintf( output, " u_allzeros,\n" ); fprintf( output, "};\n\n" ); fprintf( header, "extern struct charmap2 %s_from_unicode;\n", cjknames[j]); - fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unsigned short *) unicode_from_%s };\n\n", + fprintf( output, "struct charmap2 
%s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unichar_t *) unicode_from_%s };\n\n", cjknames[j], first, last, cjknames[j], cjknames[j]); for ( k=first; k<=last; ++k ) @@ -610,7 +610,7 @@ static void dumpbig5hkscs(FILE *output,FILE *header) { } unicode[_orig-0x8100] = _unicode; if ( table[_unicode>>8]==NULL ) - table[_unicode>>8] = calloc(256,2); + table[_unicode>>8] = calloc(256,sizeof(unichar_t)); table[_unicode>>8][_unicode&0xff] = _orig; if ( used[_unicode>>8]==NULL ) { used[_unicode>>8] = calloc(256,sizeof(long)); @@ -654,7 +654,7 @@ static void dumpbig5hkscs(FILE *output,FILE *header) { fprintf( output, " u_allzeros,\n" ); fprintf( output, "};\n\n" ); fprintf( header, "extern struct charmap2 %s_from_unicode;\n", cjknames[j]); - fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unsigned short *) unicode_from_%s };\n\n", + fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unichar_t *) unicode_from_%s };\n\n", cjknames[j], first, last, cjknames[j], cjknames[j]); for ( k=first; k<=last; ++k ) @@ -702,7 +702,7 @@ static void dumpWansung(FILE *output,FILE *header) { } if ( _orig>=0x2121 && (_orig&0xff)>=0x21 && _orig<=0x7e7e && (_orig&0xff)<=0x7e ) { if ( table[_unicode>>8]==NULL ) - table[_unicode>>8] = calloc(256,2); + table[_unicode>>8] = calloc(256,sizeof(unichar_t)); table[_unicode>>8][_unicode&0xff] = _orig; _orig -= 0x2121; _orig = (_orig>>8)*94 + (_orig&0xff); @@ -718,7 +718,7 @@ static void dumpWansung(FILE *output,FILE *header) { } if ( _johab>=0x8431 && _johab<=0xf9fe ) { if ( jtable[_unicode>>8]==NULL ) - jtable[_unicode>>8] = calloc(256,2); + jtable[_unicode>>8] = calloc(256,sizeof(unichar_t)); jtable[_unicode>>8][_unicode&0xff] = _johab; _johab -= 0x8400; junicode[_johab] = _unicode; @@ -766,7 +766,7 @@ static void dumpWansung(FILE *output,FILE *header) { fprintf( output, " u_allzeros,\n" ); fprintf( output, "};\n\n" ); fprintf( 
header, "extern struct charmap2 %s_from_unicode;\n", cjknames[j]); - fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unsigned short *) unicode_from_%s };\n\n", + fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unichar_t *) unicode_from_%s };\n\n", cjknames[j], first, last, cjknames[j], cjknames[j]); if ( first==-1 ) @@ -812,7 +812,7 @@ static void dumpWansung(FILE *output,FILE *header) { fprintf( output, " u_allzeros,\n" ); fprintf( output, "};\n\n" ); fprintf( header, "extern struct charmap2 johab_from_unicode;\n" ); - fprintf( output, "struct charmap2 johab_from_unicode = { %d, %d, (unsigned short **) johab_from_unicode_, (unsigned short *) unicode_from_johab };\n\n", + fprintf( output, "struct charmap2 johab_from_unicode = { %d, %d, (unsigned short **) johab_from_unicode_, (unichar_t *) unicode_from_johab };\n\n", first, last ); if ( first==-1 ) @@ -855,7 +855,7 @@ static void dumpgb2312(FILE *output,FILE *header) { continue; } if ( table[_unicode>>8]==NULL ) - table[_unicode>>8] = calloc(256,2); + table[_unicode>>8] = calloc(256,sizeof(unichar_t)); table[_unicode>>8][_unicode&0xff] = _orig; _orig -= 0x2121; _orig = (_orig>>8)*94 + (_orig&0xff); @@ -902,7 +902,7 @@ static void dumpgb2312(FILE *output,FILE *header) { fprintf( output, " u_allzeros,\n" ); fprintf( output, "};\n\n" ); fprintf( header, "extern struct charmap2 %s_from_unicode;\n", cjknames[j]); - fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unsigned short *) unicode_from_%s };\n\n", + fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unichar_t *) unicode_from_%s };\n\n", cjknames[j], first, last, cjknames[j], cjknames[j]); if ( first==-1 ) diff --git a/Unicode/gwwiconv.c b/Unicode/gwwiconv.c index bd5360ca..0a4c5512 100644 --- a/Unicode/gwwiconv.c +++ b/Unicode/gwwiconv.c @@ -35,7 +35,8 @@ 
#ifndef HAVE_ICONV_H -/* I have written an limited iconv which will convert either to or from UCS2 */ +/* I have written an limited iconv which will convert either to or from unichar_t */ +/* (either UCS2 or UCS4) */ /* it will not convert latin1 to latin2, but latin1->UCS2, UCS2->latin2 */ /* it uses the encodings built into libgunicode for systems with no iconv */ /* (ie. macs before 10.3, perhaps others) */ @@ -66,6 +67,10 @@ static enum encoding name_to_enc(const char *encname) { { "UCS-2-INTERNAL", e_unicode }, { "ISO-10646/UCS2", e_unicode }, { "ISO-10646/USC2", e_unicode }, /* Old typo */ + { "UCS4", e_ucs4 }, + { "UCS-4", e_ucs4 }, + { "UCS-4-INTERNAL", e_ucs4 }, + { "ISO-10646/UCS4", e_ucs4 }, { "iso8859-1", e_iso8859_1 }, { "iso8859-2", e_iso8859_2 }, { "iso8859-3", e_iso8859_3 }, @@ -132,9 +137,15 @@ gww_iconv_t gww_iconv_open(const char *toenc,const char *fromenc) { if ( stuff.from==(enum encoding) -1 || stuff.to==(enum encoding) -1 ) { /*fprintf( stderr, "Unknown encoding\n" );*/ return( (iconv_t)(-1) ); +#ifdef UNICHAR_16 } else if ( stuff.from!=e_unicode && stuff.to!=e_unicode ) { fprintf( stderr, "Bad call to gww_iconv_open, neither arg is UCS2\n" ); return( (iconv_t)(-1) ); +#else + } else if ( stuff.from!=e_ucs4 && stuff.to!=e_ucs4 ) { + fprintf( stderr, "Bad call to gww_iconv_open, neither arg is UCS4\n" ); +return( (iconv_t)(-1) ); +#endif } ret = galloc(sizeof(struct gww_iconv_t)); @@ -168,11 +179,34 @@ return( (size_t) -1 ); int min = *inlen < *outlen ? *inlen : *outlen; min &= ~1; memcpy(*inbuf,*outbuf,min); - char_cnt = min/2; + char_cnt = min/sizeof(short); *inbuf += min; *outbuf += min; *inlen -= min; *outlen -= min; if ( *inlen==1 && *outlen>0 ) return( (size_t) -1 ); /* Incomplete multi-byte sequence */ + } else if ( cd->to==e_ucs4 ) { + int min = *inlen/sizeof(short) < *outlen/sizeof(int32) ? 
*inlen/sizeof(short) : *outlen/sizeof(int32); + int highch, lowch; + if ( endian == end_little ) { + while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) { + highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf; + ((uint8 *) outbuf)[3] = 0; ((uint8 *) outbuf)[2] = 0; + ((uint8 *) outbuf)[1] = highch; ((uint8 *) outbuf)[0] = lowch; + outbuf += sizeof(int32); inbuf += sizeof(short); + *outlen -= sizeof(int32); *inlen -= sizeof(short); + } + } else { + while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) { + highch = ((unsigned char *) *inbuf)[0], lowch = ((unsigned char *) *inbuf)[1]; + ((uint8 *) outbuf)[0] = 0; ((uint8 *) outbuf)[1] = 0; + ((uint8 *) outbuf)[2] = highch; ((uint8 *) outbuf)[3] = lowch; + outbuf += sizeof(int32); inbuf += sizeof(short); + *outlen -= sizeof(int32); *inlen -= sizeof(short); + } + } + char_cnt = min; + if ( *inlen==1 && *outlen>0 ) +return( (size_t) -1 ); /* Incomplete multi-byte sequence */ } else if ( cd->to<e_first2byte ) { struct charmap *table = NULL; table = alphabets_from_unicode[cd->to]; @@ -198,7 +232,7 @@ return( (size_t) -1 ); struct charmap2 *table = cd->to==e_johab ? &johab_from_unicode : cd->to==e_big5 ? &big5_from_unicode : &big5hkscs_from_unicode; - unichar_t *plane; + unsigned short *plane; while ( *inlen>1 && *outlen>0 ) { int highch, lowch; if ( endian == end_little ) { @@ -229,7 +263,7 @@ return( (size_t) -1 ); } else if ( cd->to==e_wansung || cd->to==e_jisgbpk ) { struct charmap2 *table = cd->to==e_wansung ? &ksc5601_from_unicode : &gb2312_from_unicode; - unichar_t *plane; + unsigned short *plane; while ( *inlen>1 && *outlen>0 ) { int highch, lowch; if ( endian == end_little ) { @@ -259,7 +293,7 @@ return( (size_t) -1 ); } } else if ( cd->to==e_sjis ) { unsigned char *plane1; - unichar_t *plane; + unsigned short *plane; while ( *inlen>1 && *outlen>0 ) { int highch, lowch; if ( endian == end_little ) { @@ -298,7 +332,7 @@ return( (size_t) -1 ); struct charmap2 *table = cd->to==e_jisgb ? 
&gb2312_from_unicode : cd->to==e_jiskorean ? &ksc5601_from_unicode : &jis_from_unicode; - unichar_t *plane; + unsigned short *plane; while ( *inlen>1 && *outlen>1 ) { int highch, lowch; if ( endian == end_little ) { @@ -359,6 +393,230 @@ return( (size_t) -1 ); fprintf( stderr, "Unexpected encoding\n" ); return( (size_t) -1 ); } + } else if ( cd->from==e_ucs4 ) { + if ( cd->to==e_unicode ) { + int min = *inlen/sizeof(int32) < *outlen/sizeof(int16) ? *inlen/sizeof(int32) : *outlen/sizeof(int16); + int highch, lowch; + if ( endian == end_little ) { + while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) { + highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf; + ((uint8 *) outbuf)[1] = highch; ((uint8 *) outbuf)[0] = lowch; + outbuf += sizeof(int16); inbuf += sizeof(int32); + *outlen -= sizeof(int16); *inlen -= sizeof(int32); + } + } else { + while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) { + highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3]; + ((uint8 *) outbuf)[0] = highch; ((uint8 *) outbuf)[1] = lowch; + outbuf += sizeof(int16); inbuf += sizeof(int32); + *outlen -= sizeof(int16); *inlen -= sizeof(int32); + } + } + char_cnt = min; + if ( *inlen>0 && *outlen>0 ) +return( (size_t) -1 ); /* Incomplete multi-byte sequence */ + } else if ( cd->to<e_first2byte ) { + struct charmap *table = NULL; + table = alphabets_from_unicode[cd->to]; + while ( *inlen>1 && *outlen>0 ) { + int highch, lowch; + if ( endian == end_little ) { + highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf; + } else { + highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3]; + } + if ( highch>=table->first && highch<=table->last && + (plane = table->table[highch])!=NULL && + (ch=plane[lowch])!=0 ) { + *((*outbuf)++) = ch; + -- *outlen; + *inlen -= 4; + *inbuf += 4; + ++char_cnt; + } else +return( (size_t) -1 ); + } + } else if ( cd->to==e_johab || cd->to==e_big5 || cd->to==e_big5hkscs ) { + 
struct charmap2 *table = cd->to==e_johab ? &johab_from_unicode : + cd->to==e_big5 ? &big5_from_unicode : + &big5hkscs_from_unicode; + unsigned short *plane; + while ( *inlen>1 && *outlen>0 ) { + int highch, lowch; + if ( endian == end_little ) { + highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf; + } else { + highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3]; + } + if ( highch==0 && lowch<=0x80 ) { + *((*outbuf)++) = highch; + --*outlen; + *inlen-=4; + *inbuf+=4; + ++char_cnt; + } else if ( *outlen==1 ) +return( (size_t) -1 ); + else if ( highch>=table->first && highch<=table->last && + (plane = table->table[highch])!=NULL && + (ch=plane[lowch])!=0 ) { + *((*outbuf)++) = (ch>>8); + *((*outbuf)++) = (ch&0xff); + *outlen -= 2; + *inlen -= 4; + *inbuf += 4; + ++char_cnt; + } else +return( (size_t) -1 ); + } + } else if ( cd->to==e_wansung || cd->to==e_jisgbpk ) { + struct charmap2 *table = cd->to==e_wansung ? &ksc5601_from_unicode : + &gb2312_from_unicode; + unsigned short *plane; + while ( *inlen>1 && *outlen>0 ) { + int highch, lowch; + if ( endian == end_little ) { + highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf; + } else { + highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3]; + } + if ( highch==0 && lowch<=0x80 ) { + *((*outbuf)++) = lowch; + --*outlen; + *inlen-=4; + *inbuf+=4; + ++char_cnt; + } else if ( *outlen==1 ) +return( (size_t) -1 ); + else if ( highch>=table->first && highch<=table->last && + (plane = table->table[highch])!=NULL && + (ch=plane[lowch])!=0 ) { + *((*outbuf)++) = (ch>>8)+0x80; + *((*outbuf)++) = (ch&0xff)+0x80; + *outlen -= 2; + *inlen -= 4; + *inbuf += 4; + ++char_cnt; + } else +return( (size_t) -1 ); + } + } else if ( cd->to==e_sjis ) { + unsigned char *plane1; + unsigned short *plane; + while ( *inlen>1 && *outlen>0 ) { + int highch, lowch; + if ( endian == end_little ) { + highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned 
char *) *inbuf; + } else { + highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3]; + } + if (( highch>=jis201_from_unicode.first && highch<=jis201_from_unicode.last && + (plane1 = jis201_from_unicode.table[highch-jis201_from_unicode.first])!=NULL && + (ch=plane1[lowch])!=0 ) || + ( highch==0 && (ch=lowch)<' ' )) { /* control chars not mapped in jis201 */ + *((*outbuf)++) = ch; + --*outlen; + *inlen-=4; + *inbuf+=4; + ++char_cnt; + } else if ( *outlen==1 ) +return( (size_t) -1 ); + else if ( highch>=jis_from_unicode.first && highch<=jis_from_unicode.last && + (plane = jis_from_unicode.table[highch-jis_from_unicode.first])!=NULL && + (ch=plane[lowch])!=0 && ch<0x8000 ) { /* no jis212 */ + int j1 = ch>>8, j2 = ch&0xff; + int ro = j1<95 ? 112 : 176; + int co = (j1&1) ? (j2>95?32:31) : 126; + *((*outbuf)++) = ((j1+1)>>1)+ro; + *((*outbuf)++) = j2+co; + *outlen -= 2; + *inlen -= 4; + *inbuf += 4; + ++char_cnt; + } else +return( (size_t) -1 ); + } + } else if ( cd->to==e_jis || cd->to==e_jis2 || + cd->to==e_jiskorean || cd->to==e_jisgb ) { + struct charmap2 *table = cd->to==e_jisgb ? &gb2312_from_unicode : + cd->to==e_jiskorean ? 
&ksc5601_from_unicode : + &jis_from_unicode; + unsigned short *plane; + while ( *inlen>1 && *outlen>1 ) { + int highch, lowch; + if ( endian == end_little ) { + highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf; + } else { + highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3]; + } + if ( highch>=table->first && highch<=table->last && + (plane = table->table[highch])!=NULL && + (ch=plane[lowch])!=0 ) { + if ( ch>=0x8000 ) { + if ( cd->to!=e_jis2 ) +return( (size_t) -1 ); + ch -= 0x8000; + } else { + if ( cd->to==e_jis2 ) +return( (size_t) -1 ); + } + *((*outbuf)++) = (ch>>8); + *((*outbuf)++) = (ch&0xff); + *outlen -= 2; + *inlen -= 4; + *inbuf += 4; + ++char_cnt; + } else +return( (size_t) -1 ); + } + } else if ( cd->to==e_utf8 ) { + while ( *inlen>1 && *outlen>0 ) { + int uch; + if ( endian == end_little ) { + uch = (((unsigned char *) *inbuf)[3]<<24) | + (((unsigned char *) *inbuf)[2]<<16) | + (((unsigned char *) *inbuf)[1]<<8) | + (*((unsigned char *) *inbuf)); + } else { + uch = (*((unsigned char *) *inbuf)<<24) | + (((unsigned char *) *inbuf)[1]<<16) | + (((unsigned char *) *inbuf)[2]<<8) | + (((unsigned char *) *inbuf)[3]); + } + if ( uch < 0x80 ) { + *((*outbuf)++) = uch; + --*outlen; + } else if ( uch<0x800 ) { + if ( *outlen==1 ) +return( (size_t) -1 ); + *((*outbuf)++) = 0xc0 | (uch>>6); + *((*outbuf)++) = 0x80 | (uch&0x3f); + *outlen-=2; + } else if ( uch < 0x10000 ) { + if ( *outlen<=2 ) +return( (size_t) -1 ); + *((*outbuf)++) = 0xe0 | (uch>>12); + *((*outbuf)++) = 0x80 | ((uch>>6)&0x3f); + *((*outbuf)++) = 0x80 | (uch&0x3f); + *outlen-=3; + } else { + uint32 val = uch-0x10000; + int u = ((val&0xf0000)>>16)+1, z=(val&0x0f000)>>12, y=(val&0x00fc0)>>6, x=val&0x0003f; + if ( *outlen<=3 ) +return( (size_t) -1 ); + *(*outbuf)++ = 0xf0 | (u>>2); + *(*outbuf)++ = 0x80 | ((u&3)<<4) | z; + *(*outbuf)++ = 0x80 | y; + *(*outbuf)++ = 0x80 | x; + *outlen-=4; + } + *inbuf += 4; + *inlen -= 4; + ++char_cnt; + } + } 
else { + fprintf( stderr, "Unexpected encoding\n" ); +return( (size_t) -1 ); + } } else if ( cd->to==e_unicode ) { const unichar_t *table; if ( cd->from<e_first2byte ) { @@ -373,7 +631,7 @@ return( (size_t) -1 ); *((*outbuf)++) = ch>>8; *((*outbuf)++) = ch&0xff; } - *outlen -= 2; + *outlen -= sizeof(unichar_t); ++char_cnt; } } else if ( cd->from==e_jis || cd->from==e_jis2 || @@ -398,7 +656,7 @@ return( (size_t) -1 ); *((*outbuf)++) = ch>>8; *((*outbuf)++) = ch&0xff; } - *outlen -= 2; + *outlen -= sizeof(unichar_t); ++char_cnt; } if ( *inlen==1 && *outlen>0 ) @@ -429,7 +687,7 @@ return( (size_t) -1 ); *((*outbuf)++) = ch>>8; *((*outbuf)++) = ch&0xff; } - *outlen -= 2; + *outlen -= sizeof(unichar_t); ++char_cnt; } } else if ( cd->from==e_johab || cd->from==e_big5 || cd->from==e_big5hkscs ) { @@ -450,7 +708,7 @@ return( (size_t) -1 ); if ( *ipt<0x7f ) { ch = *ipt; --*inlen; - *inbuf = ipt+1; + *inbuf = (char *) ipt+1; } else { if ( *inlen==1 ) return( (size_t) -1 ); @@ -469,7 +727,7 @@ return( (size_t) -1 ); *((*outbuf)++) = ch>>8; *((*outbuf)++) = ch&0xff; } - *outlen -= 2; + *outlen -= sizeof(unichar_t); ++char_cnt; } } else if ( cd->from==e_sjis ) { @@ -511,11 +769,11 @@ return( (size_t) -1 ); *((*outbuf)++) = ch>>8; *((*outbuf)++) = ch&0xff; } - *outlen -= 2; + *outlen -= sizeof(unichar_t); ++char_cnt; } } else if ( cd->from==e_utf8 ) { - while ( *inlen>0 && *outlen>1 ) { + while ( *inlen>0 && *outlen>sizeof(unichar_t) ) { unsigned char *ipt = (unsigned char *) *inbuf; int ch = *ipt; if ( ch <= 127 ) { @@ -556,7 +814,227 @@ return( (size_t) -1 ); *((*outbuf)++) = ch>>8; *((*outbuf)++) = ch&0xff; } - *outlen -= 2; + *outlen -= sizeof(unichar_t); + ++char_cnt; + } + } else { + fprintf( stderr, "Unexpected encoding\n" ); +return( (size_t) -1 ); + } + } else if ( cd->to==e_ucs4 ) { + const unichar_t *table; + if ( cd->from<e_first2byte ) { + table = unicode_from_alphabets[cd->from]; + while ( *inlen>0 && *outlen>1 ) { + unichar_t ch = table[ *(unsigned char *) 
((*inbuf)++)]; + --*inlen; + if ( endian==end_little ) { + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = ch>>8; + } else { + *((*outbuf)++) = ch>>8; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + } + *outlen -= sizeof(unichar_t); + ++char_cnt; + } + } else if ( cd->from==e_jis || cd->from==e_jis2 || + cd->from==e_jiskorean || cd->from==e_jisgb ) { + table = cd->from==e_jisgb ? unicode_from_gb2312 : + cd->from==e_jiskorean ? unicode_from_ksc5601 : + cd->from==e_jis ? unicode_from_jis208 : + unicode_from_jis212; + while ( *inlen>1 && *outlen>1 ) { + unsigned char *ipt = (unsigned char *) *inbuf; + int ch; + if ( *ipt<0x21 || *ipt>0x7e || ipt[1]<0x21 || ipt[1]>0x7e ) +return( (size_t) -1 ); + ch = (*ipt-0x21)*94 + (ipt[1]-0x21); + ch = table[ch]; + *inlen -= 2; + *inbuf = (char *) ipt+2; + if ( endian==end_little ) { + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = ch>>8; + } else { + *((*outbuf)++) = ch>>8; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + } + *outlen -= sizeof(unichar_t); + ++char_cnt; + } + if ( *inlen==1 && *outlen>0 ) +return( (size_t) -1 ); /* Incomplete multi-byte sequence */ + } else if ( cd->from==e_wansung || cd->from==e_jisgbpk ) { + table = cd->from==e_jisgbpk ? 
unicode_from_gb2312 : + unicode_from_ksc5601 ; + while ( *inlen>0 && *outlen>1 ) { + unsigned char *ipt = (unsigned char *) *inbuf; + int ch; + if ( *ipt<0x7f ) { + ch = *ipt; + --*inlen; + *inbuf = (char *) ipt+1; + } else { + if ( *ipt<0xa1 || *ipt>0xfe || ipt[1]<0xa1 || ipt[1]>0xfe || + *inlen==1 ) +return( (size_t) -1 ); + ch = (*ipt-0xa1)*94 + (ipt[1]-0xa1); + ch = table[ch]; + *inlen -= 2;; + *inbuf = (char *) ipt+2; + } + if ( endian==end_little ) { + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = ch>>8; + } else { + *((*outbuf)++) = ch>>8; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + } + *outlen -= sizeof(unichar_t); + ++char_cnt; + } + } else if ( cd->from==e_johab || cd->from==e_big5 || cd->from==e_big5hkscs ) { + int offset; + if ( cd->from==e_big5 ) { + offset = 0xa100; + table = unicode_from_big5; + } else if ( cd->from==e_big5hkscs ) { + offset = 0x8100; + table = unicode_from_big5hkscs; + } else { + offset = 0x8400; + table = unicode_from_johab; + } + while ( *inlen>0 && *outlen>1 ) { + unsigned char *ipt = (unsigned char *) *inbuf; + int ch; + if ( *ipt<0x7f ) { + ch = *ipt; + --*inlen; + *inbuf = (char *) ipt+1; + } else { + if ( *inlen==1 ) +return( (size_t) -1 ); + ch = (*ipt<<8) | ipt[1]; + if ( ch<offset ) +return( (size_t) -1 ); + ch -= offset; + ch = table[ch]; + *inlen -= 2; + *inbuf = (char *) ipt+2; + } + if ( endian==end_little ) { + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = ch>>8; + } else { + *((*outbuf)++) = ch>>8; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + } + *outlen -= sizeof(unichar_t); + ++char_cnt; + } + } else if ( cd->from==e_sjis ) { + while ( *inlen>0 && *outlen>1 ) { + unsigned char *ipt = (unsigned char *) *inbuf; + int ch1 = *ipt; + if ( ch1<127 || ( ch1>=161 && ch1<=223 )) { + ch = unicode_from_jis201[ch1]; + *inbuf = (char *) ipt+1; + --*inlen; + } else if ( *inlen==1 ) 
+return( (size_t) -1 ); + else { + int ch2 = ipt[1]; + if ( ch1 >= 129 && ch1<= 159 ) + ch1 -= 112; + else + ch1 -= 176; + ch1 <<= 1; + if ( ch2>=159 ) + ch2-= 126; + else if ( ch2>127 ) { + --ch1; + ch2 -= 32; + } else { + --ch1; + ch2 -= 31; + } + if ( ch1-0x21>=94 || ch2-0x21>=94 ) +return( (size_t) -1 ); + ch = unicode_from_jis208[(ch1-0x21)*94+(ch2-0x21)]; + *inlen -= 2; + *inbuf = (char *) ipt+2; + } + if ( endian==end_little ) { + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = ch>>8; + } else { + *((*outbuf)++) = ch>>8; + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = 0; + *((*outbuf)++) = 0; + } + *outlen -= sizeof(unichar_t); + ++char_cnt; + } + } else if ( cd->from==e_utf8 ) { + while ( *inlen>0 && *outlen>sizeof(unichar_t) ) { + unsigned char *ipt = (unsigned char *) *inbuf; + int ch = *ipt; + if ( ch <= 127 ) { + *inbuf = (char *) ipt+1; + --*inlen; + } else if ( ch<=0xdf ) { + if ( *inlen<2 || ipt[1]<0x80 ) +return( (size_t) -1 ); + ch = ((ch&0x1f)<<6) | (ipt[1] &0x3f); + *inlen -= 2; + *inbuf = (char *) ipt+2; + } else if ( ch<=0xef ) { + if ( *inlen<3 || ipt[1]<0x80 || ipt[2]<0x80 ) +return( (size_t) -1 ); + ch = ((ch&0x1f)<<12) | ((ipt[1] &0x3f)<<6) | (ipt[2]&0x3f); + *inlen -= 3; + *inbuf = (char *) ipt+3; + } else { + int w,w2; + w = ( ((*ipt&0x7)<<2) | ((ipt[1]&0x30)>>4) )-1; + w = (w<<6) | ((ipt[1]&0xf)<<2) | ((ipt[2]&0x30)>>4); + w2 = ((ipt[2]&0xf)<<6) | (ipt[3]&0x3f); + ch = w*0x400 + w2 + 0x10000; + *inbuf = (char *) ipt+4; + } + if ( endian==end_little ) { + *((*outbuf)++) = ch&0xff; + *((*outbuf)++) = ch>>8; + *((*outbuf)++) = ch>>16; + *((*outbuf)++) = ch>>24; + } else { + *((*outbuf)++) = ch>>24; + *((*outbuf)++) = ch>>16; + *((*outbuf)++) = ch>>8; + *((*outbuf)++) = ch&0xff; + } + *outlen -= sizeof(unichar_t); ++char_cnt; } } else { @@ -572,6 +1050,10 @@ return( (size_t) -1 ); **outbuf = '\0'; if ( *outlen>1 ) (*outbuf)[1] = '\0'; + if ( cd->to==e_ucs4 && *outlen>3 ) { + (*outbuf)[2] = '\0'; + 
(*outbuf)[3] = '\0'; + } } return( char_cnt ); } diff --git a/Unicode/unialt.c b/Unicode/unialt.c index a667ed75..52b85814 100644 --- a/Unicode/unialt.c +++ b/Unicode/unialt.c @@ -3254,15 +3254,15 @@ static const unichar_t str_facb[] = { 0x980b, 0 }; static const unichar_t str_facc[] = { 0x983b, 0 }; static const unichar_t str_facd[] = { 0x9b12, 0 }; static const unichar_t str_face[] = { 0x9f9c, 0 }; -static const unichar_t str_facf[] = { 0x284a, 0 }; -static const unichar_t str_fad0[] = { 0x2844, 0 }; -static const unichar_t str_fad1[] = { 0x33d5, 0 }; +static const unichar_t str_facf[] = { 0x2284a, 0 }; +static const unichar_t str_fad0[] = { 0x22844, 0 }; +static const unichar_t str_fad1[] = { 0x233d5, 0 }; static const unichar_t str_fad2[] = { 0x3b9d, 0 }; static const unichar_t str_fad3[] = { 0x4018, 0 }; static const unichar_t str_fad4[] = { 0x4039, 0 }; -static const unichar_t str_fad5[] = { 0x5249, 0 }; -static const unichar_t str_fad6[] = { 0x5cd0, 0 }; -static const unichar_t str_fad7[] = { 0x7ed3, 0 }; +static const unichar_t str_fad5[] = { 0x25249, 0 }; +static const unichar_t str_fad6[] = { 0x25cd0, 0 }; +static const unichar_t str_fad7[] = { 0x27ed3, 0 }; static const unichar_t str_fad8[] = { 0x9f43, 0 }; static const unichar_t str_fad9[] = { 0x9f8e, 0 }; static const unichar_t str_fb00[] = { 0x0066, 0x0066, 0 }; diff --git a/Unicode/ustring.c b/Unicode/ustring.c index e6bde158..c1a594fd 100644 --- a/Unicode/ustring.c +++ b/Unicode/ustring.c @@ -476,7 +476,7 @@ return( ubuf ); unichar_t *utf82u_strncpy(unichar_t *ubuf,const char *utf8buf,int len) { unichar_t *upt=ubuf, *uend=ubuf+len-1; const uint8 *pt = (const uint8 *) utf8buf, *end = pt+strlen(utf8buf); - int w; + int w, w2; while ( pt<end && *pt!='\0' && upt<uend ) { if ( *pt<=127 ) @@ -487,6 +487,7 @@ unichar_t *utf82u_strncpy(unichar_t *ubuf,const char *utf8buf,int len) { } else if ( *pt<=0xef ) { *upt = ((*pt&0xf)<<12) | ((pt[1]&0x3f)<<6) | (pt[2]&0x3f); pt += 3; +#ifdef UNICHAR_16 } else if ( 
upt+1<uend ) { /* Um... I don't support surrogates */ w = ( ((*pt&0x7)<<2) | ((pt[1]&0x30)>>4) )-1; @@ -496,6 +497,13 @@ unichar_t *utf82u_strncpy(unichar_t *ubuf,const char *utf8buf,int len) { } else { /* no space for surrogate */ pt += 4; +#else + } else { + w = ( ((*pt&0x7)<<2) | ((pt[1]&0x30)>>4) )-1; + w = (w<<6) | ((pt[1]&0xf)<<2) | ((pt[2]&0x30)>>4); + w2 = ((pt[2]&0xf)<<6) | (pt[3]&0x3f); + *upt = w*0x400 + w2 + 0x10000; +#endif } ++upt; } @@ -507,8 +515,9 @@ unichar_t *utf82u_strcpy(unichar_t *ubuf,const char *utf8buf) { return( utf82u_strncpy(ubuf,utf8buf,strlen(utf8buf)+1)); } -int32 *utf82u32_strncpy(int32 *ubuf,const char *utf8buf,int len) { - int32 *upt=ubuf, *uend=ubuf+len-1; +# ifdef UNICHAR_16 +uint32 *utf82u32_strncpy(uint32 *ubuf,const char *utf8buf,int len) { + uint32 *upt=ubuf, *uend=ubuf+len-1; const uint8 *pt = (const uint8 *) utf8buf; int w, w2; @@ -534,9 +543,9 @@ int32 *utf82u32_strncpy(int32 *ubuf,const char *utf8buf,int len) { return( ubuf ); } -char *u322utf8_strncpy(char *utf8buf, const int32 *ubuf,int len) { +char *u322utf8_strncpy(char *utf8buf, const uint32 *ubuf,int len) { uint8 *pt=(uint8 *) utf8buf, *end=(uint8 *) utf8buf+len-1; - const int32 *upt = ubuf; + const uint32 *upt = ubuf; while ( *upt!='\0' && pt<end ) { if ( *upt<=127 ) @@ -568,7 +577,7 @@ char *u322utf8_strncpy(char *utf8buf, const int32 *ubuf,int len) { return( utf8buf ); } -char *u322utf8_copy(const int32 *ubuf) { +char *u322utf8_copy(const uint32 *ubuf) { int i, len; char *buf; @@ -584,6 +593,7 @@ char *u322utf8_copy(const int32 *ubuf) { buf = galloc(len+1); return( u322utf8_strncpy(buf,ubuf,len+1)); } +#endif unichar_t *utf82u_copyn(const char *utf8buf,int len) { unichar_t *ubuf = galloc((len+1)*sizeof(unichar_t)); @@ -606,17 +616,19 @@ void utf82u_strcat(unichar_t *to,const char *from) { utf82u_strcpy(to+u_strlen(to),from); } -int32 *utf82u32_copy(const char *utf8buf) { +#ifdef UNICHAR_16 +uint32 *utf82u32_copy(const char *utf8buf) { int len; - int32 *ubuf; + 
uint32 *ubuf; if ( utf8buf==NULL ) return( NULL ); len = strlen(utf8buf); - ubuf = galloc((len+1)*sizeof(int32)); + ubuf = galloc((len+1)*sizeof(uint32)); return( utf82u32_strncpy(ubuf,utf8buf,len+1)); } +#endif char *u2utf8_strcpy(char *utf8buf,const unichar_t *ubuf) { char *pt = utf8buf; @@ -627,6 +639,7 @@ char *u2utf8_strcpy(char *utf8buf,const unichar_t *ubuf) { else if ( *ubuf<0x800 ) { *pt++ = 0xc0 | (*ubuf>>6); *pt++ = 0x80 | (*ubuf&0x3f); +#ifdef UNICHAR_16 } else if ( *ubuf>=0xd800 && *ubuf<0xdc00 && ubuf[1]>=0xdc00 && ubuf[1]<0xe000 ) { int u = ((*ubuf>>6)&0xf)+1, y = ((*ubuf&3)<<4) | ((ubuf[1]>>6)&0xf); *pt++ = 0xf0 | (u>>2); @@ -637,6 +650,19 @@ char *u2utf8_strcpy(char *utf8buf,const unichar_t *ubuf) { *pt++ = 0xe0 | (*ubuf>>12); *pt++ = 0x80 | ((*ubuf>>6)&0x3f); *pt++ = 0x80 | (*ubuf&0x3f); +#else + } else if ( *ubuf < 0x10000 ) { + *pt++ = 0xe0 | (*ubuf>>12); + *pt++ = 0x80 | ((*ubuf>>6)&0x3f); + *pt++ = 0x80 | (*ubuf&0x3f); + } else { + uint32 val = *ubuf-0x10000; + int u = ((val&0xf0000)>>16)+1, z=(val&0x0f000)>>12, y=(val&0x00fc0)>>6, x=val&0x0003f; + *pt++ = 0xf0 | (u>>2); + *pt++ = 0x80 | ((u&3)<<4) | z; + *pt++ = 0x80 | y; + *pt++ = 0x80 | x; +#endif } ++ubuf; } |