summaryrefslogtreecommitdiff
path: root/Unicode
diff options
context:
space:
mode:
authorpfaedit <pfaedit>2007-05-28 14:13:18 +0000
committerpfaedit <pfaedit>2007-05-28 14:13:18 +0000
commitd6691effa993ea2c8afef2579124b9216bafb0fe (patch)
tree3099154a9332927a065e9a388b9bdf8a0850571f /Unicode
parentd1aae3c0ebdd90f5c00953d43e1427ee4b38df8f (diff)
Switch from UCS2 to UCS4 internally.
Diffstat (limited to 'Unicode')
-rw-r--r--Unicode/Makefile.dynamic.in4
-rw-r--r--Unicode/Makefile.in4
-rw-r--r--Unicode/backtrns.c26
-rw-r--r--Unicode/cjk.c32
-rw-r--r--Unicode/dump.c26
-rw-r--r--Unicode/gwwiconv.c510
-rw-r--r--Unicode/unialt.c12
-rw-r--r--Unicode/ustring.c44
8 files changed, 583 insertions, 75 deletions
diff --git a/Unicode/Makefile.dynamic.in b/Unicode/Makefile.dynamic.in
index 499d52e4..ec2966c3 100644
--- a/Unicode/Makefile.dynamic.in
+++ b/Unicode/Makefile.dynamic.in
@@ -7,8 +7,8 @@ libdir = @libdir@
VPATH = @srcdir@
bindir = @bindir@
-GU_VERSION=2
-GU_REVISION=3
+GU_VERSION=3
+GU_REVISION=0
GU_AGE=0
LIBTOOL = @LIBTOOL@
diff --git a/Unicode/Makefile.in b/Unicode/Makefile.in
index 499d52e4..ec2966c3 100644
--- a/Unicode/Makefile.in
+++ b/Unicode/Makefile.in
@@ -7,8 +7,8 @@ libdir = @libdir@
VPATH = @srcdir@
bindir = @bindir@
-GU_VERSION=2
-GU_REVISION=3
+GU_VERSION=3
+GU_REVISION=0
GU_AGE=0
LIBTOOL = @LIBTOOL@
diff --git a/Unicode/backtrns.c b/Unicode/backtrns.c
index 1c1a2b96..f9402da5 100644
--- a/Unicode/backtrns.c
+++ b/Unicode/backtrns.c
@@ -20,18 +20,18 @@ static const unsigned long unicode_backtrans_0[] = {
0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001,
0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001,
0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001, 0x000001,
- 0x037fff, 0x8c32101, 0x2032981, 0x20339c5, 0x9c109af, 0x203a181, 0x4309c1, 0xfa33bdf,
- 0xda301cf, 0x4779c1, 0x8c32101, 0x88329c1, 0x0729c1, 0x033bff, 0x473981, 0x6432309,
- 0xfa76bcf, 0xfa729c1, 0x88369c5, 0x88329c5, 0x8a3018f, 0x032985, 0x8a33981, 0x60369c5,
- 0x8c3018f, 0x8832981, 0x8c32101, 0x88329c1, 0x8830981, 0x88309c5, 0x8830981, 0x8c32101,
+ 0x037fff, 0x8c32101, 0x2032981, 0x20339c5, 0x9c109af, 0x203a181, 0x4109c1, 0xfa33bdf,
+ 0xda301cf, 0x4779c1, 0x8c32101, 0x88329c1, 0x0729c1, 0x013bff, 0x473981, 0x6432309,
+ 0xfa76bcf, 0xfa729c1, 0x88169c5, 0x88129c5, 0x8a3018f, 0x032985, 0x8a33981, 0x60369c5,
+ 0x8c3018f, 0x8812981, 0x8c32101, 0x88329c1, 0x8810981, 0x88109c5, 0x8810981, 0x8c32101,
0x4433105, 0x443330f, 0x43330f, 0x433309, 0x433b0f, 0x433b09, 0x8c33b09, 0x433107,
0x4433105, 0x4433b0f, 0x4433105, 0x43330f, 0x433105, 0x43330f, 0x43330f, 0x433305,
- 0x8832201, 0x433105, 0x4433105, 0x4433b07, 0x43330f, 0x433b09, 0x433b0f, 0xfa7298f,
- 0x8c33b09, 0x433105, 0x433b0f, 0x433b0d, 0x433b0f, 0x433203, 0x8c32201, 0x8c33b0f,
+ 0x8812201, 0x433105, 0x4433105, 0x4433b07, 0x43330f, 0x433b09, 0x433b0f, 0xfa5298f,
+ 0x8c33b09, 0x433105, 0x433b0f, 0x433b0d, 0x433b0f, 0x413203, 0x8c12201, 0x8c33b0f,
0x5433105, 0x543330f, 0x43330f, 0x433309, 0x433b0f, 0x433b09, 0x8c33b09, 0x433107,
0x5433105, 0x5433b0f, 0x5433105, 0x43330f, 0x5433105, 0x543330f, 0x43330f, 0x433305,
- 0x8c32201, 0x433105, 0x5433105, 0x5433b07, 0x43330f, 0x433b09, 0x433b0f, 0xfa7698f,
- 0xcc33b09, 0x5433105, 0x5433b0f, 0x433b0d, 0x5433b0f, 0x433203, 0x8c32201, 0x433101
+ 0x8c12201, 0x433105, 0x5433105, 0x5433b07, 0x43330f, 0x433b09, 0x433b0f, 0xfa7698f,
+ 0xcc33b09, 0x5433105, 0x5433b0f, 0x433b0d, 0x5433b0f, 0x413203, 0x8c12201, 0x433101
};
static const unsigned long unicode_backtrans_1[] = {
@@ -43,14 +43,14 @@ static const unsigned long unicode_backtrans_1[] = {
0x400208, 0x400208, 0x400a08, 0x5400a08, 0x000000, 0x000000, 0x400a08, 0x400208,
0x400104, 0x8c20104, 0x8c00000, 0x8c00000, 0x400004, 0x400004, 0x400a08, 0x400a08,
0x8c00208, 0x400002, 0x400002, 0x400a08, 0x400a08, 0x400002, 0x400002, 0x8c00000,
- 0x8c00000, 0x8c20802, 0x8c20802, 0x400802, 0x1400802, 0x400a08, 0x400a08, 0x400002,
+ 0x8c00000, 0x8c00802, 0x8c00802, 0x400802, 0x1400802, 0x400a08, 0x400a08, 0x400002,
0x1400002, 0x8c00000, 0x8c00208, 0xcc00208, 0x4400a08, 0x5400a08, 0x000000, 0x000000,
0x400002, 0x400002, 0x8c32000, 0xcc32000, 0x400002, 0x400002, 0x400808, 0x400808,
0x400002, 0x400002, 0x400002, 0x400002, 0x400004, 0x400004, 0x400106, 0x400106,
- 0x432a0a, 0x432a0a, 0x400002, 0x400002, 0x400002, 0x400002, 0x8c00208, 0x8c00208,
+ 0x412a0a, 0x412a0a, 0x400002, 0x400002, 0x400002, 0x400002, 0x8c00208, 0x8c00208,
0x400208, 0x400208, 0x400208, 0x5400208, 0x400004, 0x400004, 0x400002, 0x400002,
0x400002, 0x400002, 0x400a08, 0x400a08, 0x401000, 0x401000, 0x401000, 0x401000,
- 0x433000, 0x400802, 0x400802, 0x400806, 0x400806, 0x422a0a, 0x422a0a, 0x000000,
+ 0x433000, 0x400802, 0x400802, 0x400806, 0x400806, 0x402a0a, 0x402a0a, 0x000000,
0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000,
0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000,
0x000000, 0x000000, 0x070000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000,
@@ -126,7 +126,7 @@ static const unsigned long unicode_backtrans_3[] = {
0x400040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040,
0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040,
0xfa40040, 0xfa40040, 0x000000, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040,
- 0xfa40040, 0xfa40040, 0x400040, 0x400040, 0x400040, 0x400040, 0x400040, 0x400040,
+ 0xfa40040, 0xfa60040, 0x400040, 0x400040, 0x400040, 0x400040, 0x400040, 0x400040,
0x400040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040,
0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040,
0xfa60040, 0xfa40040, 0x440040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040, 0xfa40040,
@@ -354,7 +354,7 @@ static const unsigned long unicode_backtrans_21[] = {
0x000000, 0xe800000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000,
0x000000, 0x040000, 0x000000, 0x8800000, 0x000000, 0x000000, 0xdc00010, 0x000000,
0x040000, 0x000000, 0x000000, 0x000000, 0x040000, 0x000000, 0x000000, 0x000000,
- 0x000000, 0xc800000, 0x8c70000, 0x000000, 0x000000, 0x000000, 0x8820000, 0x000000,
+ 0x000000, 0xc800000, 0x8c70000, 0x000000, 0x000000, 0x000000, 0x8800000, 0x000000,
0x000000, 0x000000, 0x000000, 0x8a00000, 0x000000, 0x000000, 0x000000, 0x000000,
0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x040000, 0x000000, 0x000000,
0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000,
diff --git a/Unicode/cjk.c b/Unicode/cjk.c
index 963545c9..84bdf121 100644
--- a/Unicode/cjk.c
+++ b/Unicode/cjk.c
@@ -1107,8 +1107,8 @@ const unichar_t unicode_from_jis208[] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x500d, 0x4204, 0x0a14, 0x4213,
- 0x9fa5, 0x0000, 0x1c2a, 0x0000, 0xea08, 0xbfff, 0xf04f, 0x4204
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x9fa5, 0x1c2a, 0xbfffe018, 0x4204f04f,
+ 0x804ef70, 0x0000, 0x2288, 0x804f240, 0x42130a14, 0x40015360, 0xbfffe038, 0x804bcc9
};
const unichar_t unicode_from_jis212[] = {
@@ -2216,8 +2216,8 @@ const unichar_t unicode_from_jis212[] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x2003, 0x3001, 0x3002, 0xff0c, 0xff0e, 0x30fb, 0xff1a, 0xff1b
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x2003, 0x3001, 0x3002, 0xff0c,
+ 0xff0e, 0x30fb, 0xff1a, 0xff1b, 0xff1f, 0xff01, 0x309b, 0x309c
};
static const unsigned short jis_from_unicode_0[] = {
@@ -5804,7 +5804,7 @@ static const unsigned short * const jis_from_unicode_[] = {
jis_from_unicode_ff
};
-struct charmap2 jis_from_unicode = { 0, 255, (unsigned short **) jis_from_unicode_, (unsigned short *) unicode_from_jis212 };
+struct charmap2 jis_from_unicode = { 0, 255, (unsigned short **) jis_from_unicode_, (unichar_t *) unicode_from_jis212 };
const unichar_t unicode_from_big5[] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@@ -8879,7 +8879,7 @@ const unichar_t unicode_from_big5[] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0xfe4f, 0x0000, 0xffff, 0xffff, 0x00ff, 0x0000, 0x0000, 0x0000
+ 0xfe4f, 0xffffffff, 0x00ff, 0x0000, 0x0100, 0x0002, 0x6000, 0x804f240
};
static const unsigned short big5_from_unicode_0[] = {
@@ -12711,7 +12711,7 @@ static const unsigned short * const big5_from_unicode_[] = {
big5_from_unicode_ff
};
-struct charmap2 big5_from_unicode = { 0, 255, (unsigned short **) big5_from_unicode_, (unsigned short *) unicode_from_big5 };
+struct charmap2 big5_from_unicode = { 0, 255, (unsigned short **) big5_from_unicode_, (unichar_t *) unicode_from_big5 };
const unichar_t unicode_from_big5hkscs[] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@@ -16810,7 +16810,7 @@ const unichar_t unicode_from_big5hkscs[] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x79d4, 0x0000, 0xfefe, 0x0000, 0x00ff, 0x0000, 0x0000, 0x0000
+ 0x79d4, 0xfefe, 0x00ff, 0x0000, 0x0100, 0x0005, 0x8000, 0x804f240
};
static const unsigned short big5hkscs_from_unicode_0[] = {
@@ -22532,7 +22532,7 @@ static const unsigned short * const big5hkscs_from_unicode_[] = {
big5hkscs_from_unicode_ff
};
-struct charmap2 big5hkscs_from_unicode = { 0, 255, (unsigned short **) big5hkscs_from_unicode_, (unsigned short *) unicode_from_big5hkscs };
+struct charmap2 big5hkscs_from_unicode = { 0, 255, (unsigned short **) big5hkscs_from_unicode_, (unichar_t *) unicode_from_big5hkscs };
const unichar_t unicode_from_ksc5601[] = {
0x3164, 0x3001, 0x3002, 0x30fb, 0x2025, 0x22ef, 0x00a8, 0x3003,
@@ -23639,8 +23639,8 @@ const unichar_t unicode_from_ksc5601[] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffffffff,
+ 0xffffffff, 0xffffffff, 0x0000, 0x0000, 0x79d4, 0x0004, 0x2288, 0x804f240
};
static unsigned short ksc5601_from_unicode_0[] = {
@@ -28907,7 +28907,7 @@ static const unsigned short * const ksc5601_from_unicode_[] = {
ksc5601_from_unicode_ff
};
-struct charmap2 ksc5601_from_unicode = { 0, 255, (unsigned short **) ksc5601_from_unicode_, (unsigned short *) unicode_from_ksc5601 };
+struct charmap2 ksc5601_from_unicode = { 0, 255, (unsigned short **) ksc5601_from_unicode_, (unichar_t *) unicode_from_ksc5601 };
const unichar_t unicode_from_johab[] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@@ -38145,7 +38145,7 @@ static const unsigned short * const johab_from_unicode_[] = {
johab_from_unicode_ff
};
-struct charmap2 johab_from_unicode = { 0, 255, (unsigned short **) johab_from_unicode_, (unsigned short *) unicode_from_johab };
+struct charmap2 johab_from_unicode = { 0, 255, (unsigned short **) johab_from_unicode_, (unichar_t *) unicode_from_johab };
const unichar_t unicode_from_gb2312[] = {
0x3000, 0x3001, 0x3002, 0x30fb, 0x02c9, 0x02c7, 0x00a8, 0x3003,
@@ -39252,8 +39252,8 @@ const unichar_t unicode_from_gb2312[] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff,
- 0x9f44, 0x0000, 0xffff, 0xffff, 0x00ff, 0x0000, 0x0000, 0x0000
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x9f44, 0xffffffff, 0x00ff, 0x0000,
+ 0x0100, 0x0003, 0x2288, 0x804f240, 0x0100, 0x0005, 0x00f8, 0x42130a14
};
static unsigned short gb2312_from_unicode_0[] = {
@@ -43015,5 +43015,5 @@ static const unsigned short * const gb2312_from_unicode_[] = {
gb2312_from_unicode_ff
};
-struct charmap2 gb2312_from_unicode = { 0, 255, (unsigned short **) gb2312_from_unicode_, (unsigned short *) unicode_from_gb2312 };
+struct charmap2 gb2312_from_unicode = { 0, 255, (unsigned short **) gb2312_from_unicode_, (unichar_t *) unicode_from_gb2312 };
diff --git a/Unicode/dump.c b/Unicode/dump.c
index e2d9808d..5bd76ddb 100644
--- a/Unicode/dump.c
+++ b/Unicode/dump.c
@@ -367,7 +367,7 @@ static void dumpjis(FILE *output,FILE *header) {
continue;
}
if ( table[_unicode>>8]==NULL )
- table[_unicode>>8] = calloc(256,2);
+ table[_unicode>>8] = calloc(256,sizeof(unichar_t));
table[_unicode>>8][_unicode&0xff] = _orig;
_orig -= 0x2121;
_orig = (_orig>>8)*94 + (_orig&0xff);
@@ -406,7 +406,7 @@ static void dumpjis(FILE *output,FILE *header) {
continue;
}
if ( table[_unicode>>8]==NULL )
- table[_unicode>>8] = calloc(256,2);
+ table[_unicode>>8] = calloc(256,sizeof(unichar_t));
if ( table[_unicode>>8][_unicode&0xff]==0 )
table[_unicode>>8][_unicode&0xff] = _orig|0x8000;
else
@@ -471,7 +471,7 @@ static void dumpjis(FILE *output,FILE *header) {
fprintf( output, " u_allzeros,\n" );
fprintf( output, "};\n\n" );
fprintf( header, "extern struct charmap2 jis_from_unicode;\n" );
- fprintf( output, "struct charmap2 jis_from_unicode = { %d, %d, (unsigned short **) jis_from_unicode_, (unsigned short *) unicode_from_%s };\n\n",
+ fprintf( output, "struct charmap2 jis_from_unicode = { %d, %d, (unsigned short **) jis_from_unicode_, (unichar_t *) unicode_from_%s };\n\n",
first, last, cjknames[j]);
for ( k=first; k<=last; ++k )
@@ -528,7 +528,7 @@ static void dumpbig5(FILE *output,FILE *header) {
}
unicode[_orig-0xa100] = _unicode;
if ( table[_unicode>>8]==NULL )
- table[_unicode>>8] = calloc(256,2);
+ table[_unicode>>8] = calloc(256,sizeof(unichar_t));
table[_unicode>>8][_unicode&0xff] = _orig;
if ( used[_unicode>>8]==NULL ) {
used[_unicode>>8] = calloc(256,sizeof(long));
@@ -572,7 +572,7 @@ static void dumpbig5(FILE *output,FILE *header) {
fprintf( output, " u_allzeros,\n" );
fprintf( output, "};\n\n" );
fprintf( header, "extern struct charmap2 %s_from_unicode;\n", cjknames[j]);
- fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unsigned short *) unicode_from_%s };\n\n",
+ fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unichar_t *) unicode_from_%s };\n\n",
cjknames[j], first, last, cjknames[j], cjknames[j]);
for ( k=first; k<=last; ++k )
@@ -610,7 +610,7 @@ static void dumpbig5hkscs(FILE *output,FILE *header) {
}
unicode[_orig-0x8100] = _unicode;
if ( table[_unicode>>8]==NULL )
- table[_unicode>>8] = calloc(256,2);
+ table[_unicode>>8] = calloc(256,sizeof(unichar_t));
table[_unicode>>8][_unicode&0xff] = _orig;
if ( used[_unicode>>8]==NULL ) {
used[_unicode>>8] = calloc(256,sizeof(long));
@@ -654,7 +654,7 @@ static void dumpbig5hkscs(FILE *output,FILE *header) {
fprintf( output, " u_allzeros,\n" );
fprintf( output, "};\n\n" );
fprintf( header, "extern struct charmap2 %s_from_unicode;\n", cjknames[j]);
- fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unsigned short *) unicode_from_%s };\n\n",
+ fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unichar_t *) unicode_from_%s };\n\n",
cjknames[j], first, last, cjknames[j], cjknames[j]);
for ( k=first; k<=last; ++k )
@@ -702,7 +702,7 @@ static void dumpWansung(FILE *output,FILE *header) {
}
if ( _orig>=0x2121 && (_orig&0xff)>=0x21 && _orig<=0x7e7e && (_orig&0xff)<=0x7e ) {
if ( table[_unicode>>8]==NULL )
- table[_unicode>>8] = calloc(256,2);
+ table[_unicode>>8] = calloc(256,sizeof(unichar_t));
table[_unicode>>8][_unicode&0xff] = _orig;
_orig -= 0x2121;
_orig = (_orig>>8)*94 + (_orig&0xff);
@@ -718,7 +718,7 @@ static void dumpWansung(FILE *output,FILE *header) {
}
if ( _johab>=0x8431 && _johab<=0xf9fe ) {
if ( jtable[_unicode>>8]==NULL )
- jtable[_unicode>>8] = calloc(256,2);
+ jtable[_unicode>>8] = calloc(256,sizeof(unichar_t));
jtable[_unicode>>8][_unicode&0xff] = _johab;
_johab -= 0x8400;
junicode[_johab] = _unicode;
@@ -766,7 +766,7 @@ static void dumpWansung(FILE *output,FILE *header) {
fprintf( output, " u_allzeros,\n" );
fprintf( output, "};\n\n" );
fprintf( header, "extern struct charmap2 %s_from_unicode;\n", cjknames[j]);
- fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unsigned short *) unicode_from_%s };\n\n",
+ fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unichar_t *) unicode_from_%s };\n\n",
cjknames[j], first, last, cjknames[j], cjknames[j]);
if ( first==-1 )
@@ -812,7 +812,7 @@ static void dumpWansung(FILE *output,FILE *header) {
fprintf( output, " u_allzeros,\n" );
fprintf( output, "};\n\n" );
fprintf( header, "extern struct charmap2 johab_from_unicode;\n" );
- fprintf( output, "struct charmap2 johab_from_unicode = { %d, %d, (unsigned short **) johab_from_unicode_, (unsigned short *) unicode_from_johab };\n\n",
+ fprintf( output, "struct charmap2 johab_from_unicode = { %d, %d, (unsigned short **) johab_from_unicode_, (unichar_t *) unicode_from_johab };\n\n",
first, last );
if ( first==-1 )
@@ -855,7 +855,7 @@ static void dumpgb2312(FILE *output,FILE *header) {
continue;
}
if ( table[_unicode>>8]==NULL )
- table[_unicode>>8] = calloc(256,2);
+ table[_unicode>>8] = calloc(256,sizeof(unichar_t));
table[_unicode>>8][_unicode&0xff] = _orig;
_orig -= 0x2121;
_orig = (_orig>>8)*94 + (_orig&0xff);
@@ -902,7 +902,7 @@ static void dumpgb2312(FILE *output,FILE *header) {
fprintf( output, " u_allzeros,\n" );
fprintf( output, "};\n\n" );
fprintf( header, "extern struct charmap2 %s_from_unicode;\n", cjknames[j]);
- fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unsigned short *) unicode_from_%s };\n\n",
+ fprintf( output, "struct charmap2 %s_from_unicode = { %d, %d, (unsigned short **) %s_from_unicode_, (unichar_t *) unicode_from_%s };\n\n",
cjknames[j], first, last, cjknames[j], cjknames[j]);
if ( first==-1 )
diff --git a/Unicode/gwwiconv.c b/Unicode/gwwiconv.c
index bd5360ca..0a4c5512 100644
--- a/Unicode/gwwiconv.c
+++ b/Unicode/gwwiconv.c
@@ -35,7 +35,8 @@
#ifndef HAVE_ICONV_H
-/* I have written an limited iconv which will convert either to or from UCS2 */
+/* I have written a limited iconv which will convert either to or from unichar_t */
+/* (either UCS2 or UCS4) */
/* it will not convert latin1 to latin2, but latin1->UCS2, UCS2->latin2 */
/* it uses the encodings built into libgunicode for systems with no iconv */
/* (ie. macs before 10.3, perhaps others) */
@@ -66,6 +67,10 @@ static enum encoding name_to_enc(const char *encname) {
{ "UCS-2-INTERNAL", e_unicode },
{ "ISO-10646/UCS2", e_unicode },
{ "ISO-10646/USC2", e_unicode }, /* Old typo */
+ { "UCS4", e_ucs4 },
+ { "UCS-4", e_ucs4 },
+ { "UCS-4-INTERNAL", e_ucs4 },
+ { "ISO-10646/UCS4", e_ucs4 },
{ "iso8859-1", e_iso8859_1 },
{ "iso8859-2", e_iso8859_2 },
{ "iso8859-3", e_iso8859_3 },
@@ -132,9 +137,15 @@ gww_iconv_t gww_iconv_open(const char *toenc,const char *fromenc) {
if ( stuff.from==(enum encoding) -1 || stuff.to==(enum encoding) -1 ) {
/*fprintf( stderr, "Unknown encoding\n" );*/
return( (iconv_t)(-1) );
+#ifdef UNICHAR_16
} else if ( stuff.from!=e_unicode && stuff.to!=e_unicode ) {
fprintf( stderr, "Bad call to gww_iconv_open, neither arg is UCS2\n" );
return( (iconv_t)(-1) );
+#else
+ } else if ( stuff.from!=e_ucs4 && stuff.to!=e_ucs4 ) {
+ fprintf( stderr, "Bad call to gww_iconv_open, neither arg is UCS4\n" );
+return( (iconv_t)(-1) );
+#endif
}
ret = galloc(sizeof(struct gww_iconv_t));
@@ -168,11 +179,34 @@ return( (size_t) -1 );
int min = *inlen < *outlen ? *inlen : *outlen;
min &= ~1;
memcpy(*inbuf,*outbuf,min);
- char_cnt = min/2;
+ char_cnt = min/sizeof(short);
*inbuf += min; *outbuf += min;
*inlen -= min; *outlen -= min;
if ( *inlen==1 && *outlen>0 )
return( (size_t) -1 ); /* Incomplete multi-byte sequence */
+ } else if ( cd->to==e_ucs4 ) {
+ int min = *inlen/sizeof(short) < *outlen/sizeof(int32) ? *inlen/sizeof(short) : *outlen/sizeof(int32);
+ int highch, lowch;
+ if ( endian == end_little ) {
+ while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) {
+ highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
+ ((uint8 *) outbuf)[3] = 0; ((uint8 *) outbuf)[2] = 0;
+ ((uint8 *) outbuf)[1] = highch; ((uint8 *) outbuf)[0] = lowch;
+ outbuf += sizeof(int32); inbuf += sizeof(short);
+ *outlen -= sizeof(int32); *inlen -= sizeof(short);
+ }
+ } else {
+ while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) {
+ highch = ((unsigned char *) *inbuf)[0], lowch = ((unsigned char *) *inbuf)[1];
+ ((uint8 *) outbuf)[0] = 0; ((uint8 *) outbuf)[1] = 0;
+ ((uint8 *) outbuf)[2] = highch; ((uint8 *) outbuf)[3] = lowch;
+ outbuf += sizeof(int32); inbuf += sizeof(short);
+ *outlen -= sizeof(int32); *inlen -= sizeof(short);
+ }
+ }
+ char_cnt = min;
+ if ( *inlen==1 && *outlen>0 )
+return( (size_t) -1 ); /* Incomplete multi-byte sequence */
} else if ( cd->to<e_first2byte ) {
struct charmap *table = NULL;
table = alphabets_from_unicode[cd->to];
@@ -198,7 +232,7 @@ return( (size_t) -1 );
struct charmap2 *table = cd->to==e_johab ? &johab_from_unicode :
cd->to==e_big5 ? &big5_from_unicode :
&big5hkscs_from_unicode;
- unichar_t *plane;
+ unsigned short *plane;
while ( *inlen>1 && *outlen>0 ) {
int highch, lowch;
if ( endian == end_little ) {
@@ -229,7 +263,7 @@ return( (size_t) -1 );
} else if ( cd->to==e_wansung || cd->to==e_jisgbpk ) {
struct charmap2 *table = cd->to==e_wansung ? &ksc5601_from_unicode :
&gb2312_from_unicode;
- unichar_t *plane;
+ unsigned short *plane;
while ( *inlen>1 && *outlen>0 ) {
int highch, lowch;
if ( endian == end_little ) {
@@ -259,7 +293,7 @@ return( (size_t) -1 );
}
} else if ( cd->to==e_sjis ) {
unsigned char *plane1;
- unichar_t *plane;
+ unsigned short *plane;
while ( *inlen>1 && *outlen>0 ) {
int highch, lowch;
if ( endian == end_little ) {
@@ -298,7 +332,7 @@ return( (size_t) -1 );
struct charmap2 *table = cd->to==e_jisgb ? &gb2312_from_unicode :
cd->to==e_jiskorean ? &ksc5601_from_unicode :
&jis_from_unicode;
- unichar_t *plane;
+ unsigned short *plane;
while ( *inlen>1 && *outlen>1 ) {
int highch, lowch;
if ( endian == end_little ) {
@@ -359,6 +393,230 @@ return( (size_t) -1 );
fprintf( stderr, "Unexpected encoding\n" );
return( (size_t) -1 );
}
+ } else if ( cd->from==e_ucs4 ) {
+ if ( cd->to==e_unicode ) {
+ int min = *inlen/sizeof(int32) < *outlen/sizeof(int16) ? *inlen/sizeof(int32) : *outlen/sizeof(int16);
+ int highch, lowch;
+ if ( endian == end_little ) {
+ while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) {
+ highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
+ ((uint8 *) outbuf)[1] = highch; ((uint8 *) outbuf)[0] = lowch;
+ outbuf += sizeof(int16); inbuf += sizeof(int32);
+ *outlen -= sizeof(int16); *inlen -= sizeof(int32);
+ }
+ } else {
+ while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) {
+ highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3];
+ ((uint8 *) outbuf)[0] = highch; ((uint8 *) outbuf)[1] = lowch;
+ outbuf += sizeof(int16); inbuf += sizeof(int32);
+ *outlen -= sizeof(int16); *inlen -= sizeof(int32);
+ }
+ }
+ char_cnt = min;
+ if ( *inlen>0 && *outlen>0 )
+return( (size_t) -1 ); /* Incomplete multi-byte sequence */
+ } else if ( cd->to<e_first2byte ) {
+ struct charmap *table = NULL;
+ table = alphabets_from_unicode[cd->to];
+ while ( *inlen>1 && *outlen>0 ) {
+ int highch, lowch;
+ if ( endian == end_little ) {
+ highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
+ } else {
+ highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3];
+ }
+ if ( highch>=table->first && highch<=table->last &&
+ (plane = table->table[highch])!=NULL &&
+ (ch=plane[lowch])!=0 ) {
+ *((*outbuf)++) = ch;
+ -- *outlen;
+ *inlen -= 4;
+ *inbuf += 4;
+ ++char_cnt;
+ } else
+return( (size_t) -1 );
+ }
+ } else if ( cd->to==e_johab || cd->to==e_big5 || cd->to==e_big5hkscs ) {
+ struct charmap2 *table = cd->to==e_johab ? &johab_from_unicode :
+ cd->to==e_big5 ? &big5_from_unicode :
+ &big5hkscs_from_unicode;
+ unsigned short *plane;
+ while ( *inlen>1 && *outlen>0 ) {
+ int highch, lowch;
+ if ( endian == end_little ) {
+ highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
+ } else {
+ highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3];
+ }
+ if ( highch==0 && lowch<=0x80 ) {
+ *((*outbuf)++) = highch;
+ --*outlen;
+ *inlen-=4;
+ *inbuf+=4;
+ ++char_cnt;
+ } else if ( *outlen==1 )
+return( (size_t) -1 );
+ else if ( highch>=table->first && highch<=table->last &&
+ (plane = table->table[highch])!=NULL &&
+ (ch=plane[lowch])!=0 ) {
+ *((*outbuf)++) = (ch>>8);
+ *((*outbuf)++) = (ch&0xff);
+ *outlen -= 2;
+ *inlen -= 4;
+ *inbuf += 4;
+ ++char_cnt;
+ } else
+return( (size_t) -1 );
+ }
+ } else if ( cd->to==e_wansung || cd->to==e_jisgbpk ) {
+ struct charmap2 *table = cd->to==e_wansung ? &ksc5601_from_unicode :
+ &gb2312_from_unicode;
+ unsigned short *plane;
+ while ( *inlen>1 && *outlen>0 ) {
+ int highch, lowch;
+ if ( endian == end_little ) {
+ highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
+ } else {
+ highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3];
+ }
+ if ( highch==0 && lowch<=0x80 ) {
+ *((*outbuf)++) = lowch;
+ --*outlen;
+ *inlen-=4;
+ *inbuf+=4;
+ ++char_cnt;
+ } else if ( *outlen==1 )
+return( (size_t) -1 );
+ else if ( highch>=table->first && highch<=table->last &&
+ (plane = table->table[highch])!=NULL &&
+ (ch=plane[lowch])!=0 ) {
+ *((*outbuf)++) = (ch>>8)+0x80;
+ *((*outbuf)++) = (ch&0xff)+0x80;
+ *outlen -= 2;
+ *inlen -= 4;
+ *inbuf += 4;
+ ++char_cnt;
+ } else
+return( (size_t) -1 );
+ }
+ } else if ( cd->to==e_sjis ) {
+ unsigned char *plane1;
+ unsigned short *plane;
+ while ( *inlen>1 && *outlen>0 ) {
+ int highch, lowch;
+ if ( endian == end_little ) {
+ highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
+ } else {
+ highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3];
+ }
+ if (( highch>=jis201_from_unicode.first && highch<=jis201_from_unicode.last &&
+ (plane1 = jis201_from_unicode.table[highch-jis201_from_unicode.first])!=NULL &&
+ (ch=plane1[lowch])!=0 ) ||
+ ( highch==0 && (ch=lowch)<' ' )) { /* control chars not mapped in jis201 */
+ *((*outbuf)++) = ch;
+ --*outlen;
+ *inlen-=4;
+ *inbuf+=4;
+ ++char_cnt;
+ } else if ( *outlen==1 )
+return( (size_t) -1 );
+ else if ( highch>=jis_from_unicode.first && highch<=jis_from_unicode.last &&
+ (plane = jis_from_unicode.table[highch-jis_from_unicode.first])!=NULL &&
+ (ch=plane[lowch])!=0 && ch<0x8000 ) { /* no jis212 */
+ int j1 = ch>>8, j2 = ch&0xff;
+ int ro = j1<95 ? 112 : 176;
+ int co = (j1&1) ? (j2>95?32:31) : 126;
+ *((*outbuf)++) = ((j1+1)>>1)+ro;
+ *((*outbuf)++) = j2+co;
+ *outlen -= 2;
+ *inlen -= 4;
+ *inbuf += 4;
+ ++char_cnt;
+ } else
+return( (size_t) -1 );
+ }
+ } else if ( cd->to==e_jis || cd->to==e_jis2 ||
+ cd->to==e_jiskorean || cd->to==e_jisgb ) {
+ struct charmap2 *table = cd->to==e_jisgb ? &gb2312_from_unicode :
+ cd->to==e_jiskorean ? &ksc5601_from_unicode :
+ &jis_from_unicode;
+ unsigned short *plane;
+ while ( *inlen>1 && *outlen>1 ) {
+ int highch, lowch;
+ if ( endian == end_little ) {
+ highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
+ } else {
+ highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3];
+ }
+ if ( highch>=table->first && highch<=table->last &&
+ (plane = table->table[highch])!=NULL &&
+ (ch=plane[lowch])!=0 ) {
+ if ( ch>=0x8000 ) {
+ if ( cd->to!=e_jis2 )
+return( (size_t) -1 );
+ ch -= 0x8000;
+ } else {
+ if ( cd->to==e_jis2 )
+return( (size_t) -1 );
+ }
+ *((*outbuf)++) = (ch>>8);
+ *((*outbuf)++) = (ch&0xff);
+ *outlen -= 2;
+ *inlen -= 4;
+ *inbuf += 4;
+ ++char_cnt;
+ } else
+return( (size_t) -1 );
+ }
+ } else if ( cd->to==e_utf8 ) {
+ while ( *inlen>1 && *outlen>0 ) {
+ int uch;
+ if ( endian == end_little ) {
+ uch = (((unsigned char *) *inbuf)[3]<<24) |
+ (((unsigned char *) *inbuf)[2]<<16) |
+ (((unsigned char *) *inbuf)[1]<<8) |
+ (*((unsigned char *) *inbuf));
+ } else {
+ uch = (*((unsigned char *) *inbuf)<<24) |
+ (((unsigned char *) *inbuf)[1]<<16) |
+ (((unsigned char *) *inbuf)[2]<<8) |
+ (((unsigned char *) *inbuf)[3]);
+ }
+ if ( uch < 0x80 ) {
+ *((*outbuf)++) = uch;
+ --*outlen;
+ } else if ( uch<0x800 ) {
+ if ( *outlen==1 )
+return( (size_t) -1 );
+ *((*outbuf)++) = 0xc0 | (uch>>6);
+ *((*outbuf)++) = 0x80 | (uch&0x3f);
+ *outlen-=2;
+ } else if ( uch < 0x10000 ) {
+ if ( *outlen<=2 )
+return( (size_t) -1 );
+ *((*outbuf)++) = 0xe0 | (uch>>12);
+ *((*outbuf)++) = 0x80 | ((uch>>6)&0x3f);
+ *((*outbuf)++) = 0x80 | (uch&0x3f);
+ *outlen-=3;
+ } else {
+ uint32 val = uch-0x10000;
+ int u = ((val&0xf0000)>>16)+1, z=(val&0x0f000)>>12, y=(val&0x00fc0)>>6, x=val&0x0003f;
+ if ( *outlen<=3 )
+return( (size_t) -1 );
+ *(*outbuf)++ = 0xf0 | (u>>2);
+ *(*outbuf)++ = 0x80 | ((u&3)<<4) | z;
+ *(*outbuf)++ = 0x80 | y;
+ *(*outbuf)++ = 0x80 | x;
+ *outlen-=4;
+ }
+ *inbuf += 4;
+ *inlen -= 4;
+ ++char_cnt;
+ }
+ } else {
+ fprintf( stderr, "Unexpected encoding\n" );
+return( (size_t) -1 );
+ }
} else if ( cd->to==e_unicode ) {
const unichar_t *table;
if ( cd->from<e_first2byte ) {
@@ -373,7 +631,7 @@ return( (size_t) -1 );
*((*outbuf)++) = ch>>8;
*((*outbuf)++) = ch&0xff;
}
- *outlen -= 2;
+ *outlen -= sizeof(unichar_t);
++char_cnt;
}
} else if ( cd->from==e_jis || cd->from==e_jis2 ||
@@ -398,7 +656,7 @@ return( (size_t) -1 );
*((*outbuf)++) = ch>>8;
*((*outbuf)++) = ch&0xff;
}
- *outlen -= 2;
+ *outlen -= sizeof(unichar_t);
++char_cnt;
}
if ( *inlen==1 && *outlen>0 )
@@ -429,7 +687,7 @@ return( (size_t) -1 );
*((*outbuf)++) = ch>>8;
*((*outbuf)++) = ch&0xff;
}
- *outlen -= 2;
+ *outlen -= sizeof(unichar_t);
++char_cnt;
}
} else if ( cd->from==e_johab || cd->from==e_big5 || cd->from==e_big5hkscs ) {
@@ -450,7 +708,7 @@ return( (size_t) -1 );
if ( *ipt<0x7f ) {
ch = *ipt;
--*inlen;
- *inbuf = ipt+1;
+ *inbuf = (char *) ipt+1;
} else {
if ( *inlen==1 )
return( (size_t) -1 );
@@ -469,7 +727,7 @@ return( (size_t) -1 );
*((*outbuf)++) = ch>>8;
*((*outbuf)++) = ch&0xff;
}
- *outlen -= 2;
+ *outlen -= sizeof(unichar_t);
++char_cnt;
}
} else if ( cd->from==e_sjis ) {
@@ -511,11 +769,11 @@ return( (size_t) -1 );
*((*outbuf)++) = ch>>8;
*((*outbuf)++) = ch&0xff;
}
- *outlen -= 2;
+ *outlen -= sizeof(unichar_t);
++char_cnt;
}
} else if ( cd->from==e_utf8 ) {
- while ( *inlen>0 && *outlen>1 ) {
+ while ( *inlen>0 && *outlen>sizeof(unichar_t) ) {
unsigned char *ipt = (unsigned char *) *inbuf;
int ch = *ipt;
if ( ch <= 127 ) {
@@ -556,7 +814,227 @@ return( (size_t) -1 );
*((*outbuf)++) = ch>>8;
*((*outbuf)++) = ch&0xff;
}
- *outlen -= 2;
+ *outlen -= sizeof(unichar_t);
+ ++char_cnt;
+ }
+ } else {
+ fprintf( stderr, "Unexpected encoding\n" );
+return( (size_t) -1 );
+ }
+ } else if ( cd->to==e_ucs4 ) {
+ const unichar_t *table;
+ if ( cd->from<e_first2byte ) {
+ table = unicode_from_alphabets[cd->from];
+ while ( *inlen>0 && *outlen>1 ) {
+ unichar_t ch = table[ *(unsigned char *) ((*inbuf)++)];
+ --*inlen;
+ if ( endian==end_little ) {
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = ch>>8;
+ } else {
+ *((*outbuf)++) = ch>>8;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ }
+ *outlen -= sizeof(unichar_t);
+ ++char_cnt;
+ }
+ } else if ( cd->from==e_jis || cd->from==e_jis2 ||
+ cd->from==e_jiskorean || cd->from==e_jisgb ) {
+ table = cd->from==e_jisgb ? unicode_from_gb2312 :
+ cd->from==e_jiskorean ? unicode_from_ksc5601 :
+ cd->from==e_jis ? unicode_from_jis208 :
+ unicode_from_jis212;
+ while ( *inlen>1 && *outlen>1 ) {
+ unsigned char *ipt = (unsigned char *) *inbuf;
+ int ch;
+ if ( *ipt<0x21 || *ipt>0x7e || ipt[1]<0x21 || ipt[1]>0x7e )
+return( (size_t) -1 );
+ ch = (*ipt-0x21)*94 + (ipt[1]-0x21);
+ ch = table[ch];
+ *inlen -= 2;
+ *inbuf = (char *) ipt+2;
+ if ( endian==end_little ) {
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = ch>>8;
+ } else {
+ *((*outbuf)++) = ch>>8;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ }
+ *outlen -= sizeof(unichar_t);
+ ++char_cnt;
+ }
+ if ( *inlen==1 && *outlen>0 )
+return( (size_t) -1 ); /* Incomplete multi-byte sequence */
+ } else if ( cd->from==e_wansung || cd->from==e_jisgbpk ) {
+ table = cd->from==e_jisgbpk ? unicode_from_gb2312 :
+ unicode_from_ksc5601 ;
+ while ( *inlen>0 && *outlen>1 ) {
+ unsigned char *ipt = (unsigned char *) *inbuf;
+ int ch;
+ if ( *ipt<0x7f ) {
+ ch = *ipt;
+ --*inlen;
+ *inbuf = (char *) ipt+1;
+ } else {
+ if ( *ipt<0xa1 || *ipt>0xfe || ipt[1]<0xa1 || ipt[1]>0xfe ||
+ *inlen==1 )
+return( (size_t) -1 );
+ ch = (*ipt-0xa1)*94 + (ipt[1]-0xa1);
+ ch = table[ch];
+ *inlen -= 2;;
+ *inbuf = (char *) ipt+2;
+ }
+ if ( endian==end_little ) {
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = ch>>8;
+ } else {
+ *((*outbuf)++) = ch>>8;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ }
+ *outlen -= sizeof(unichar_t);
+ ++char_cnt;
+ }
+ } else if ( cd->from==e_johab || cd->from==e_big5 || cd->from==e_big5hkscs ) {
+ int offset;
+ if ( cd->from==e_big5 ) {
+ offset = 0xa100;
+ table = unicode_from_big5;
+ } else if ( cd->from==e_big5hkscs ) {
+ offset = 0x8100;
+ table = unicode_from_big5hkscs;
+ } else {
+ offset = 0x8400;
+ table = unicode_from_johab;
+ }
+ while ( *inlen>0 && *outlen>1 ) {
+ unsigned char *ipt = (unsigned char *) *inbuf;
+ int ch;
+ if ( *ipt<0x7f ) {
+ ch = *ipt;
+ --*inlen;
+ *inbuf = (char *) ipt+1;
+ } else {
+ if ( *inlen==1 )
+return( (size_t) -1 );
+ ch = (*ipt<<8) | ipt[1];
+ if ( ch<offset )
+return( (size_t) -1 );
+ ch -= offset;
+ ch = table[ch];
+ *inlen -= 2;
+ *inbuf = (char *) ipt+2;
+ }
+ if ( endian==end_little ) {
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = ch>>8;
+ } else {
+ *((*outbuf)++) = ch>>8;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ }
+ *outlen -= sizeof(unichar_t);
+ ++char_cnt;
+ }
+ } else if ( cd->from==e_sjis ) {
+ while ( *inlen>0 && *outlen>1 ) {
+ unsigned char *ipt = (unsigned char *) *inbuf;
+ int ch1 = *ipt;
+ if ( ch1<127 || ( ch1>=161 && ch1<=223 )) {
+ ch = unicode_from_jis201[ch1];
+ *inbuf = (char *) ipt+1;
+ --*inlen;
+ } else if ( *inlen==1 )
+return( (size_t) -1 );
+ else {
+ int ch2 = ipt[1];
+ if ( ch1 >= 129 && ch1<= 159 )
+ ch1 -= 112;
+ else
+ ch1 -= 176;
+ ch1 <<= 1;
+ if ( ch2>=159 )
+ ch2-= 126;
+ else if ( ch2>127 ) {
+ --ch1;
+ ch2 -= 32;
+ } else {
+ --ch1;
+ ch2 -= 31;
+ }
+ if ( ch1-0x21>=94 || ch2-0x21>=94 )
+return( (size_t) -1 );
+ ch = unicode_from_jis208[(ch1-0x21)*94+(ch2-0x21)];
+ *inlen -= 2;
+ *inbuf = (char *) ipt+2;
+ }
+ if ( endian==end_little ) {
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = ch>>8;
+ } else {
+ *((*outbuf)++) = ch>>8;
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = 0;
+ *((*outbuf)++) = 0;
+ }
+ *outlen -= sizeof(unichar_t);
+ ++char_cnt;
+ }
+ } else if ( cd->from==e_utf8 ) {
+ while ( *inlen>0 && *outlen>sizeof(unichar_t) ) {
+ unsigned char *ipt = (unsigned char *) *inbuf;
+ int ch = *ipt;
+ if ( ch <= 127 ) {
+ *inbuf = (char *) ipt+1;
+ --*inlen;
+ } else if ( ch<=0xdf ) {
+ if ( *inlen<2 || ipt[1]<0x80 )
+return( (size_t) -1 );
+ ch = ((ch&0x1f)<<6) | (ipt[1] &0x3f);
+ *inlen -= 2;
+ *inbuf = (char *) ipt+2;
+ } else if ( ch<=0xef ) {
+ if ( *inlen<3 || ipt[1]<0x80 || ipt[2]<0x80 )
+return( (size_t) -1 );
+ ch = ((ch&0x1f)<<12) | ((ipt[1] &0x3f)<<6) | (ipt[2]&0x3f);
+ *inlen -= 3;
+ *inbuf = (char *) ipt+3;
+ } else {
+ int w,w2;
+ w = ( ((*ipt&0x7)<<2) | ((ipt[1]&0x30)>>4) )-1;
+ w = (w<<6) | ((ipt[1]&0xf)<<2) | ((ipt[2]&0x30)>>4);
+ w2 = ((ipt[2]&0xf)<<6) | (ipt[3]&0x3f);
+ ch = w*0x400 + w2 + 0x10000;
+ *inbuf = (char *) ipt+4;
+ }
+ if ( endian==end_little ) {
+ *((*outbuf)++) = ch&0xff;
+ *((*outbuf)++) = ch>>8;
+ *((*outbuf)++) = ch>>16;
+ *((*outbuf)++) = ch>>24;
+ } else {
+ *((*outbuf)++) = ch>>24;
+ *((*outbuf)++) = ch>>16;
+ *((*outbuf)++) = ch>>8;
+ *((*outbuf)++) = ch&0xff;
+ }
+ *outlen -= sizeof(unichar_t);
++char_cnt;
}
} else {
@@ -572,6 +1050,10 @@ return( (size_t) -1 );
**outbuf = '\0';
if ( *outlen>1 )
(*outbuf)[1] = '\0';
+ if ( cd->to==e_ucs4 && *outlen>3 ) {
+ (*outbuf)[2] = '\0';
+ (*outbuf)[3] = '\0';
+ }
}
return( char_cnt );
}
diff --git a/Unicode/unialt.c b/Unicode/unialt.c
index a667ed75..52b85814 100644
--- a/Unicode/unialt.c
+++ b/Unicode/unialt.c
@@ -3254,15 +3254,15 @@ static const unichar_t str_facb[] = { 0x980b, 0 };
static const unichar_t str_facc[] = { 0x983b, 0 };
static const unichar_t str_facd[] = { 0x9b12, 0 };
static const unichar_t str_face[] = { 0x9f9c, 0 };
-static const unichar_t str_facf[] = { 0x284a, 0 };
-static const unichar_t str_fad0[] = { 0x2844, 0 };
-static const unichar_t str_fad1[] = { 0x33d5, 0 };
+static const unichar_t str_facf[] = { 0x2284a, 0 }; /* value is above 0xffff; the removed line held only its low 16 bits (0x284a) */
+static const unichar_t str_fad0[] = { 0x22844, 0 }; /* value is above 0xffff; the removed line held only its low 16 bits (0x2844) */
+static const unichar_t str_fad1[] = { 0x233d5, 0 }; /* value is above 0xffff; the removed line held only its low 16 bits (0x33d5) */
static const unichar_t str_fad2[] = { 0x3b9d, 0 };
static const unichar_t str_fad3[] = { 0x4018, 0 };
static const unichar_t str_fad4[] = { 0x4039, 0 };
-static const unichar_t str_fad5[] = { 0x5249, 0 };
-static const unichar_t str_fad6[] = { 0x5cd0, 0 };
-static const unichar_t str_fad7[] = { 0x7ed3, 0 };
+static const unichar_t str_fad5[] = { 0x25249, 0 }; /* value is above 0xffff; the removed line held only its low 16 bits (0x5249) */
+static const unichar_t str_fad6[] = { 0x25cd0, 0 }; /* value is above 0xffff; the removed line held only its low 16 bits (0x5cd0) */
+static const unichar_t str_fad7[] = { 0x27ed3, 0 }; /* value is above 0xffff; the removed line held only its low 16 bits (0x7ed3) */
static const unichar_t str_fad8[] = { 0x9f43, 0 };
static const unichar_t str_fad9[] = { 0x9f8e, 0 };
static const unichar_t str_fb00[] = { 0x0066, 0x0066, 0 };
diff --git a/Unicode/ustring.c b/Unicode/ustring.c
index e6bde158..c1a594fd 100644
--- a/Unicode/ustring.c
+++ b/Unicode/ustring.c
@@ -476,7 +476,7 @@ return( ubuf );
unichar_t *utf82u_strncpy(unichar_t *ubuf,const char *utf8buf,int len) {
unichar_t *upt=ubuf, *uend=ubuf+len-1;
const uint8 *pt = (const uint8 *) utf8buf, *end = pt+strlen(utf8buf);
- int w;
+ int w, w2;
while ( pt<end && *pt!='\0' && upt<uend ) {
if ( *pt<=127 )
@@ -487,6 +487,7 @@ unichar_t *utf82u_strncpy(unichar_t *ubuf,const char *utf8buf,int len) {
} else if ( *pt<=0xef ) {
*upt = ((*pt&0xf)<<12) | ((pt[1]&0x3f)<<6) | (pt[2]&0x3f);
pt += 3;
+#ifdef UNICHAR_16
} else if ( upt+1<uend ) {
/* Um... I don't support surrogates */
w = ( ((*pt&0x7)<<2) | ((pt[1]&0x30)>>4) )-1;
@@ -496,6 +497,13 @@ unichar_t *utf82u_strncpy(unichar_t *ubuf,const char *utf8buf,int len) {
} else {
/* no space for surrogate */
pt += 4;
+#else
+ } else {
+ w = ( ((*pt&0x7)<<2) | ((pt[1]&0x30)>>4) )-1;
+ w = (w<<6) | ((pt[1]&0xf)<<2) | ((pt[2]&0x30)>>4);
+ w2 = ((pt[2]&0xf)<<6) | (pt[3]&0x3f);
+ *upt = w*0x400 + w2 + 0x10000;
+#endif
}
++upt;
}
@@ -507,8 +515,9 @@ unichar_t *utf82u_strcpy(unichar_t *ubuf,const char *utf8buf) {
return( utf82u_strncpy(ubuf,utf8buf,strlen(utf8buf)+1));
}
-int32 *utf82u32_strncpy(int32 *ubuf,const char *utf8buf,int len) {
- int32 *upt=ubuf, *uend=ubuf+len-1;
+# ifdef UNICHAR_16
+uint32 *utf82u32_strncpy(uint32 *ubuf,const char *utf8buf,int len) {
+ uint32 *upt=ubuf, *uend=ubuf+len-1;
const uint8 *pt = (const uint8 *) utf8buf;
int w, w2;
@@ -534,9 +543,9 @@ int32 *utf82u32_strncpy(int32 *ubuf,const char *utf8buf,int len) {
return( ubuf );
}
-char *u322utf8_strncpy(char *utf8buf, const int32 *ubuf,int len) {
+char *u322utf8_strncpy(char *utf8buf, const uint32 *ubuf,int len) {
uint8 *pt=(uint8 *) utf8buf, *end=(uint8 *) utf8buf+len-1;
- const int32 *upt = ubuf;
+ const uint32 *upt = ubuf;
while ( *upt!='\0' && pt<end ) {
if ( *upt<=127 )
@@ -568,7 +577,7 @@ char *u322utf8_strncpy(char *utf8buf, const int32 *ubuf,int len) {
return( utf8buf );
}
-char *u322utf8_copy(const int32 *ubuf) {
+char *u322utf8_copy(const uint32 *ubuf) {
int i, len;
char *buf;
@@ -584,6 +593,7 @@ char *u322utf8_copy(const int32 *ubuf) {
buf = galloc(len+1);
return( u322utf8_strncpy(buf,ubuf,len+1));
}
+#endif
unichar_t *utf82u_copyn(const char *utf8buf,int len) {
unichar_t *ubuf = galloc((len+1)*sizeof(unichar_t));
@@ -606,17 +616,19 @@ void utf82u_strcat(unichar_t *to,const char *from) {
utf82u_strcpy(to+u_strlen(to),from);
}
-int32 *utf82u32_copy(const char *utf8buf) {
+#ifdef UNICHAR_16 /* this function is compiled only when UNICHAR_16 is defined */
+uint32 *utf82u32_copy(const char *utf8buf) { /* returns utf82u32_strncpy()'s result on a freshly galloc'ed uint32 buffer; NULL input yields NULL */
int len;
- int32 *ubuf;
+ uint32 *ubuf;
if ( utf8buf==NULL )
return( NULL );
len = strlen(utf8buf);
- ubuf = galloc((len+1)*sizeof(int32));
+ ubuf = galloc((len+1)*sizeof(uint32)); /* one element per input byte plus one; presumably the extra slot holds the terminator -- confirm against utf82u32_strncpy */
return( utf82u32_strncpy(ubuf,utf8buf,len+1));
}
+#endif
char *u2utf8_strcpy(char *utf8buf,const unichar_t *ubuf) {
char *pt = utf8buf;
@@ -627,6 +639,7 @@ char *u2utf8_strcpy(char *utf8buf,const unichar_t *ubuf) {
else if ( *ubuf<0x800 ) {
*pt++ = 0xc0 | (*ubuf>>6);
*pt++ = 0x80 | (*ubuf&0x3f);
+#ifdef UNICHAR_16
} else if ( *ubuf>=0xd800 && *ubuf<0xdc00 && ubuf[1]>=0xdc00 && ubuf[1]<0xe000 ) {
int u = ((*ubuf>>6)&0xf)+1, y = ((*ubuf&3)<<4) | ((ubuf[1]>>6)&0xf);
*pt++ = 0xf0 | (u>>2);
@@ -637,6 +650,19 @@ char *u2utf8_strcpy(char *utf8buf,const unichar_t *ubuf) {
*pt++ = 0xe0 | (*ubuf>>12);
*pt++ = 0x80 | ((*ubuf>>6)&0x3f);
*pt++ = 0x80 | (*ubuf&0x3f);
+#else
+ } else if ( *ubuf < 0x10000 ) {
+ *pt++ = 0xe0 | (*ubuf>>12);
+ *pt++ = 0x80 | ((*ubuf>>6)&0x3f);
+ *pt++ = 0x80 | (*ubuf&0x3f);
+ } else {
+ uint32 val = *ubuf-0x10000;
+ int u = ((val&0xf0000)>>16)+1, z=(val&0x0f000)>>12, y=(val&0x00fc0)>>6, x=val&0x0003f;
+ *pt++ = 0xf0 | (u>>2);
+ *pt++ = 0x80 | ((u&3)<<4) | z;
+ *pt++ = 0x80 | y;
+ *pt++ = 0x80 | x;
+#endif
}
++ubuf;
}