summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuo Jinghua <sunmoon1997@gmail.com>2010-01-07 23:00:22 +0800
committerLuo Jinghua <sunmoon1997@gmail.com>2010-01-07 23:00:22 +0800
commit8e2d99abd7bb55112898bd90e3affb660b4915a0 (patch)
treeca216b6fde912109edadafe8ebefa0cbefaf306d
parent601fa11a8eba8d032c1491f79d0974f24fba7613 (diff)
uniconv: try to improve the interface
-rw-r--r--converter.c9
-rw-r--r--converter.h11
-rw-r--r--multibytecodec.c18
-rw-r--r--multibytecodec.h7
-rw-r--r--singlebytecodec.c83
-rw-r--r--tableconverter.c42
-rw-r--r--uconv.c2
-rw-r--r--uniconv.c50
-rw-r--r--uniconv.h37
-rw-r--r--utfconverter.c17
10 files changed, 181 insertions, 95 deletions
diff --git a/converter.c b/converter.c
index b69c870..01d9bdb 100644
--- a/converter.c
+++ b/converter.c
@@ -24,10 +24,17 @@ converter_open(const char *charset)
return NULL;
}
-void converter_close(struct converter *conv)
+void
+converter_close(struct converter *conv)
{
if (!conv)
return;
+
conv->close(conv);
}
+/**
+ * Invoke the converter's optional reset callback.
+ *
+ * Does nothing when conv is NULL (same tolerance as converter_close)
+ * or when the converter did not install a reset callback.
+ */
+void
+converter_reset(struct converter *conv)
+{
+    if (!conv)
+        return;
+
+    /* reset is optional: some converters leave it NULL */
+    if (conv->reset)
+        conv->reset(conv);
+}
diff --git a/converter.h b/converter.h
index 236e668..c2e09ff 100644
--- a/converter.h
+++ b/converter.h
@@ -17,8 +17,12 @@ struct converter {
size_t inbytesleft,
uc_char_t **outbuf,
size_t outleft);
- void
- (*close)(struct converter *conv);
+
+ void
+ (*reset)(struct converter *conv);
+
+ void
+ (*close)(struct converter *conv);
};
struct converter*
@@ -27,4 +31,7 @@ converter_open(const char *charset);
void
converter_close(struct converter *converter);
+void
+converter_reset(struct converter *converter);
+
#endif
diff --git a/multibytecodec.c b/multibytecodec.c
index 85d2e7f..58ee647 100644
--- a/multibytecodec.c
+++ b/multibytecodec.c
@@ -142,23 +142,26 @@ mbcs_decode_init(MultibyteCodecState *state)
state->codec->decinit(&state->state, state->codec->config);
}
-void
+int
mbcs_decode_reset(MultibyteCodecState *state)
{
if (state->codec->decreset)
- state->codec->decreset(&state->state, state->codec->config);
+ return state->codec->decreset(&state->state,
+ state->codec->config);
+ return 0;
}
int
mbcs_encode(MultibyteCodecState *state,
const ucs4_t** inbuf, size_t inlen,
- char** outbuf, size_t outlen)
+ char** outbuf, size_t outlen,
+ int flags)
{
return state->codec->encode(&state->state,
state->codec->config,
inbuf, inlen,
(unsigned char **)outbuf, outlen,
- MBENC_FLUSH);
+ flags);
}
void
@@ -168,11 +171,12 @@ mbcs_encode_init(MultibyteCodecState *state)
state->codec->encinit(&state->state, state->codec->config);
}
-void
+int
mbcs_encode_reset(MultibyteCodecState *state,
char **outbuf, int outleft)
{
if (state->codec->encreset)
- state->codec->encreset(&state->state, state->codec->config,
- (unsigned char **)outbuf, outleft);
+ return state->codec->encreset(&state->state, state->codec->config,
+ (unsigned char **)outbuf, outleft);
+ return 0;
}
diff --git a/multibytecodec.h b/multibytecodec.h
index 1fc4000..f0902cf 100644
--- a/multibytecodec.h
+++ b/multibytecodec.h
@@ -85,13 +85,13 @@ mbcs_init(MultibyteCodecState *state,
void
mbcs_decode_init(MultibyteCodecState *state);
-void
+int
mbcs_decode_reset(MultibyteCodecState *state);
void
mbcs_encode_init(MultibyteCodecState *state);
-void
+int
mbcs_encode_reset(MultibyteCodecState *state,
char **outbuf, int outleft);
@@ -103,7 +103,8 @@ mbcs_decode(MultibyteCodecState *state,
int
mbcs_encode(MultibyteCodecState *state,
const ucs4_t** inbuf, size_t inlen,
- char** outbuf, size_t outlen);
+ char** outbuf, size_t outlen,
+ int flags);
#ifdef __cplusplus
}
diff --git a/singlebytecodec.c b/singlebytecodec.c
index a0c099b..3607580 100644
--- a/singlebytecodec.c
+++ b/singlebytecodec.c
@@ -7,33 +7,31 @@
int
sbcs_init(SingleByteCodecState *state, const char *encoding)
{
- SingleByteCodecState **codecs = __uniconv_get_single_byte_codecs();
- size_t i;
+ SingleByteCodecState **codecs = __uniconv_get_single_byte_codecs();
+ size_t i;
- if (!codecs)
- return -1;
- for (i = 0; codecs[i]; i++)
- {
- if (!strcmp(codecs[i]->encoding, encoding))
- {
- *state = *codecs[i];
- return 0;
- }
+ if (!codecs)
+ return -1;
+ for (i = 0; codecs[i]; i++) {
+ if (!strcmp(codecs[i]->encoding, encoding)) {
+ *state = *codecs[i];
+ return 0;
}
+ }
- return -1;
+ return -1;
}
static int
sbcs_encode_char(SingleByteCodecState *state,
uc_char_t unichar)
{
- size_t i;
+ size_t i;
- for (i = 0; i < state->encoding_map_size; i++)
- if (state->encoding_map[i].from == unichar)
- return state->encoding_map[i].to;
- return -1;
+ for (i = 0; i < state->encoding_map_size; i++)
+ if (state->encoding_map[i].from == unichar)
+ return state->encoding_map[i].to;
+ return -1;
}
int
@@ -43,19 +41,19 @@ sbcs_encode(SingleByteCodecState *state,
char **outbuf,
size_t outleft)
{
- for (; inleft; inleft--, outleft--)
- {
- int result = sbcs_encode_char(state, **inbuf);
- if (result < 0)
- return UNICONV_EILSEQ;
- if (!outleft)
- return UNICONV_E2BIG;
- **outbuf = (char)result;
- (*outbuf)++;
- (*inbuf)++;
- }
+ for (; inleft; inleft--, outleft--)
+ {
+ int result = sbcs_encode_char(state, **inbuf);
+ if (result < 0)
+ return UNICONV_EILSEQ;
+ if (!outleft)
+ return UNICONV_E2BIG;
+ **outbuf = (char)result;
+ (*outbuf)++;
+ (*inbuf)++;
+ }
- return 0;
+ return 0;
}
int
@@ -65,21 +63,20 @@ sbcs_decode(SingleByteCodecState *state,
uc_char_t **outbuf,
size_t outleft)
{
- const uc_uint8_t **uinbuf = (const uc_uint8_t **)inbuf;
+ const uc_uint8_t **uinbuf = (const uc_uint8_t **)inbuf;
- for (; inleft; inleft--, outleft--)
- {
- unsigned index = **uinbuf;
+ for (; inleft; inleft--, outleft--)
+ {
+ unsigned index = **uinbuf;
- if (index >= state->decoding_table_size)
- return UNICONV_EILSEQ;
- if (!outleft)
- return UNICONV_E2BIG;
- **outbuf = state->decoding_table[index];
- (*outbuf)++;
- (*uinbuf)++;
- }
+ if (index >= state->decoding_table_size)
+ return UNICONV_EILSEQ;
+ if (!outleft)
+ return UNICONV_E2BIG;
+ **outbuf = state->decoding_table[index];
+ (*outbuf)++;
+ (*uinbuf)++;
+ }
- return 0;
+ return 0;
}
-
diff --git a/tableconverter.c b/tableconverter.c
index 2708470..25562b9 100644
--- a/tableconverter.c
+++ b/tableconverter.c
@@ -31,20 +31,24 @@ tabconverter_encode(struct converter *suc,
struct tabconverter *uc = (struct tabconverter*)suc;
int ret;
- if (!uc)
- return UNICONV_EINVAL;
-
if (uc->singlebyte) {
+ /* flush */
+ if (!inbuf)
+ return UNICONV_SUCCESS;
+
ret = sbcs_encode(&uc->sstate, inbuf, inleft, outbuf, outbytesleft);
} else {
- mbcs_encode_init(&uc->mstate);
- ret = mbcs_encode(&uc->mstate, inbuf, inleft, outbuf, outbytesleft);
+ /* flush */
+ if (!inbuf)
+ ret = mbcs_encode_reset(&uc->mstate, outbuf, outbytesleft);
+ else
+ ret = mbcs_encode(&uc->mstate, inbuf, inleft, outbuf, outbytesleft, 0);
if (ret == MBERR_TOOFEW)
+ ret = UNICONV_EINVAL;
+ else if (ret == MBERR_TOOSMALL)
ret = UNICONV_E2BIG;
- else if (ret == MBERR_TOOSMALL || ret > 0)
- ret = UNICONV_EILSEQ;
else if (ret < 0)
- ret = UNICONV_EINVAL;
+ ret = UNICONV_EILSEQ;
}
return ret;
@@ -60,25 +64,32 @@ tabconverter_decode(struct converter *suc,
struct tabconverter *uc = (struct tabconverter*)suc;
int ret;
- if (!uc)
- return UNICONV_EINVAL;
-
if (uc->singlebyte) {
ret = sbcs_decode(&uc->sstate, inbuf, inbytesleft, outbuf, outleft);
} else {
- mbcs_decode_init(&uc->mstate);
ret = mbcs_decode(&uc->mstate, inbuf, inbytesleft, outbuf, outleft);
if (ret == MBERR_TOOFEW)
+ ret = UNICONV_EINVAL;
+ else if (ret == MBERR_TOOSMALL)
ret = UNICONV_E2BIG;
- else if (ret == MBERR_TOOSMALL || ret > 0)
- ret = UNICONV_EILSEQ;
else if (ret < 0)
- ret = UNICONV_EINVAL;
+ ret = UNICONV_EILSEQ;
}
return ret;
}
+/**
+ * Reset callback for table-based converters.
+ *
+ * Re-runs the multibyte codec's encode/decode init hooks on mstate.
+ * Nothing is done for single-byte converters here.
+ */
+static void
+tabconverter_reset(struct converter *suc)
+{
+    struct tabconverter *uc = (struct tabconverter *)suc;
+
+    /* mstate is only used on the non-single-byte path (see
+     * tabconverter_encode/tabconverter_decode), so only touch it then. */
+    if (!uc->singlebyte) {
+        mbcs_encode_init(&uc->mstate);
+        mbcs_decode_init(&uc->mstate);
+    }
+}
+
struct converter*
tabconverter_open(const char *charset)
{
@@ -107,5 +118,6 @@ tabconverter_open(const char *charset)
conv->base.encode = tabconverter_encode;
conv->base.decode = tabconverter_decode;
conv->base.close = tabconverter_close;
+ conv->base.reset = tabconverter_reset;
return &conv->base;
}
diff --git a/uconv.c b/uconv.c
index 94fd812..80c4866 100644
--- a/uconv.c
+++ b/uconv.c
@@ -42,7 +42,7 @@ int main(int argc, char **argv)
inp = inbuffer;
outp = outbuffer;
ret = uniconv_conv(conv, (const char **)&inp, inlen, &outp, sizeof(outbuffer));
- if (ret == UNICONV_E2BIG && inlen < sizeof(inbuffer)) {
+ if (ret == UNICONV_EINVAL && inlen < sizeof(inbuffer)) {
size_t extralen = fread(inbuffer + inlen, 1, 1, infp);
if (!extralen)
break;
diff --git a/uniconv.c b/uniconv.c
index 30c0f54..bbbdb2b 100644
--- a/uniconv.c
+++ b/uniconv.c
@@ -101,28 +101,38 @@ uniconv_conv(uniconv_t *uc,
if (!uc)
return UNICONV_EBADF;
- if (!inbuf || !outbuf)
- return UNICONV_EINVAL;
-
- if (inleft < UNICONV_MAX_LOCAL)
- ucs4 = local_ucs4;
- else
- ucs4 = malloc(sizeof(uc_char_t) * inleft);
- if (!ucs4)
- return UNICONV_EINVAL;
-
- inucs4 = ucs4;
- ucs4len = inleft;
- ret = uc->from->decode(uc->from, inbuf, inleft, &inucs4, ucs4len);
- if (ret)
- goto error_decode;
-
- ucs4len = inucs4 - ucs4;
- inucs4 = ucs4;
- ret = uc->to->encode(uc->to, (const ucs4_t **)&inucs4, ucs4len, outbuf, outleft);
+ /* reset converter */
+ if (!inbuf && !outbuf) {
+ converter_reset(uc->from);
+ converter_reset(uc->to);
+ return UNICONV_SUCCESS;
+ }
+
+ /* converting/pushing input data */
+ if (inbuf) {
+ if (inleft < UNICONV_MAX_LOCAL)
+ ucs4 = local_ucs4;
+ else
+ ucs4 = malloc(sizeof(uc_char_t) * inleft);
+ if (!ucs4)
+ return UNICONV_EINVAL;
+
+ inucs4 = ucs4;
+ ucs4len = inleft;
+ ret = uc->from->decode(uc->from, inbuf, inleft, &inucs4, ucs4len);
+ if (ret)
+ goto error_decode;
+
+ ucs4len = inucs4 - ucs4;
+ inucs4 = ucs4;
+ ret = uc->to->encode(uc->to, (const ucs4_t **)&inucs4, ucs4len, outbuf, outleft);
+ } else {
+ /* converting pending data in buffer */
+ ret = uc->to->encode(uc->to, NULL, 0, outbuf, outleft);
+ }
error_decode:
- if (ucs4 != local_ucs4)
+ if (ucs4 && ucs4 != local_ucs4)
free (ucs4);
return ret;
}
diff --git a/uniconv.h b/uniconv.h
index 2840a17..b90096e 100644
--- a/uniconv.h
+++ b/uniconv.h
@@ -7,6 +7,9 @@
extern "C" {
#endif
+ /**
+ * the conversion descriptor
+ */
struct _uniconv;
typedef struct _uniconv uniconv_t;
@@ -16,9 +19,40 @@ extern "C" {
#define UNICONV_EINVAL (-3)
#define UNICONV_EBADF (-4)
+ /**
+ * open a descriptor for converting /from/ charset to /to/ charset
+ *
+ * @param from the from charset
+ * @param to the to charset
+ *
+ * @return a conversion descriptor, %NULL in case of error.
+ */
uniconv_t*
uniconv_open(const char *from, const char *to);
+ /**
+ * perform charset conversion
+ *
+ * If both inbuf and outbuf are set to %NULL, then the descriptor
+ * state will be reset. If only inbuf is set to %NULL, then the
+ * descriptor will reset its shift state and write any pending
+ * data to outbuf.
+ *
+ * @param inbuf a pointer to input buffer
+ * @param inleft the size of input buffer
+ * @param outbuf a pointer to output buffer
+ * @param outleft the size of output buffer
+ *
+ * @return the number of characters converted in a non-reversible
+ * way during this call.
+ *
+ * In case of error, it returns
+ *
+ * UNICONV_E2BIG There is not sufficient room at *outbuf.
+ * UNICONV_EILSEQ An invalid multibyte sequence has been encountered in the input.
+ * UNICONV_EINVAL An incomplete multibyte sequence has been encountered in the input.
+ *
+ */
int
uniconv_conv(uniconv_t *uc,
const char **inbuf,
@@ -26,6 +60,9 @@ extern "C" {
char **outbuf,
size_t outleft);
+ /**
+ * close the conversion descriptor
+ */
void
uniconv_close(uniconv_t *uc);
diff --git a/utfconverter.c b/utfconverter.c
index 9818c27..04a8199 100644
--- a/utfconverter.c
+++ b/utfconverter.c
@@ -21,6 +21,9 @@ utf8_encode(struct converter *conv,
{
size_t i;
+ if (!inbuf)
+ return UNICONV_SUCCESS;
+
for (i = 0; i < inleft; i++) {
int seqlen = ucs4toutf8(**inbuf, NULL);
if (seqlen < 0)
@@ -48,7 +51,7 @@ utf8_decode(struct converter *conv,
uc_char_t unichar;
int seqlen = ucs4fromutf8(*inbuf, &unichar, inleft);
if (seqlen == -2)
- return UNICONV_E2BIG;
+ return UNICONV_EINVAL;
else if (seqlen < 0)
return UNICONV_EILSEQ;
if (!outleft)
@@ -75,6 +78,9 @@ utf16_encode(struct converter *conv,
size_t i;
uc_uint16_t **soutbuf = (uc_uint16_t **)outbuf;
+ if (!inbuf)
+ return UNICONV_SUCCESS;
+
for (i = 0; i < inleft; i++) {
int seqlen = ucs4toutf16(**inbuf, NULL);
if (seqlen < 0)
@@ -105,7 +111,7 @@ utf16_decode(struct converter *conv,
int seqlen = ucs4fromutf16(*sinbuf, &unichar,
inleft / sizeof(uc_uint16_t));
if (seqlen == -2)
- return UNICONV_E2BIG;
+ return UNICONV_EINVAL;
else if (seqlen < 0)
return UNICONV_EILSEQ;
if (!outleft)
@@ -129,6 +135,9 @@ utf32_encode(struct converter *conv,
char **outbuf,
size_t outleft)
{
+ if (!inbuf)
+ return UNICONV_SUCCESS;
+
if (inleft * sizeof(uc_char_t) > outleft)
return UNICONV_E2BIG;
@@ -146,7 +155,7 @@ utf32_decode(struct converter *conv,
size_t outleft)
{
if (inleft & 3)
- return UNICONV_EILSEQ;
+ return UNICONV_EINVAL;
if (inleft > outleft)
return UNICONV_E2BIG;
@@ -185,5 +194,7 @@ utfconverter_open(const char *charset)
conv->base.decode = utf32_decode;
}
conv->base.close = utfconverter_close;
+ conv->base.reset = NULL;
+
return &conv->base;
}