diff options
author | Luo Jinghua <sunmoon1997@gmail.com> | 2010-01-07 23:00:22 +0800 |
---|---|---|
committer | Luo Jinghua <sunmoon1997@gmail.com> | 2010-01-07 23:00:22 +0800 |
commit | 8e2d99abd7bb55112898bd90e3affb660b4915a0 (patch) | |
tree | ca216b6fde912109edadafe8ebefa0cbefaf306d | |
parent | 601fa11a8eba8d032c1491f79d0974f24fba7613 (diff) |
uniconv: try to improve the interface
-rw-r--r-- | converter.c | 9 | ||||
-rw-r--r-- | converter.h | 11 | ||||
-rw-r--r-- | multibytecodec.c | 18 | ||||
-rw-r--r-- | multibytecodec.h | 7 | ||||
-rw-r--r-- | singlebytecodec.c | 83 | ||||
-rw-r--r-- | tableconverter.c | 42 | ||||
-rw-r--r-- | uconv.c | 2 | ||||
-rw-r--r-- | uniconv.c | 50 | ||||
-rw-r--r-- | uniconv.h | 37 | ||||
-rw-r--r-- | utfconverter.c | 17 |
10 files changed, 181 insertions, 95 deletions
diff --git a/converter.c b/converter.c index b69c870..01d9bdb 100644 --- a/converter.c +++ b/converter.c @@ -24,10 +24,17 @@ converter_open(const char *charset) return NULL; } -void converter_close(struct converter *conv) +void +converter_close(struct converter *conv) { if (!conv) return; + conv->close(conv); } +void converter_reset(struct converter *conv) +{ + if (conv->reset) + conv->reset(conv); +} diff --git a/converter.h b/converter.h index 236e668..c2e09ff 100644 --- a/converter.h +++ b/converter.h @@ -17,8 +17,12 @@ struct converter { size_t inbytesleft, uc_char_t **outbuf, size_t outleft); - void - (*close)(struct converter *conv); + + void + (*reset)(struct converter *conv); + + void + (*close)(struct converter *conv); }; struct converter* @@ -27,4 +31,7 @@ converter_open(const char *charset); void converter_close(struct converter *converter); +void +converter_reset(struct converter *converter); + #endif diff --git a/multibytecodec.c b/multibytecodec.c index 85d2e7f..58ee647 100644 --- a/multibytecodec.c +++ b/multibytecodec.c @@ -142,23 +142,26 @@ mbcs_decode_init(MultibyteCodecState *state) state->codec->decinit(&state->state, state->codec->config); } -void +int mbcs_decode_reset(MultibyteCodecState *state) { if (state->codec->decreset) - state->codec->decreset(&state->state, state->codec->config); + return state->codec->decreset(&state->state, + state->codec->config); + return 0; } int mbcs_encode(MultibyteCodecState *state, const ucs4_t** inbuf, size_t inlen, - char** outbuf, size_t outlen) + char** outbuf, size_t outlen, + int flags) { return state->codec->encode(&state->state, state->codec->config, inbuf, inlen, (unsigned char **)outbuf, outlen, - MBENC_FLUSH); + flags); } void @@ -168,11 +171,12 @@ mbcs_encode_init(MultibyteCodecState *state) state->codec->encinit(&state->state, state->codec->config); } -void +int mbcs_encode_reset(MultibyteCodecState *state, char **outbuf, int outleft) { if (state->codec->encreset) - state->codec->encreset(&state->state, state->codec->config, - (unsigned char **)outbuf, outleft); + return state->codec->encreset(&state->state, state->codec->config, + (unsigned char **)outbuf, outleft); + return 0; } diff --git a/multibytecodec.h b/multibytecodec.h index 1fc4000..f0902cf 100644 --- a/multibytecodec.h +++ b/multibytecodec.h @@ -85,13 +85,13 @@ mbcs_init(MultibyteCodecState *state, void mbcs_decode_init(MultibyteCodecState *state); -void +int mbcs_decode_reset(MultibyteCodecState *state); void mbcs_encode_init(MultibyteCodecState *state); -void +int mbcs_encode_reset(MultibyteCodecState *state, char **outbuf, int outleft); @@ -103,7 +103,8 @@ mbcs_decode(MultibyteCodecState *state, int mbcs_encode(MultibyteCodecState *state, const ucs4_t** inbuf, size_t inlen, - char** outbuf, size_t outlen); + char** outbuf, size_t outlen, + int flags); #ifdef __cplusplus } diff --git a/singlebytecodec.c b/singlebytecodec.c index a0c099b..3607580 100644 --- a/singlebytecodec.c +++ b/singlebytecodec.c @@ -7,33 +7,31 @@ int sbcs_init(SingleByteCodecState *state, const char *encoding) { - SingleByteCodecState **codecs = __uniconv_get_single_byte_codecs(); - size_t i; + SingleByteCodecState **codecs = __uniconv_get_single_byte_codecs(); + size_t i; - if (!codecs) - return -1; - for (i = 0; codecs[i]; i++) - { - if (!strcmp(codecs[i]->encoding, encoding)) - { - *state = *codecs[i]; - return 0; - } + if (!codecs) + return -1; + for (i = 0; codecs[i]; i++) { + if (!strcmp(codecs[i]->encoding, encoding)) { + *state = *codecs[i]; + return 0; } + } - return -1; + return -1; } static int sbcs_encode_char(SingleByteCodecState *state, uc_char_t unichar) { - size_t i; + size_t i; - for (i = 0; i < state->encoding_map_size; i++) - if (state->encoding_map[i].from == unichar) - return state->encoding_map[i].to; - return -1; + for (i = 0; i < state->encoding_map_size; i++) + if (state->encoding_map[i].from == unichar) + return state->encoding_map[i].to; + return -1; } int @@ -43,19 +41,19 @@ sbcs_encode(SingleByteCodecState *state, char **outbuf, size_t outleft) { - for (; inleft; inleft--, outleft--) - { - int result = sbcs_encode_char(state, **inbuf); - if (result < 0) - return UNICONV_EILSEQ; - if (!outleft) - return UNICONV_E2BIG; - **outbuf = (char)result; - (*outbuf)++; - (*inbuf)++; - } + for (; inleft; inleft--, outleft--) + { + int result = sbcs_encode_char(state, **inbuf); + if (result < 0) + return UNICONV_EILSEQ; + if (!outleft) + return UNICONV_E2BIG; + **outbuf = (char)result; + (*outbuf)++; + (*inbuf)++; + } - return 0; + return 0; } int @@ -65,21 +63,20 @@ sbcs_decode(SingleByteCodecState *state, uc_char_t **outbuf, size_t outleft) { - const uc_uint8_t **uinbuf = (const uc_uint8_t **)inbuf; + const uc_uint8_t **uinbuf = (const uc_uint8_t **)inbuf; - for (; inleft; inleft--, outleft--) - { - unsigned index = **uinbuf; + for (; inleft; inleft--, outleft--) + { + unsigned index = **uinbuf; - if (index >= state->decoding_table_size) - return UNICONV_EILSEQ; - if (!outleft) - return UNICONV_E2BIG; - **outbuf = state->decoding_table[index]; - (*outbuf)++; - (*uinbuf)++; - } + if (index >= state->decoding_table_size) + return UNICONV_EILSEQ; + if (!outleft) + return UNICONV_E2BIG; + **outbuf = state->decoding_table[index]; + (*outbuf)++; + (*uinbuf)++; + } - return 0; + return 0; } - diff --git a/tableconverter.c b/tableconverter.c index 2708470..25562b9 100644 --- a/tableconverter.c +++ b/tableconverter.c @@ -31,20 +31,24 @@ tabconverter_encode(struct converter *suc, struct tabconverter *uc = (struct tabconverter*)suc; int ret; - if (!uc) - return UNICONV_EINVAL; - if (uc->singlebyte) { + /* flush */ + if (!inbuf) + return UNICONV_SUCCESS; + ret = sbcs_encode(&uc->sstate, inbuf, inleft, outbuf, outbytesleft); } else { - mbcs_encode_init(&uc->mstate); - ret = mbcs_encode(&uc->mstate, inbuf, inleft, outbuf, outbytesleft); + /* flush */ + if (!inbuf) + ret = mbcs_encode_reset(&uc->mstate, outbuf, outbytesleft); + else + ret = mbcs_encode(&uc->mstate, inbuf, inleft, outbuf, outbytesleft, 0); if (ret == MBERR_TOOFEW) + ret = UNICONV_EINVAL; + else if (ret == MBERR_TOOSMALL) ret = UNICONV_E2BIG; - else if (ret == MBERR_TOOSMALL || ret > 0) - ret = UNICONV_EILSEQ; else if (ret < 0) - ret = UNICONV_EINVAL; + ret = UNICONV_EILSEQ; } return ret; @@ -60,25 +64,32 @@ tabconverter_decode(struct converter *suc, struct tabconverter *uc = (struct tabconverter*)suc; int ret; - if (!uc) - return UNICONV_EINVAL; - if (uc->singlebyte) { ret = sbcs_decode(&uc->sstate, inbuf, inbytesleft, outbuf, outleft); } else { - mbcs_decode_init(&uc->mstate); ret = mbcs_decode(&uc->mstate, inbuf, inbytesleft, outbuf, outleft); if (ret == MBERR_TOOFEW) + ret = UNICONV_EINVAL; + else if (ret == MBERR_TOOSMALL) ret = UNICONV_E2BIG; - else if (ret == MBERR_TOOSMALL || ret > 0) - ret = UNICONV_EILSEQ; else if (ret < 0) - ret = UNICONV_EINVAL; + ret = UNICONV_EILSEQ; } return ret; } +static void +tabconverter_reset(struct converter *suc) +{ + struct tabconverter *uc = (struct tabconverter *)suc; + + if (uc->singlebyte) { + mbcs_encode_init(&uc->mstate); + mbcs_decode_init(&uc->mstate); + } +} + struct converter* tabconverter_open(const char *charset) { @@ -107,5 +118,6 @@ tabconverter_open(const char *charset) conv->base.encode = tabconverter_encode; conv->base.decode = tabconverter_decode; conv->base.close = tabconverter_close; + conv->base.reset = tabconverter_reset; return &conv->base; } @@ -42,7 +42,7 @@ int main(int argc, char **argv) inp = inbuffer; outp = outbuffer; ret = uniconv_conv(conv, (const char **)&inp, inlen, &outp, sizeof(outbuffer)); - if (ret == UNICONV_E2BIG && inlen < sizeof(inbuffer)) { + if (ret == UNICONV_EINVAL && inlen < sizeof(inbuffer)) { size_t extralen = fread(inbuffer + inlen, 1, 1, infp); if (!extralen) break; @@ -101,28 +101,38 @@ uniconv_conv(uniconv_t *uc, if (!uc) return UNICONV_EBADF; - if (!inbuf || !outbuf) - return UNICONV_EINVAL; - - if (inleft < UNICONV_MAX_LOCAL) - ucs4 = local_ucs4; - else - ucs4 = malloc(sizeof(uc_char_t) * inleft); - if (!ucs4) - return UNICONV_EINVAL; - - inucs4 = ucs4; - ucs4len = inleft; - ret = uc->from->decode(uc->from, inbuf, inleft, &inucs4, ucs4len); - if (ret) - goto error_decode; - - ucs4len = inucs4 - ucs4; - inucs4 = ucs4; - ret = uc->to->encode(uc->to, (const ucs4_t **)&inucs4, ucs4len, outbuf, outleft); + /* reset converter */ + if (!inbuf && !outbuf) { + converter_reset(uc->from); + converter_reset(uc->to); + return UNICONV_SUCCESS; + } + + /* converting/pushing input data */ + if (inbuf) { + if (inleft < UNICONV_MAX_LOCAL) + ucs4 = local_ucs4; + else + ucs4 = malloc(sizeof(uc_char_t) * inleft); + if (!ucs4) + return UNICONV_EINVAL; + + inucs4 = ucs4; + ucs4len = inleft; + ret = uc->from->decode(uc->from, inbuf, inleft, &inucs4, ucs4len); + if (ret) + goto error_decode; + + ucs4len = inucs4 - ucs4; + inucs4 = ucs4; + ret = uc->to->encode(uc->to, (const ucs4_t **)&inucs4, ucs4len, outbuf, outleft); + } else { + /* converting pending data in buffer */ + ret = uc->to->encode(uc->to, NULL, 0, outbuf, outleft); + } error_decode: - if (ucs4 != local_ucs4) + if (ucs4 && ucs4 != local_ucs4) free (ucs4); return ret; } @@ -7,6 +7,9 @@ extern "C" { #endif + /** + * the conversion descriptor + */ struct _uniconv; typedef struct _uniconv uniconv_t; @@ -16,9 +19,40 @@ extern "C" { #define UNICONV_EINVAL (-3) #define UNICONV_EBADF (-4) + /** + * open a descriptor for converting /from/ charset to /to/ charset + * + * @param from the from charset + * @param to the to charset + * + * @return a conversion descriptor, %NULL in case of error. + */ uniconv_t* uniconv_open(const char *from, const char *to); + /** + * peform charset converting + * + * If both inbuf and outbuf set to %NULL, then the descriptor + * state will be reset. If only the inbuf set to %NULL, then the + * descriptor will reset its shift state and write any pending + * data to outbuf. + * + * @param inbuf a pointer to input buffer + * @param inleft the size of input buffer + * @param outbuf a pointer to output buffer + * @param outleft the size of output buffer + * + * @return the number of characters converted in a non-reversible + * way during this call. + * + * In case of error, it returns + * + * UNICONV_E2BIG There is not sufficient room at *outbuf. + * UNICONV_EILSEQ An invalid multibyte sequence has been encountered in the input. + * UNICONV_EINVAL An incomplete multibyte sequence has been encountered in the input. + * + */ int uniconv_conv(uniconv_t *uc, const char **inbuf, @@ -26,6 +60,9 @@ extern "C" { char **outbuf, size_t outleft); + /** + * close the conversion descriptor + */ void uniconv_close(uniconv_t *uc); diff --git a/utfconverter.c b/utfconverter.c index 9818c27..04a8199 100644 --- a/utfconverter.c +++ b/utfconverter.c @@ -21,6 +21,9 @@ utf8_encode(struct converter *conv, { size_t i; + if (!inbuf) + return UNICONV_SUCCESS; + for (i = 0; i < inleft; i++) { int seqlen = ucs4toutf8(**inbuf, NULL); if (seqlen < 0) @@ -48,7 +51,7 @@ utf8_decode(struct converter *conv, uc_char_t unichar; int seqlen = ucs4fromutf8(*inbuf, &unichar, inleft); if (seqlen == -2) - return UNICONV_E2BIG; + return UNICONV_EINVAL; else if (seqlen < 0) return UNICONV_EILSEQ; if (!outleft) @@ -75,6 +78,9 @@ utf16_encode(struct converter *conv, size_t i; uc_uint16_t **soutbuf = (uc_uint16_t **)outbuf; + if (!inbuf) + return UNICONV_SUCCESS; + for (i = 0; i < inleft; i++) { int seqlen = ucs4toutf16(**inbuf, NULL); if (seqlen < 0) @@ -105,7 +111,7 @@ utf16_decode(struct converter *conv, int seqlen = ucs4fromutf16(*sinbuf, &unichar, inleft / sizeof(uc_uint16_t)); if (seqlen == -2) - return UNICONV_E2BIG; + return UNICONV_EINVAL; else if (seqlen < 0) return UNICONV_EILSEQ; if (!outleft) @@ -129,6 +135,9 @@ utf32_encode(struct converter *conv, char **outbuf, size_t outleft) { + if (!inbuf) + return UNICONV_SUCCESS; + if (inleft * sizeof(uc_char_t) > outleft) return UNICONV_E2BIG; @@ -146,7 +155,7 @@ utf32_decode(struct converter *conv, size_t outleft) { if (inleft & 3) - return UNICONV_EILSEQ; + return UNICONV_EINVAL; if (inleft > outleft) return UNICONV_E2BIG; @@ -185,5 +194,7 @@ utfconverter_open(const char *charset) conv->base.decode = utf32_decode; } conv->base.close = utfconverter_close; + conv->base.reset = NULL; + return &conv->base; } |