diff options
Diffstat (limited to 'utfconverter.c')
-rw-r--r-- | utfconverter.c | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/utfconverter.c b/utfconverter.c new file mode 100644 index 0000000..039c39f --- /dev/null +++ b/utfconverter.c @@ -0,0 +1,188 @@ +#include "converter.h" +#include "utfconverter.h" +#include "uniconv.h" +#include "unicode.h" + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> + +struct utfconverter { + struct converter base; +}; + +static int +utf8_encode(struct converter *conv, + const uc_char_t **inbuf, + size_t inleft, + char **outbuf, + size_t outleft) +{ + size_t i; + + for (i = 0; i < inleft; i++) { + int seqlen = ucs4toutf8(**inbuf, NULL); + if (seqlen < 0) + return UNICONV_EILSEQ; + if (seqlen > outleft) + return UNICONV_E2BIG; + + ucs4toutf8(**inbuf, *outbuf); + (*inbuf) += 1; + (*outbuf) += seqlen; + outleft -= seqlen; + } + + return UNICONV_SUCCESS; +} + +static int +utf8_decode(struct converter *conv, + const char **inbuf, + size_t inleft, + uc_char_t **outbuf, + size_t outleft) +{ + while (inleft) { + uc_char_t unichar; + int seqlen = ucs4fromutf8(*inbuf, &unichar, inleft); + if (seqlen == -2) + return UNICONV_E2BIG; + else if (seqlen < 0) + return UNICONV_EILSEQ; + if (!outleft) + return UNICONV_E2BIG; + + **outbuf = unichar; + (*outbuf) += 1; + outleft -= 1; + + (*inbuf) += seqlen; + inleft -= seqlen; + } + + return UNICONV_SUCCESS; +} + +static int +utf16_encode(struct converter *conv, + const uc_char_t **inbuf, + size_t inleft, + char **outbuf, + size_t outleft) +{ + size_t i; + short **soutbuf = (short **)outbuf; + + for (i = 0; i < inleft; i++) { + int seqlen = ucs4toutf16(**inbuf, NULL); + if (seqlen < 0) + return UNICONV_EILSEQ; + if (seqlen * sizeof(short) > outleft) + return UNICONV_E2BIG; + + ucs4toutf16(**inbuf, *soutbuf); + (*inbuf) += 1; + (*soutbuf) += seqlen; + outleft -= seqlen; + } + + return UNICONV_SUCCESS; +} + +static int +utf16_decode(struct converter *conv, + const char **inbuf, + size_t inleft, + uc_char_t **outbuf, + size_t outleft) +{ + const short **sinbuf = (const short**)inbuf; + + while (inleft) { + uc_char_t unichar; + int seqlen = ucs4fromutf16(*sinbuf, &unichar, inleft / sizeof(short)); + if (seqlen == -2) + return UNICONV_E2BIG; + else if (seqlen < 0) + return UNICONV_EILSEQ; + if (!outleft) + return UNICONV_E2BIG; + + **outbuf = unichar; + (*outbuf) += 1; + outleft -= 1; + + (*sinbuf) += seqlen; + inleft -= seqlen * sizeof(short); + } + + return UNICONV_SUCCESS; +} + +static int +utf32_encode(struct converter *conv, + const uc_char_t **inbuf, + size_t inleft, + char **outbuf, + size_t outleft) +{ + if (inleft * sizeof(uc_char_t) > outleft) + return UNICONV_E2BIG; + + memcpy(*outbuf, *inbuf, inleft * sizeof(uc_char_t)); + (*inbuf) += inleft; + (*outbuf) += inleft * sizeof(uc_char_t); + return UNICONV_SUCCESS; +} + +static int +utf32_decode(struct converter *conv, + const char **inbuf, + size_t inleft, + uc_char_t **outbuf, + size_t outleft) +{ + if (inleft & 3) + return UNICONV_EILSEQ; + if (inleft > outleft) + return UNICONV_E2BIG; + + memcpy(*outbuf, *inbuf, inleft); + (*inbuf) += inleft; + (*outbuf) += inleft / sizeof(uc_char_t); + return UNICONV_SUCCESS; +} + +static void +utfconverter_close(struct converter *conv) +{ + free(conv); +} + +struct converter * +utfconverter_open(const char *charset) +{ + struct utfconverter *conv; + + if (strcmp(charset, "utf_8") && strcmp(charset, "utf_16") && + strcmp(charset, "utf_32")) + return NULL; + + conv = malloc(sizeof(struct utfconverter)); + if (!conv) + return NULL; + if (!strcmp(charset, "utf_8")) { + conv->base.encode = utf8_encode; + conv->base.decode = utf8_decode; + } else if (!strcmp(charset, "utf_16")) { + conv->base.encode = utf16_encode; + conv->base.decode = utf16_decode; + } else if (!strcmp(charset, "utf_32")) { + conv->base.encode = utf32_encode; + conv->base.decode = utf32_decode; + } + conv->base.close = utfconverter_close; + return &conv->base; +} |