summaryrefslogtreecommitdiff
path: root/uniconv.c
blob: e73efa776a2e72c6c2a20bb1bb0b230923210094 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#include "uniconv.h"
#include "uniconvint.h"
#include "multibytecodec.h"
#include "singlebytecodec.h"
#include "charsetalias.h"
#include "converter.h"

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/*
 * Conversion handle: pairs the two converters a conversion runs through.
 * Both are opened by uniconv_open() and closed by uniconv_close().
 */
struct _uniconv {
    /* source-side converter; uniconv_conv() calls its decode() */
    struct converter *from;
    /* target-side converter; uniconv_conv() calls its encode() */
    struct converter *to;
};

/*
 * Map an uppercase letter in the range 'A'..'Z' to its lowercase
 * counterpart; every other character is returned unchanged.
 * Works purely on character codes, independent of the current locale.
 */
static char
uniconv_lower(char c)
{
    const int case_offset = 'a' - 'A';

    return (c < 'A' || c > 'Z') ? c : (char)(c + case_offset);
}

/*
 * Normalize a charset name and look up its canonical form.
 *
 * The name is copied into buf with every '-' replaced by '_' and every
 * 'A'..'Z' lowercased; the terminating NUL is copied as well.  The
 * normalized copy is then handed to get_canonical_charset().
 *
 * charset: NUL-terminated name; must not be NULL (it is passed to strlen).
 * buf:     scratch buffer receiving the normalized name.
 * bufsz:   size of buf in bytes.
 *
 * Returns NULL when the name plus its terminator does not fit in buf;
 * otherwise returns whatever get_canonical_charset() yields for the
 * normalized name.
 */
static const char*
uniconv_canonical_charset(const char *charset,
			  char *buf, size_t bufsz)
{
    /* number of bytes to copy, terminator included */
    const size_t total = strlen(charset) + 1;
    size_t pos;

    if (total > bufsz)
	return NULL;

    for (pos = 0; pos < total; pos++) {
	const char ch = charset[pos];

	buf[pos] = (ch == '-') ? '_' : uniconv_lower(ch);
    }

    return get_canonical_charset(buf);
}

/*
 * Create a conversion handle that converts text from charset `from`
 * into charset `to`.
 *
 * Both names are normalized through uniconv_canonical_charset() using
 * 64-byte scratch buffers, so a name that does not fit in 64 bytes
 * (terminator included) is rejected.
 *
 * Returns a handle to be released with uniconv_close(), or NULL when
 * either name is NULL, cannot be canonicalized, a converter cannot be
 * opened, or memory allocation fails.  On failure every converter that
 * was opened is closed again.
 */
uniconv_t*
uniconv_open(const char *to, const char *from)
{
    char frombuf[64];
    char tobuf[64];
    uniconv_t *uc;
    struct converter *fc = NULL;
    struct converter *tc = NULL;

    if (!from || !to)
	return NULL;

    from = uniconv_canonical_charset(from, frombuf, sizeof(frombuf));
    to = uniconv_canonical_charset(to, tobuf, sizeof(tobuf));
    if (!from || !to)
	return NULL;

    /* Open one side at a time so a failure skips the second open. */
    fc = converter_open(from);
    if (!fc)
	goto close_conv;
    tc = converter_open(to);
    if (!tc)
	goto close_conv;

    uc = malloc(sizeof(*uc));
    if (!uc)
	goto close_conv;
    uc->from = fc;
    uc->to = tc;
    return uc;

 close_conv:
    /*
     * Only close what was actually opened: whether converter_close()
     * tolerates NULL is not visible from this file, so do not rely on it.
     */
    if (fc)
	converter_close(fc);
    if (tc)
	converter_close(tc);
    return NULL;
}

/*
 * Release a conversion handle: close both of its converters, then free
 * the handle itself.  Passing NULL is a no-op.
 */
void
uniconv_close(uniconv_t *uc)
{
    if (uc != NULL) {
	converter_close(uc->from);
	converter_close(uc->to);
	free(uc);
    }
}

#define UNICONV_MAX_LOCAL 2048
/*
 * Convert input through the handle's source decoder and target encoder.
 *
 * Call modes, selected by which buffer pointers are NULL:
 *   - inbuf == NULL and outbuf == NULL: reset both converters and
 *     return UNICONV_SUCCESS.
 *   - inbuf == NULL, outbuf != NULL: call the target encoder with no
 *     input so it can write any pending data to *outbuf.
 *   - inbuf != NULL: convert *inleft bytes starting at *inbuf, one
 *     decoded character at a time, writing to *outbuf.
 *
 * *inbuf / *inleft are advanced past every character that was both
 * decoded and encoded successfully; *outbuf / *outleft are advanced
 * past everything the encoder wrote.
 *
 * Returns:
 *   UNICONV_EBADF   uc is NULL.
 *   UNICONV_EINVAL  a required pointer argument is NULL (outbuf or
 *                   outleft outside the reset mode, or inleft when
 *                   inbuf is given), or the decoder still reported
 *                   UNICONV_EINVAL after being offered all remaining
 *                   input bytes.
 *   UNICONV_EILSEQ  the decoder reported UNICONV_E2BIG for a single
 *                   character (remapped here), or reported EILSEQ itself.
 *   otherwise       the last value returned by the decoder/encoder;
 *                   UNICONV_SUCCESS if there was no input to convert.
 */
int
uniconv_conv(uniconv_t *uc,
	     const char **inbuf,
	     size_t *inleft,
	     char **outbuf,
	     size_t *outleft)
{
    int ret;

    if (!uc)
	return UNICONV_EBADF;

    /* reset converter */
    if (!inbuf && !outbuf) {
	converter_reset(uc->from);
	converter_reset(uc->to);
	return UNICONV_SUCCESS;
    }

    /* Every remaining path reads *outbuf and updates *outleft. */
    if (!outbuf || !outleft)
	return UNICONV_EINVAL;

    /* converting/pushing input data */
    if (inbuf) {
	uc_char_t ucs4;
	uc_char_t *ucsbuf;
	size_t ucs4len;

	if (!inleft)
	    return UNICONV_EINVAL;

	ret = UNICONV_SUCCESS;
	while (*inleft) {
	    size_t i;
	    const char *abuf;
	    const char *aoutbuf;

	    /*
	     * decode a char at a time: offer the decoder 1, 2, ... bytes
	     * and retry with one more byte for as long as it answers
	     * UNICONV_EINVAL.  NOTE(review): this presumes EINVAL means
	     * "sequence incomplete" -- confirm against the decoders.
	     */
	    for (i = 1; i <= *inleft; i++) {
		ucsbuf = &ucs4;
		abuf = *inbuf;
		ret = uc->from->decode(uc->from, &abuf, i, &ucsbuf, 1);
		if (ret < 0 && ret == UNICONV_EINVAL)
		    continue;
		break;
	    }
	    /* the output room was a single character, so report bad input */
	    if (ret == UNICONV_E2BIG)
		ret = UNICONV_EILSEQ;
	    if (ret < 0)
		return ret;

	    /* number of characters the decoder stored (ucsbuf was advanced) */
	    ucs4len = ucsbuf - &ucs4;
	    ucsbuf = &ucs4;
	    aoutbuf = *outbuf;
	    ret = uc->to->encode(uc->to, (const ucs4_t **)&ucsbuf, ucs4len,
				 outbuf, *outleft);
	    *outleft -= *outbuf - aoutbuf;
	    /* on encoder failure the input position is left untouched */
	    if (ret < 0)
		break;
	    *inleft -= i;
	    (*inbuf) += i;
	}
    } else {
	/* converting pending data in buffer */
	const char *saved_outbuf = *outbuf;

	ret = uc->to->encode(uc->to, NULL, 0, outbuf, *outleft);
	*outleft -= *outbuf - saved_outbuf;
    }

    return ret;
}