summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luo Jinghua <sunmoon1997@gmail.com> 2010-01-09 00:31:23 +0800
committer Luo Jinghua <sunmoon1997@gmail.com> 2010-01-09 00:31:23 +0800
commit ee958d0cef0cb03f071dea7e180a76823da327cd (patch)
tree 00d690fdccf543626996d035fbc12758579a9e64
parent 03da7d1dadfe9f3f2ed080e29adfc448c21fbff9 (diff)
uniconv: parse //ignore and //translit extension in `to' charset
-rw-r--r--uniconv.c47
1 files changed, 43 insertions, 4 deletions
diff --git a/uniconv.c b/uniconv.c
index d38269e..b0a1ccb 100644
--- a/uniconv.c
+++ b/uniconv.c
@@ -10,9 +10,15 @@
#include <stdio.h>
#include <assert.h>
+#define UNICONV_MODE_IGNORE (1 << 0) /* mode bit for a "//ignore" charset suffix */
+#define UNICONV_MODE_TRANSLIT (1 << 1) /* mode bit for a "//translit" charset suffix */
+
struct _uniconv {
struct converter *from;
struct converter *to;
+ char from_charset[64]; /* canonical source charset name, copied in uniconv_open */
+ char to_charset[64]; /* canonical target charset name, copied in uniconv_open */
+ int mode; /* OR of UNICONV_MODE_* bits parsed from the `to' charset */
};
static char
@@ -23,9 +29,22 @@ uniconv_lower(char c)
return c;
}
+/* Return the last occurrence of token in str, or NULL if there is none. */
+static char *
+uniconv_strrstr(const char *str, const char *token)
+{
+    size_t len = strlen(token);
+    size_t i = strlen(str);
+    while (i-- > 0) /* index-based scan: never forms a pointer before str */
+        if (str[i] == token[0] && strncmp(str + i, token, len) == 0)
+            return (char *)(str + i);
+    return NULL;
+}
+
static const char*
uniconv_canonical_charset(const char *charset,
- char *buf, size_t bufsz)
+ char *buf, size_t bufsz,
+ int *retmode)
{
size_t i;
size_t len = strlen(charset);
@@ -39,6 +58,19 @@ uniconv_canonical_charset(const char *charset,
buf[i] = uniconv_lower(charset[i]);
}
+    if (retmode) {
+        char *p;
+
+        /* Peel "//flag" suffixes off the end; the flag name starts at p + 2. */
+        while ((p = uniconv_strrstr(buf, "//")) != NULL) {
+            if (!strcmp(p + 2, "ignore"))
+                *retmode |= UNICONV_MODE_IGNORE;
+            else if (!strcmp(p + 2, "translit"))
+                *retmode |= UNICONV_MODE_TRANSLIT;
+            *p = '\0';
+        }
+    }
+
return get_canonical_charset(buf);
}
@@ -46,18 +78,22 @@ uniconv_t*
uniconv_open(const char *to, const char *from)
{
char frombuf[64];
- char tobuf[64];
+ char tobuf[64 * 2];
+ int mode;
uniconv_t *uc;
struct converter *fc, *tc;
if (!from || !to)
return NULL;
- from = uniconv_canonical_charset(from, frombuf, sizeof(frombuf));
- to = uniconv_canonical_charset(to, tobuf, sizeof(tobuf));
+ mode = 0;
+ from = uniconv_canonical_charset(from, frombuf, sizeof(frombuf), NULL);
+ to = uniconv_canonical_charset(to, tobuf, sizeof(tobuf), &mode);
if (!from || !to)
return NULL;
+ if (strlen(from) > 63 || strlen(to) > 63)
+ return NULL;
fc = converter_open(from);
tc = converter_open(to);
if (!fc || !tc)
@@ -68,6 +104,9 @@ uniconv_open(const char *to, const char *from)
goto close_conv;
uc->from = fc;
uc->to = tc;
+ uc->mode = mode;
+ strcpy(uc->from_charset, from);
+ strcpy(uc->to_charset, to);
return uc;
close_conv:
converter_close(fc);