diff options
author | Luo Jinghua <sunmoon1997@gmail.com> | 2010-01-09 00:31:23 +0800 |
---|---|---|
committer | Luo Jinghua <sunmoon1997@gmail.com> | 2010-01-09 00:31:23 +0800 |
commit | ee958d0cef0cb03f071dea7e180a76823da327cd (patch) | |
tree | 00d690fdccf543626996d035fbc12758579a9e64 | |
parent | 03da7d1dadfe9f3f2ed080e29adfc448c21fbff9 (diff) |
uniconv: parse //ignore and //translit extension in `to' charset
-rw-r--r-- | uniconv.c | 47 |
1 files changed, 43 insertions, 4 deletions
```diff
@@ -10,9 +10,15 @@
 #include <stdio.h>
 #include <assert.h>
 
+#define UNICONV_MODE_IGNORE (1 << 0)
+#define UNICONV_MODE_TRANSLIT (1 << 1)
+
 struct _uniconv {
     struct converter *from;
     struct converter *to;
+    char from_charset[64];
+    char to_charset[64];
+    int mode;
 };
 
 static char
@@ -23,9 +29,22 @@ uniconv_lower(char c)
     return c;
 }
 
+static char *
+uniconv_strrstr(const char *str, const char *token)
+{
+    int len = strlen(token);
+    const char *p = str + strlen(str);
+
+    while (str <= --p)
+        if (p[0] == token[0] && strncmp(p, token, len) == 0)
+            return (char *)p;
+    return NULL;
+}
+
 static const char*
 uniconv_canonical_charset(const char *charset,
-                          char *buf, size_t bufsz)
+                          char *buf, size_t bufsz,
+                          int *retmode)
 {
     size_t i;
     size_t len = strlen(charset);
@@ -39,6 +58,19 @@ uniconv_canonical_charset(const char *charset,
         buf[i] = uniconv_lower(charset[i]);
     }
 
+    if (retmode) {
+        char *p;
+
+        for (p = uniconv_strrstr(buf, "//"); p;
+             p = uniconv_strrstr(buf, "//")) {
+            if (!strcmp(buf + 2, "ignore"))
+                *retmode |= UNICONV_MODE_IGNORE;
+            else if (!strcmp(buf + 2, "translit"))
+                *retmode |= UNICONV_MODE_TRANSLIT;
+            *p = '\0';
+        }
+    }
+
     return get_canonical_charset(buf);
 }
 
@@ -46,18 +78,22 @@ uniconv_t*
 uniconv_open(const char *to, const char *from)
 {
     char frombuf[64];
-    char tobuf[64];
+    char tobuf[64 * 2];
+    int mode;
     uniconv_t *uc;
     struct converter *fc, *tc;
 
     if (!from || !to)
         return NULL;
 
-    from = uniconv_canonical_charset(from, frombuf, sizeof(frombuf));
-    to = uniconv_canonical_charset(to, tobuf, sizeof(tobuf));
+    mode = 0;
+    from = uniconv_canonical_charset(from, frombuf, sizeof(frombuf), NULL);
+    to = uniconv_canonical_charset(to, tobuf, sizeof(tobuf), &mode);
     if (!from || !to)
         return NULL;
+    if (strlen(from) > 63 || strlen(to) > 63)
+        return NULL;
 
     fc = converter_open(from);
     tc = converter_open(to);
     if (!fc || !tc)
@@ -68,6 +104,9 @@ uniconv_open(const char *to, const char *from)
         goto close_conv;
     uc->from = fc;
     uc->to = tc;
+    uc->mode = mode;
+    strcpy(uc->from_charset, from);
+    strcpy(uc->to_charset, to);
     return uc;
 close_conv:
     converter_close(fc);
```