diff options
author | Behdad Esfahbod <behdad@behdad.org> | 2014-07-10 16:06:23 -0400 |
---|---|---|
committer | Behdad Esfahbod <behdad@behdad.org> | 2014-07-10 16:06:23 -0400 |
commit | d6bcbea27bd51f4e0a8e5b0cc35b2fdb8d1896d2 (patch) | |
tree | 13a8fc14fe56758addc09704eb40fd5d28667098 /charset | |
parent | 54b4496e9c2368c53f1cff57347c1f395328a142 (diff) |
Support 4-byte UTF-8 sequences
Patch from Christina Brien <chris.brien@cisco.com>.
Diffstat (limited to 'charset')
-rw-r--r-- | charset/fribidi-char-sets-utf8.c | 11 |
1 file changed, 10 insertions, 1 deletion
diff --git a/charset/fribidi-char-sets-utf8.c b/charset/fribidi-char-sets-utf8.c
index 95a7628..dac66b3 100644
--- a/charset/fribidi-char-sets-utf8.c
+++ b/charset/fribidi-char-sets-utf8.c
@@ -65,13 +65,22 @@ fribidi_utf8_to_unicode (
           *us++ = ((*s & 0x1f) << 6) + (*(s + 1) & 0x3f);
           s += 2;
         }
-      else /* 3 byte */
+      else if (ch <= 0xef) /* 3 byte */
         {
           *us++ =
             ((int) (*s & 0x0f) << 12) +
             ((*(s + 1) & 0x3f) << 6) + (*(s + 2) & 0x3f);
           s += 3;
         }
+      else /* 4 byte */
+        {
+          *us++ =
+            ((int) (*s & 0x07) << 18) +
+            ((*(s + 1) & 0x3f) << 12) +
+            ((*(s + 2) & 0x3f) << 6) +
+            ((*(s + 3) & 0x3f) << 0);
+          s += 4;
+        }
       length++;
     }
   return (length);