diff options
author | Tor Lillqvist <tml@iki.fi> | 2000-02-02 23:39:32 +0000 |
---|---|---|
committer | Tor Lillqvist <tml@src.gnome.org> | 2000-02-02 23:39:32 +0000 |
commit | c22cf34e923b81962da554c8edeb9366f044ae8c (patch) | |
tree | 3198ab0b76436d25848c1720be73d0214d16b799 /gstrfuncs.c | |
parent | 86b2741c1e272d35ecba107ddf9d5b873773ad57 (diff) |
glib.h, gstrfuncs.c: New functions for conversion between UTF-8 and the file name encoding expected by the C runtime.
2000-02-01 Tor Lillqvist <tml@iki.fi>
* glib.h
* gstrfuncs.c (g_filename_to_utf8, g_filename_from_utf8): New
functions for conversion between UTF-8 and the encoding expected
by C runtime functions like open() and stat(), and returned by
readdir().
Implement them on Win32 where we use the system "ANSI" codepage,
which might be single-byte or double-byte. On Unix, just skip the
issue for now and provide dummy implementations that return a copy
of the argument.
* README.win32
* build-dll
* glib.def: Minor updates.
Diffstat (limited to 'gstrfuncs.c')
-rw-r--r-- | gstrfuncs.c | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/gstrfuncs.c b/gstrfuncs.c index e549260d7..6df775774 100644 --- a/gstrfuncs.c +++ b/gstrfuncs.c @@ -42,6 +42,11 @@ #include <signal.h> #endif #include "glib.h" + +#ifdef G_OS_WIN32 +#include <windows.h> +#endif + /* do not include <unistd.h> in this place since it * inteferes with g_strsignal() on some OSes */ @@ -1068,6 +1073,225 @@ g_strescape (const gchar *source, return dest; } +/* + * g_filename_to_utf8 + * + * Converts a string which is in the encoding used for file names by + * the C runtime (usually the same as that used by the operating + * system) in the current locale into a UTF-8 string. + */ + +gchar * +g_filename_to_utf8 (const gchar *opsysstring) +{ +#ifdef G_OS_WIN32 + + gint i, clen, wclen, first; + const gint len = strlen (opsysstring); + wchar_t *wcs, wc; + gchar *result, *bp; + const wchar_t *wcp; + + wcs = g_new (wchar_t, len); + wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len); + + wcp = wcs; + clen = 0; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + clen += 1; + else if (wc < 0x800) + clen += 2; + else if (wc < 0x10000) + clen += 3; + else if (wc < 0x200000) + clen += 4; + else if (wc < 0x4000000) + clen += 5; + else + clen += 6; + } + + result = g_malloc (clen + 1); + + wcp = wcs; + bp = result; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + { + first = 0; + clen = 1; + } + else if (wc < 0x800) + { + first = 0xc0; + clen = 2; + } + else if (wc < 0x10000) + { + first = 0xe0; + clen = 3; + } + else if (wc < 0x200000) + { + first = 0xf0; + clen = 4; + } + else if (wc < 0x4000000) + { + first = 0xf8; + clen = 5; + } + else + { + first = 0xfc; + clen = 6; + } + + /* Woo-hoo! 
*/ + switch (clen) + { + case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 1: bp[0] = wc | first; + } + + bp += clen; + } + *bp = 0; + + g_free (wcs); + + return result; + +#else + + return g_strdup (opsysstring); + +#endif +} + +/* + * g_filename_from_utf8 + * + * The reverse of g_filename_to_utf8. + */ + +gchar * +g_filename_from_utf8 (const gchar *utf8string) +{ +#ifdef G_OS_WIN32 + + gint i, mask, clen, wclen, mblen; + const gint len = strlen (utf8string); + wchar_t *wcs, *wcp; + gchar *result; + guchar *cp, *end, c; + gint n; + + /* First convert to wide chars */ + cp = (guchar *) utf8string; + end = cp + len; + n = 0; + wcs = g_new (wchar_t, len + 1); + wcp = wcs; + while (cp != end) + { + mask = 0; + c = *cp; + + if (c < 0x80) + { + clen = 1; + mask = 0x7f; + } + else if ((c & 0xe0) == 0xc0) + { + clen = 2; + mask = 0x1f; + } + else if ((c & 0xf0) == 0xe0) + { + clen = 3; + mask = 0x0f; + } + else if ((c & 0xf8) == 0xf0) + { + clen = 4; + mask = 0x07; + } + else if ((c & 0xfc) == 0xf8) + { + clen = 5; + mask = 0x03; + } + else if ((c & 0xfc) == 0xfc) + { + clen = 6; + mask = 0x01; + } + else + { + g_free (wcs); + return NULL; + } + + if (cp + clen > end) + { + g_free (wcs); + return NULL; + } + + *wcp = (cp[0] & mask); + for (i = 1; i < clen; i++) + { + if ((cp[i] & 0xc0) != 0x80) + { + g_free (wcs); + return NULL; + } + *wcp <<= 6; + *wcp |= (cp[i] & 0x3f); + } + + cp += clen; + wcp++; + n++; + } + if (cp != end) + { + g_free (wcs); + return NULL; + } + + /* n is the number of wide chars constructed */ + + /* Convert to a string in the current ANSI codepage */ + + result = g_new (gchar, 3 * n + 1); + mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL); 
+ result[mblen] = 0; + g_free (wcs); + + return result; + +#else + + return g_strdup (utf8string); + +#endif +} + + /* blame Elliot for these next five routines */ gchar* g_strchug (gchar *string) |