diff options
author | Will Thompson <will.thompson@collabora.co.uk> | 2010-10-10 00:26:53 +0100 |
---|---|---|
committer | Will Thompson <will.thompson@collabora.co.uk> | 2012-10-29 10:17:35 +0000 |
commit | 79425a010af79070a6b93c25deb5690cc72daf87 (patch) | |
tree | 34839aeef218c62fff27a9aa009453085d2c1c5a /src | |
parent | 3dc023fd1a745504ed9035ebba9501bf916f7a9d (diff) |
Sanitize incoming messages to remove UTF-8 non-characters.
https://bugs.freedesktop.org/show_bug.cgi?id=30741
Diffstat (limited to 'src')
-rw-r--r-- | src/idle-connection.c | 41 |
1 file changed, 41 insertions, 0 deletions
diff --git a/src/idle-connection.c b/src/idle-connection.c
index 3a64922..c9e1829 100644
--- a/src/idle-connection.c
+++ b/src/idle-connection.c
@@ -1391,6 +1391,37 @@ static gboolean idle_connection_hton(IdleConnection *obj, const gchar *input, gc
 	return TRUE;
 }
 
+#define U_FFFD_REPLACEMENT_CHARACTER_UTF8 "\357\277\275"
+
+static gchar *
+idle_salvage_utf8 (gchar *supposed_utf8, gssize bytes)
+{
+	GString *salvaged = g_string_sized_new (bytes);
+	const gchar *end;
+	gchar *ret;
+	gsize ret_len;
+
+	while (!g_utf8_validate (supposed_utf8, bytes, &end)) {
+		gssize valid_bytes = end - supposed_utf8;
+
+		g_string_append_len (salvaged, supposed_utf8, valid_bytes);
+		g_string_append_len (salvaged, U_FFFD_REPLACEMENT_CHARACTER_UTF8, 3);
+
+		supposed_utf8 += (valid_bytes + 1);
+		bytes -= (valid_bytes + 1);
+	}
+
+	g_string_append_len (salvaged, supposed_utf8, bytes);
+
+	ret_len = salvaged->len;
+	ret = g_string_free (salvaged, FALSE);
+
+	/* It had better be valid now… */
+	g_return_val_if_fail (g_utf8_validate (ret, ret_len, NULL), ret);
+	return ret;
+}
+
+
 static gchar *
 idle_connection_ntoh(IdleConnection *obj, const gchar *input) {
 	IdleConnectionPrivate *priv = IDLE_CONNECTION_GET_PRIVATE(obj);
@@ -1415,6 +1446,16 @@ idle_connection_ntoh(IdleConnection *obj, const gchar *input) {
 			if (*p & (1 << 7))
 				*p = '?';
 		}
+	} else if (!g_utf8_validate (ret, bytes_written, NULL)) {
+		/* Annoyingly g_convert(UTF-8, UTF-8) doesn't filter out well-formed
+		 * non-characters, so we have to do some further processing.
+		 */
+		gchar *salvaged;
+
+		IDLE_DEBUG("Invalid UTF-8, salvaging what we can...");
+		salvaged = idle_salvage_utf8(ret, bytes_written);
+		g_free(ret);
+		ret = salvaged;
 	}
 
 	return ret;