summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorWill Thompson <will.thompson@collabora.co.uk>2010-10-10 00:26:53 +0100
committerWill Thompson <will.thompson@collabora.co.uk>2012-10-29 10:17:35 +0000
commit79425a010af79070a6b93c25deb5690cc72daf87 (patch)
tree34839aeef218c62fff27a9aa009453085d2c1c5a /src
parent3dc023fd1a745504ed9035ebba9501bf916f7a9d (diff)
Sanitize incoming messages to remove UTF-8 non-characters.
https://bugs.freedesktop.org/show_bug.cgi?id=30741
Diffstat (limited to 'src')
-rw-r--r--src/idle-connection.c41
1 files changed, 41 insertions, 0 deletions
diff --git a/src/idle-connection.c b/src/idle-connection.c
index 3a64922..c9e1829 100644
--- a/src/idle-connection.c
+++ b/src/idle-connection.c
@@ -1391,6 +1391,37 @@ static gboolean idle_connection_hton(IdleConnection *obj, const gchar *input, gc
return TRUE;
}
+#define U_FFFD_REPLACEMENT_CHARACTER_UTF8 "\357\277\275"
+
+/*
+ * idle_salvage_utf8:
+ * @supposed_utf8: text that failed UTF-8 validation; it is only read
+ * @bytes: length of @supposed_utf8 in bytes, or negative if the text is
+ *  NUL-terminated
+ *
+ * Builds a copy of @supposed_utf8 in which every byte that stops
+ * g_utf8_validate() is replaced by U+FFFD REPLACEMENT CHARACTER, while the
+ * valid runs in between are copied unchanged.
+ *
+ * Returns: a newly allocated, NUL-terminated string which the caller must
+ * release with g_free().
+ */
+static gchar *
+idle_salvage_utf8 (const gchar *supposed_utf8, gssize bytes)
+{
+	/* The size is only a preallocation hint.  A negative @bytes would be
+	 * converted to an enormous gsize, so clamp it to zero and let the
+	 * GString grow on demand instead.
+	 */
+	GString *salvaged = g_string_sized_new (bytes > 0 ? (gsize) bytes : 0);
+	const gchar *end;
+	gchar *ret;
+	gsize ret_len;
+
+	while (!g_utf8_validate (supposed_utf8, bytes, &end)) {
+		/* @end points at the first offending byte. */
+		gssize valid_bytes = end - supposed_utf8;
+
+		g_string_append_len (salvaged, supposed_utf8, valid_bytes);
+		g_string_append_len (salvaged, U_FFFD_REPLACEMENT_CHARACTER_UTF8, 3);
+
+		/* Step over the valid prefix plus exactly one bad byte; each
+		 * pass therefore consumes at least one byte of input.
+		 */
+		supposed_utf8 += (valid_bytes + 1);
+
+		/* A negative length means "until NUL" and must stay as it is. */
+		if (bytes >= 0)
+			bytes -= (valid_bytes + 1);
+	}
+
+	/* Whatever is left has just been validated; copy it verbatim. */
+	g_string_append_len (salvaged, supposed_utf8, bytes);
+
+	/* Read the length before the GString wrapper is freed. */
+	ret_len = salvaged->len;
+	ret = g_string_free (salvaged, FALSE);
+
+	/* It had better be valid now... */
+	g_return_val_if_fail (g_utf8_validate (ret, ret_len, NULL), ret);
+	return ret;
+}
+
+
static gchar *
idle_connection_ntoh(IdleConnection *obj, const gchar *input) {
IdleConnectionPrivate *priv = IDLE_CONNECTION_GET_PRIVATE(obj);
@@ -1415,6 +1446,16 @@ idle_connection_ntoh(IdleConnection *obj, const gchar *input) {
if (*p & (1 << 7))
*p = '?';
}
+ } else if (!g_utf8_validate (ret, bytes_written, NULL)) {
+ /* Annoyingly g_convert(UTF-8, UTF-8) doesn't filter out well-formed
+ * non-characters, so we have to do some further processing.
+ */
+ gchar *salvaged;
+
+ IDLE_DEBUG("Invalid UTF-8, salvaging what we can...");
+ salvaged = idle_salvage_utf8(ret, bytes_written);
+ g_free(ret);
+ ret = salvaged;
}
return ret;