diff options
author | Simon McVittie <simon.mcvittie@collabora.co.uk> | 2013-04-22 18:18:30 +0100 |
---|---|---|
committer | Simon McVittie <simon.mcvittie@collabora.co.uk> | 2013-04-24 14:45:42 +0100 |
commit | 3e0498048df554bfaa30c42aef1220f2b7135ed3 (patch) | |
tree | 3575a29c91e0db20e1ec8be4d6ebeb1a9b1c273f | |
parent | b0dc6e3fb9754354b4b29476217c77277d16dd23 (diff) |
messages/invalid-utf8.py: amend test-case to work under GLib 2.36
Reviewed-by: Guillaume Desmottes <guillaume.desmottes@collabora.co.uk>
-rw-r--r-- | tests/twisted/messages/invalid-utf8.py | 29 |
1 file changed, 20 insertions, 9 deletions
diff --git a/tests/twisted/messages/invalid-utf8.py b/tests/twisted/messages/invalid-utf8.py index 9f3d057..a48c2f4 100644 --- a/tests/twisted/messages/invalid-utf8.py +++ b/tests/twisted/messages/invalid-utf8.py @@ -1,27 +1,31 @@ # coding=utf-8 """ -Test that incoming messages containing well-formed but invalid UTF-8 code -points don't make Idle fall off the bus. This is a regression test for -<https://bugs.freedesktop.org/show_bug.cgi?id=30741>. +Test that incoming messages containing invalid UTF-8 +don't make Idle fall off the bus. This is a regression test for +bugs similar to <https://bugs.freedesktop.org/show_bug.cgi?id=30741>. """ from idletest import exec_test from servicetest import assertEquals +import re def test(q, bus, conn, stream): conn.Connect() q.expect('dbus-signal', signal='StatusChanged', args=[0, 1]) test_with_message(q, stream, ["I'm no ", " Buddhist"]) - # Check that valid exotic characters don't get lost - test_with_message(q, stream, [u"björk"] * 5) + test_with_message(q, stream, [u"björk"] * 3) test_with_message(q, stream, ["", "lolllllll"]) test_with_message(q, stream, ["hello", ""]) test_with_message(q, stream, "I am a stabbing robot".split(" ")) -# This is the UTF-8 encoding of U+FDD2, which is not a valid Unicode character. -WELL_FORMED_BUT_INVALID_UTF8_BYTES = "\xef\xb7\x92" +# This is the UTF-8 encoding of U+D800, which is not valid +# (not even as a noncharacter). We previously did this test with +# noncharacters, but Unicode Corrigendum #9 explicitly allows noncharacters +# to be interchanged, GLib 2.36 allows them when validating UTF-8, +# and D-Bus 1.6.10 will do likewise. +WELL_FORMED_BUT_INVALID_UTF8_BYTES = "\xed\xa0\x80" def test_with_message(q, stream, parts): invalid_utf8 = WELL_FORMED_BUT_INVALID_UTF8_BYTES.join( @@ -42,10 +46,17 @@ def test_with_message(q, stream, parts): # Don't make any assumption about how many U+FFFD REPLACEMENT CHARACTERs # are used to replace surprising bytes. 
- received_parts = [ part for part in content.split(u"\ufffd") + received_parts = [ part for part in re.split(u"\ufffd|\\?", content) if part != u'' ] - assertEquals(filter(lambda s: s != u'', parts), received_parts) + + if parts[0] == u'björk': + # The valid UTF-8 gets lost in transit, because we fall back + # to assuming ASCII when g_convert() fails (this didn't happen + # when we tested with noncharacters - oh well). + assertEquals(['bj', 'rk', 'bj', 'rk', 'bj', 'rk'], received_parts) + else: + assertEquals(filter(lambda s: s != u'', parts), received_parts) if __name__ == '__main__': exec_test(test) |