summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Markus Armbruster <armbru@redhat.com> 2018-08-23 18:39:45 +0200
committer: Markus Armbruster <armbru@redhat.com> 2018-08-24 20:26:37 +0200
commit: 340db1ed82f8ced40a3e778c08963005369e2926 (patch)
tree: c1fef7097ff604764f708120567b7aa608ff80be
parent: a2ec6be72b80770b063cf08c95c78f0d36705355 (diff)
json: Reject unescaped control characters
Fix the lexer to reject unescaped control characters in JSON strings, in accordance with RFC 8259 "The JavaScript Object Notation (JSON) Data Interchange Format".

Bonus: we now recover more nicely from unclosed strings. E.g.

    {"one: 1}\n{"two": 2}

now recovers cleanly after the newline, where before the lexer remained confused until the next unpaired double quote or lexical error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-19-armbru@redhat.com>
-rw-r--r--  qobject/json-lexer.c  4
-rw-r--r--  tests/check-qjson.c   6
-rw-r--r--  tests/qmp-test.c      4
3 files changed, 5 insertions, 9 deletions
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 7c0875d225..e85e9a78ff 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -115,7 +115,7 @@ static const uint8_t json_lexer[][256] = {
['u'] = IN_DQ_UCODE0,
},
[IN_DQ_STRING] = {
- [1 ... 0xBF] = IN_DQ_STRING,
+ [0x20 ... 0xBF] = IN_DQ_STRING,
[0xC2 ... 0xF4] = IN_DQ_STRING,
['\\'] = IN_DQ_STRING_ESCAPE,
['"'] = JSON_STRING,
@@ -155,7 +155,7 @@ static const uint8_t json_lexer[][256] = {
['u'] = IN_SQ_UCODE0,
},
[IN_SQ_STRING] = {
- [1 ... 0xBF] = IN_SQ_STRING,
+ [0x20 ... 0xBF] = IN_SQ_STRING,
[0xC2 ... 0xF4] = IN_SQ_STRING,
['\\'] = IN_SQ_STRING_ESCAPE,
['\''] = JSON_STRING,
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 1688b2f5c1..f1405ad47a 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -202,11 +202,7 @@ static void utf8_string(void)
"\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
"\x10\x11\x12\x13\x14\x15\x16\x17"
"\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
- /* bug: not corrected (valid UTF-8, but invalid JSON) */
- "\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
+ NULL,
"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007"
"\\b\\t\\n\\u000B\\f\\r\\u000E\\u000F"
"\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017"
diff --git a/tests/qmp-test.c b/tests/qmp-test.c
index 5edc97f63f..7b3ba17c4a 100644
--- a/tests/qmp-test.c
+++ b/tests/qmp-test.c
@@ -86,9 +86,9 @@ static void test_malformed(QTestState *qts)
g_assert(recovered(qts));
/* lexical error: control character in string */
- qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n'}");
+ qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n");
resp = qtest_qmp_receive(qts);
- g_assert_cmpstr(get_error_class(resp), ==, "CommandNotFound"); /* BUG */
+ g_assert_cmpstr(get_error_class(resp), ==, "GenericError");
qobject_unref(resp);
g_assert(recovered(qts));