diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2009-11-11 10:39:23 -0600 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2009-11-17 08:49:39 -0600 |
commit | 4a5fcab7acc55495d0a617eeebbbeaebc1872dbf (patch) | |
tree | d9af15127fa09c0498948ef7e96440fa53ead113 /json-parser.c | |
parent | d7ff3acbb4496ecae92d7ce1d52579d55b19da2f (diff) |
Add a JSON parser
This is the third and final stage of the JSON parser. It parses lexical tokens
performing grammar validation and creating the final QObject representation. It
uses a recursive decent parser.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Diffstat (limited to 'json-parser.c')
-rw-r--r-- | json-parser.c | 568 |
1 files changed, 568 insertions, 0 deletions
diff --git a/json-parser.c b/json-parser.c new file mode 100644 index 000000000..7624c0f9b --- /dev/null +++ b/json-parser.c @@ -0,0 +1,568 @@ +/* + * JSON Parser + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include <stdbool.h> + +#include "qemu-common.h" +#include "qstring.h" +#include "qint.h" +#include "qdict.h" +#include "qlist.h" +#include "qfloat.h" +#include "qbool.h" +#include "json-parser.h" +#include "json-lexer.h" + +typedef struct JSONParserContext +{ +} JSONParserContext; + +#define BUG_ON(cond) assert(!(cond)) + +/** + * TODO + * + * 0) make errors meaningful again + * 1) add geometry information to tokens + * 3) should we return a parsed size? + * 4) deal with premature EOI + */ + +static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap); + +/** + * Token manipulators + * + * tokens are dictionaries that contain a type, a string value, and geometry information + * about a token identified by the lexer. These are routines that make working with + * these objects a bit easier. + */ +static const char *token_get_value(QObject *obj) +{ + return qdict_get_str(qobject_to_qdict(obj), "token"); +} + +static JSONTokenType token_get_type(QObject *obj) +{ + return qdict_get_int(qobject_to_qdict(obj), "type"); +} + +static int token_is_operator(QObject *obj, char op) +{ + const char *val; + + if (token_get_type(obj) != JSON_OPERATOR) { + return 0; + } + + val = token_get_value(obj); + + return (val[0] == op) && (val[1] == 0); +} + +static int token_is_keyword(QObject *obj, const char *value) +{ + if (token_get_type(obj) != JSON_KEYWORD) { + return 0; + } + + return strcmp(token_get_value(obj), value) == 0; +} + +static int token_is_escape(QObject *obj, const char *value) +{ + if (token_get_type(obj) != JSON_ESCAPE) { + return 0; + } + + return (strcmp(token_get_value(obj), value) == 0); +} + +/** + * Error handler + */ +static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...) +{ + fprintf(stderr, "parse error: %s\n", msg); +} + +/** + * String helpers + * + * These helpers are used to unescape strings. + */ +static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) +{ + if (wchar <= 0x007F) { + BUG_ON(buffer_length < 2); + + buffer[0] = wchar & 0x7F; + buffer[1] = 0; + } else if (wchar <= 0x07FF) { + BUG_ON(buffer_length < 3); + + buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); + buffer[1] = 0x80 | (wchar & 0x3F); + buffer[2] = 0; + } else { + BUG_ON(buffer_length < 4); + + buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); + buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); + buffer[2] = 0x80 | (wchar & 0x3F); + buffer[3] = 0; + } +} + +static int hex2decimal(char ch) +{ + if (ch >= '0' && ch <= '9') { + return (ch - '0'); + } else if (ch >= 'a' && ch <= 'f') { + return 10 + (ch - 'a'); + } else if (ch >= 'A' && ch <= 'F') { + return 10 + (ch - 'A'); + } + + return -1; +} + +/** + * parse_string(): Parse a json string and return a QObject + * + * string + * "" + * " chars " + * chars + * char + * char chars + * char + * any-Unicode-character- + * except-"-or-\-or- + * control-character + * \" + * \\ + * \/ + * \b + * \f + * \n + * \r + * \t + * \u four-hex-digits + */ +static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) +{ + const char *ptr = token_get_value(token); + QString *str; + int double_quote = 1; + + if (*ptr == '"') { + double_quote = 1; + } else { + double_quote = 0; + } + ptr++; + + str = qstring_new(); + while (*ptr && + ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { + if (*ptr == '\\') { + ptr++; + + switch (*ptr) { + case '"': + qstring_append(str, "\""); + ptr++; + break; + case '\'': + qstring_append(str, "'"); + ptr++; + break; + case '\\': + qstring_append(str, "\\"); + ptr++; + break; + case '/': + qstring_append(str, "/"); + ptr++; + break; + case 'b': + qstring_append(str, "\b"); + ptr++; + break; + case 'n': + qstring_append(str, "\n"); + ptr++; + break; + case 'r': + qstring_append(str, "\r"); + ptr++; + break; + case 't': + qstring_append(str, "\t"); + ptr++; + break; + case 'u': { + uint16_t unicode_char = 0; + char utf8_char[4]; + int i = 0; + + ptr++; + + for (i = 0; i < 4; i++) { + if (qemu_isxdigit(*ptr)) { + unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); + } else { + parse_error(ctxt, token, + "invalid hex escape sequence in string"); + goto out; + } + ptr++; + } + + wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); + qstring_append(str, utf8_char); + } break; + default: + parse_error(ctxt, token, "invalid escape sequence in string"); + goto out; + } + } else { + char dummy[2]; + + dummy[0] = *ptr++; + dummy[1] = 0; + + qstring_append(str, dummy); + } + } + + ptr++; + + return str; + +out: + QDECREF(str); + return NULL; +} + +/** + * Parsing rules + */ +static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap) +{ + QObject *key, *token = NULL, *value, *peek; + QList *working = qlist_copy(*tokens); + + peek = qlist_peek(working); + key = parse_value(ctxt, &working, ap); + if (qobject_type(key) != QTYPE_QSTRING) { + parse_error(ctxt, peek, "key is not a string in object"); + goto out; + } + + token = qlist_pop(working); + if (!token_is_operator(token, ':')) { + parse_error(ctxt, token, "missing : in object pair"); + goto out; + } + + value = parse_value(ctxt, &working, ap); + if (value == NULL) { + parse_error(ctxt, token, "Missing value in dict"); + goto out; + } + + qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); + + qobject_decref(token); + qobject_decref(key); + QDECREF(*tokens); + *tokens = working; + + return 0; + +out: + qobject_decref(token); + qobject_decref(key); + QDECREF(working); + + return -1; +} + +static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap) +{ + QDict *dict = NULL; + QObject *token, *peek; + QList *working = qlist_copy(*tokens); + + token = qlist_pop(working); + if (!token_is_operator(token, '{')) { + goto out; + } + qobject_decref(token); + token = NULL; + + dict = qdict_new(); + + peek = qlist_peek(working); + if (!token_is_operator(peek, '}')) { + if (parse_pair(ctxt, dict, &working, ap) == -1) { + goto out; + } + + token = qlist_pop(working); + while (!token_is_operator(token, '}')) { + if (!token_is_operator(token, ',')) { + parse_error(ctxt, token, "expected separator in dict"); + goto out; + } + qobject_decref(token); + token = NULL; + + if (parse_pair(ctxt, dict, &working, ap) == -1) { + goto out; + } + + token = qlist_pop(working); + } + qobject_decref(token); + token = NULL; + } else { + token = qlist_pop(working); + qobject_decref(token); + token = NULL; + } + + QDECREF(*tokens); + *tokens = working; + + return QOBJECT(dict); + +out: + qobject_decref(token); + QDECREF(working); + QDECREF(dict); + return NULL; +} + +static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap) +{ + QList *list = NULL; + QObject *token, *peek; + QList *working = qlist_copy(*tokens); + + token = qlist_pop(working); + if (!token_is_operator(token, '[')) { + goto out; + } + qobject_decref(token); + token = NULL; + + list = qlist_new(); + + peek = qlist_peek(working); + if (!token_is_operator(peek, ']')) { + QObject *obj; + + obj = parse_value(ctxt, &working, ap); + if (obj == NULL) { + parse_error(ctxt, token, "expecting value"); + goto out; + } + + qlist_append_obj(list, obj); + + token = qlist_pop(working); + while (!token_is_operator(token, ']')) { + if (!token_is_operator(token, ',')) { + parse_error(ctxt, token, "expected separator in list"); + goto out; + } + + qobject_decref(token); + token = NULL; + + obj = parse_value(ctxt, &working, ap); + if (obj == NULL) { + parse_error(ctxt, token, "expecting value"); + goto out; + } + + qlist_append_obj(list, obj); + + token = qlist_pop(working); + } + + qobject_decref(token); + token = NULL; + } else { + token = qlist_pop(working); + qobject_decref(token); + token = NULL; + } + + QDECREF(*tokens); + *tokens = working; + + return QOBJECT(list); + +out: + qobject_decref(token); + QDECREF(working); + QDECREF(list); + return NULL; +} + +static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens) +{ + QObject *token, *ret; + QList *working = qlist_copy(*tokens); + + token = qlist_pop(working); + + if (token_get_type(token) != JSON_KEYWORD) { + goto out; + } + + if (token_is_keyword(token, "true")) { + ret = QOBJECT(qbool_from_int(true)); + } else if (token_is_keyword(token, "false")) { + ret = QOBJECT(qbool_from_int(false)); + } else { + parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token)); + goto out; + } + + qobject_decref(token); + QDECREF(*tokens); + *tokens = working; + + return ret; + +out: + qobject_decref(token); + QDECREF(working); + + return NULL; +} + +static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap) +{ + QObject *token = NULL, *obj; + QList *working = qlist_copy(*tokens); + + if (ap == NULL) { + goto out; + } + + token = qlist_pop(working); + + if (token_is_escape(token, "%p")) { + obj = va_arg(*ap, QObject *); + } else if (token_is_escape(token, "%i")) { + obj = QOBJECT(qbool_from_int(va_arg(*ap, int))); + } else if (token_is_escape(token, "%d")) { + obj = QOBJECT(qint_from_int(va_arg(*ap, int))); + } else if (token_is_escape(token, "%ld")) { + obj = QOBJECT(qint_from_int(va_arg(*ap, long))); + } else if (token_is_escape(token, "%lld")) { + obj = QOBJECT(qint_from_int(va_arg(*ap, long long))); + } else if (token_is_escape(token, "%s")) { + obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *))); + } else if (token_is_escape(token, "%f")) { + obj = QOBJECT(qfloat_from_double(va_arg(*ap, double))); + } else { + goto out; + } + + qobject_decref(token); + QDECREF(*tokens); + *tokens = working; + + return obj; + +out: + qobject_decref(token); + QDECREF(working); + + return NULL; +} + +static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens) +{ + QObject *token, *obj; + QList *working = qlist_copy(*tokens); + + token = qlist_pop(working); + switch (token_get_type(token)) { + case JSON_STRING: + obj = QOBJECT(qstring_from_escaped_str(ctxt, token)); + break; + case JSON_INTEGER: + obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10))); + break; + case JSON_FLOAT: + /* FIXME dependent on locale */ + obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL))); + break; + default: + goto out; + } + + qobject_decref(token); + QDECREF(*tokens); + *tokens = working; + + return obj; + +out: + qobject_decref(token); + QDECREF(working); + + return NULL; +} + +static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap) +{ + QObject *obj; + + obj = parse_object(ctxt, tokens, ap); + if (obj == NULL) { + obj = parse_array(ctxt, tokens, ap); + } + if (obj == NULL) { + obj = parse_escape(ctxt, tokens, ap); + } + if (obj == NULL) { + obj = parse_keyword(ctxt, tokens); + } + if (obj == NULL) { + obj = parse_literal(ctxt, tokens); + } + + return obj; +} + +QObject *json_parser_parse(QList *tokens, va_list *ap) +{ + JSONParserContext ctxt = {}; + QList *working = qlist_copy(tokens); + QObject *result; + + result = parse_value(&ctxt, &working, ap); + + QDECREF(working); + + return result; +} |