diff options
author | Owen Taylor <otaylor@src.gnome.org> | 1998-06-10 23:21:14 +0000 |
---|---|---|
committer | Owen Taylor <otaylor@src.gnome.org> | 1998-06-10 23:21:14 +0000 |
commit | 2e0320d57e417f7d1c838d729a99545db2228e93 (patch) | |
tree | e916c0a314f8f7c5b249aedb2882ed27139fdec2 /gscanner.c |
Initial revision
Diffstat (limited to 'gscanner.c')
-rw-r--r-- | gscanner.c | 1551 |
1 files changed, 1551 insertions, 0 deletions
diff --git a/gscanner.c b/gscanner.c new file mode 100644 index 000000000..ccb696053 --- /dev/null +++ b/gscanner.c @@ -0,0 +1,1551 @@ +/* GLIB - Library of useful routines for C programming + * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald + * + * GScanner: Flexible lexical scanner for general purpose. + * Copyright (C) 1997, 1998 Tim Janik + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ +#define __gscanner_c__ + +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> /* needed for sys/stat.h */ +#include <sys/stat.h> +#include "glib.h" + + + +/* --- defines --- */ +#define to_lower(c) ( \ + (guchar) ( \ + ( (((guchar)(c))>='A' && ((guchar)(c))<='Z') * ('a'-'A') ) | \ + ( (((guchar)(c))>=192 && ((guchar)(c))<=214) * (224-192) ) | \ + ( (((guchar)(c))>=216 && ((guchar)(c))<=222) * (248-216) ) | \ + ((guchar)(c)) \ + ) \ +) + + +/* --- typedefs --- */ +typedef struct _GScannerHashVal GScannerHashVal; + +struct _GScannerHashVal +{ + gchar *key; + gpointer value; +}; + + + +/* --- variables --- */ +static GScannerConfig g_scanner_config_template = +{ + ( + " \t\n" + ) /* cset_skip_characters */, + ( + G_CSET_a_2_z + "_" + G_CSET_A_2_Z + ) /* cset_identifier_first */, + ( + G_CSET_a_2_z + "_0123456789" + G_CSET_A_2_Z + G_CSET_LATINS + G_CSET_LATINC + ) /* cset_identifier_nth */, + ( "#\n" ) /* cpair_comment_single */, + + FALSE /* case_sensitive */, + + TRUE /* skip_comment_multi */, + TRUE /* skip_comment_single */, + TRUE /* scan_comment_multi */, + TRUE /* scan_identifier */, + FALSE /* scan_identifier_1char */, + FALSE /* scan_identifier_NULL */, + TRUE /* scan_symbols */, + FALSE /* scan_binary */, + TRUE /* scan_octal */, + TRUE /* scan_float */, + TRUE /* scan_hex */, + FALSE /* scan_hex_dollar */, + TRUE /* scan_string_sq */, + TRUE /* scan_string_dq */, + TRUE /* numbers_2_int */, + FALSE /* int_2_float */, + FALSE /* identifier_2_string */, + TRUE /* char_2_token */, + FALSE /* symbol_2_token */, +}; + + +/* --- prototypes --- */ +extern char* g_vsprintf (gchar *fmt, va_list *args, va_list *args2); +static GScannerHashVal* g_scanner_lookup_internal (GScanner *scanner, + const gchar *symbol); +static void g_scanner_get_token_ll (GScanner *scanner, + GTokenType *token_p, + GValue *value_p, + guint *line_p, + guint *position_p); +static void g_scanner_get_token_i (GScanner *scanner, + GTokenType *token_p, + GValue *value_p, + guint *line_p, + guint *position_p); +static void g_scanner_free_value (GTokenType *token_p, + GValue *value_p); + +static inline +gint g_scanner_char_2_num (guchar c, + guchar base); +static guchar g_scanner_peek_next_char(GScanner *scanner); +static guchar g_scanner_get_char (GScanner *scanner, + guint *line_p, + guint *position_p); +static void g_scanner_msg_handler (GScanner *scanner, + gchar *message, + gint is_error); + + +/* --- functions --- */ +static gint +g_scanner_char_2_num (guchar c, + guchar base) +{ + if (c >= '0' && c <= '9') + c -= '0'; + else if (c >= 'A' && c <= 'Z') + c -= 'A' - 10; + else if (c >= 'a' && c <= 'z') + c -= 'a' - 10; + else + return -1; + + if (c < base) + return c; + + return -1; +} + +GScanner* +g_scanner_new (GScannerConfig *config_templ) +{ + register GScanner *scanner; + + if (!config_templ) + config_templ = &g_scanner_config_template; + + scanner = g_new0 (GScanner, 1); + + scanner->user_data = NULL; + scanner->input_name = NULL; + scanner->parse_errors = 0; + scanner->max_parse_errors = 0; + + scanner->config = g_new0 (GScannerConfig, 1); + + scanner->config->case_sensitive = config_templ->case_sensitive; + scanner->config->cset_skip_characters = config_templ->cset_skip_characters; + scanner->config->cset_identifier_first= config_templ->cset_identifier_first; + scanner->config->cset_identifier_nth = config_templ->cset_identifier_nth; + scanner->config->cpair_comment_single = config_templ->cpair_comment_single; + scanner->config->skip_comment_multi = config_templ->skip_comment_multi; + scanner->config->skip_comment_single = config_templ->skip_comment_single; + scanner->config->scan_comment_multi = config_templ->scan_comment_multi; + scanner->config->scan_identifier = config_templ->scan_identifier; + scanner->config->scan_identifier_1char= config_templ->scan_identifier_1char; + scanner->config->scan_identifier_NULL = config_templ->scan_identifier_NULL; + scanner->config->scan_symbols = config_templ->scan_symbols; + scanner->config->scan_binary = config_templ->scan_binary; + scanner->config->scan_octal = config_templ->scan_octal; + scanner->config->scan_float = config_templ->scan_float; + scanner->config->scan_hex = config_templ->scan_hex; + scanner->config->scan_hex_dollar = config_templ->scan_hex_dollar; + scanner->config->scan_string_sq = config_templ->scan_string_sq; + scanner->config->scan_string_dq = config_templ->scan_string_dq; + scanner->config->numbers_2_int = config_templ->numbers_2_int; + scanner->config->int_2_float = config_templ->int_2_float; + scanner->config->identifier_2_string = config_templ->identifier_2_string; + scanner->config->char_2_token = config_templ->char_2_token; + scanner->config->symbol_2_token = config_templ->symbol_2_token; + + scanner->token = G_TOKEN_NONE; + scanner->value.v_int = 0; + scanner->line = 1; + scanner->position = 0; + + scanner->next_token = G_TOKEN_NONE; + scanner->next_value.v_int = 0; + scanner->next_line = 1; + scanner->next_position = 0; + + scanner->symbol_table = g_hash_table_new (g_str_hash, g_str_equal); + scanner->text = NULL; + scanner->text_len = 0; + scanner->input_fd = -1; + scanner->peeked_char = -1; + + scanner->msg_handler = g_scanner_msg_handler; + + return scanner; +} + +static void +g_scanner_destroy_symbol_table_entry (gpointer key, + gpointer value, + gpointer user_data) +{ + g_free (key); + g_free (value); +} + +void +g_scanner_destroy (GScanner *scanner) +{ + g_return_if_fail (scanner != NULL); + + g_hash_table_foreach (scanner->symbol_table, + g_scanner_destroy_symbol_table_entry, NULL); + g_hash_table_destroy (scanner->symbol_table); + g_scanner_free_value (&scanner->token, &scanner->value); + g_scanner_free_value (&scanner->next_token, &scanner->next_value); + g_free (scanner->config); + g_free (scanner); +} + +static void +g_scanner_msg_handler (GScanner *scanner, + gchar *message, + gint is_error) +{ + g_return_if_fail (scanner != NULL); + + fprintf (stdout, "%s:%d: ", scanner->input_name, scanner->line); + if (is_error) + fprintf (stdout, "error: "); + fprintf (stdout, "%s\n", message); +} + +void +g_scanner_error (GScanner *scanner, + const gchar *format, + ...) +{ + g_return_if_fail (scanner != NULL); + g_return_if_fail (format != NULL); + + scanner->parse_errors++; + + if (scanner->msg_handler) + { + va_list args, args2; + gchar *string; + + va_start (args, format); + va_start (args2, format); + string = g_vsprintf ((gchar*) format, &args, &args2); + va_end (args); + va_end (args2); + + string = g_strdup (string); + + scanner->msg_handler (scanner, string, TRUE); + + g_free (string); + } +} + +void +g_scanner_warn (GScanner *scanner, + const gchar *format, + ...) +{ + g_return_if_fail (scanner != NULL); + g_return_if_fail (format != NULL); + + if (scanner->msg_handler) + { + va_list args, args2; + gchar *string; + + va_start (args, format); + va_start (args2, format); + string = g_vsprintf ((gchar*) format, &args, &args2); + va_end (args); + va_end (args2); + + string = g_strdup (string); + + scanner->msg_handler (scanner, string, FALSE); + + g_free (string); + } +} + +void +g_scanner_input_file (GScanner *scanner, + gint input_fd) +{ + g_return_if_fail (input_fd >= 0); + + scanner->token = G_TOKEN_NONE; + scanner->value.v_int = 0; + scanner->line = 1; + scanner->position = 0; + scanner->next_token = G_TOKEN_NONE; + + scanner->text = NULL; + scanner->text_len = 0; + scanner->input_fd = input_fd; + scanner->peeked_char = -1; +} + +void +g_scanner_input_text (GScanner *scanner, + const gchar *text, + guint text_len) +{ + g_return_if_fail (text != NULL); + + scanner->token = G_TOKEN_NONE; + scanner->value.v_int = 0; + scanner->line = 1; + scanner->position = 0; + scanner->next_token = G_TOKEN_NONE; + + scanner->text = text; + scanner->text_len = text_len; + scanner->input_fd = -1; + scanner->peeked_char = -1; +} + +void +g_scanner_add_symbol (GScanner *scanner, + const gchar *symbol, + gpointer value) +{ + register GScannerHashVal *hash_val; + + g_return_if_fail (scanner != NULL); + g_return_if_fail (symbol != NULL); + + hash_val = g_scanner_lookup_internal (scanner, symbol); + + if (!hash_val) + { + hash_val = g_new (GScannerHashVal, 1); + hash_val->key = g_strdup (symbol); + hash_val->value = value; + if (!scanner->config->case_sensitive) + { + register guint i, l; + + l = strlen (hash_val->key); + for (i = 0; i < l; i++) + hash_val->key[i] = to_lower (hash_val->key[i]); + } + g_hash_table_insert (scanner->symbol_table, hash_val->key, hash_val); + } + else + hash_val->value = value; +} + +gpointer +g_scanner_lookup_symbol (GScanner *scanner, + const gchar *symbol) +{ + register GScannerHashVal *hash_val; + + g_return_val_if_fail (scanner != NULL, NULL); + + if (!symbol) + return NULL; + + hash_val = g_scanner_lookup_internal (scanner, symbol); + + if (hash_val) + return hash_val->value; + else + return NULL; +} + +static void +g_scanner_foreach_internal (gpointer key, + gpointer value, + gpointer user_data) +{ + register GScannerHashVal *hash_val; + register GHFunc func; + register gpointer func_data; + register gpointer *d; + + d = user_data; + func = (GHFunc)d[0]; + func_data = d[1]; + hash_val = value; + + func (key, hash_val->value, func_data); +} + +void +g_scanner_foreach_symbol (GScanner *scanner, + GHFunc func, + gpointer func_data) +{ + gpointer d[2]; + + g_return_if_fail (scanner != NULL); + + d[0] = (gpointer)func; + d[1] = func_data; + + g_hash_table_foreach (scanner->symbol_table, g_scanner_foreach_internal, d); +} + +void +g_scanner_remove_symbol (GScanner *scanner, + const gchar *symbol) +{ + register GScannerHashVal *hash_val; + + g_return_if_fail (scanner != NULL); + + hash_val = g_scanner_lookup_internal (scanner, symbol); + + if (hash_val) + { + g_hash_table_remove (scanner->symbol_table, hash_val->key); + g_free (hash_val->key); + g_free (hash_val); + } +} + +void +g_scanner_freeze_symbol_table (GScanner *scanner) +{ + g_return_if_fail (scanner != NULL); + + g_hash_table_freeze (scanner->symbol_table); +} + +void +g_scanner_thaw_symbol_table (GScanner *scanner) +{ + g_return_if_fail (scanner != NULL); + + g_hash_table_thaw (scanner->symbol_table); +} + +GTokenType +g_scanner_peek_next_token (GScanner *scanner) +{ + g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF); + + if (scanner->next_token == G_TOKEN_NONE) + { + scanner->next_line = scanner->line; + scanner->next_position = scanner->position; + g_scanner_get_token_i (scanner, + &scanner->next_token, + &scanner->next_value, + &scanner->next_line, + &scanner->next_position); + } + + return scanner->next_token; +} + +GTokenType +g_scanner_get_next_token (GScanner *scanner) +{ + g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF); + + if (scanner->next_token != G_TOKEN_NONE) + { + g_scanner_free_value (&scanner->token, &scanner->value); + + scanner->token = scanner->next_token; + scanner->value = scanner->next_value; + scanner->line = scanner->next_line; + scanner->position = scanner->next_position; + scanner->next_token = G_TOKEN_NONE; + } + else + g_scanner_get_token_i (scanner, + &scanner->token, + &scanner->value, + &scanner->line, + &scanner->position); + + return scanner->token; +} + +GTokenType +g_scanner_cur_token (GScanner *scanner) +{ + g_return_val_if_fail (scanner != NULL, G_TOKEN_EOF); + + return scanner->token; +} + +GValue +g_scanner_cur_value (GScanner *scanner) +{ + register GValue v; + + v.v_int = 0; + g_return_val_if_fail (scanner != NULL, v); + + return scanner->value; +} + +guint +g_scanner_cur_line (GScanner *scanner) +{ + g_return_val_if_fail (scanner != NULL, 0); + + return scanner->line; +} + +guint +g_scanner_cur_position (GScanner *scanner) +{ + g_return_val_if_fail (scanner != NULL, 0); + + return scanner->position; +} + +gboolean +g_scanner_eof (GScanner *scanner) +{ + g_return_val_if_fail (scanner != NULL, TRUE); + + return scanner->token == G_TOKEN_EOF; +} + +static GScannerHashVal* +g_scanner_lookup_internal (GScanner *scanner, + const gchar *symbol) +{ + register GScannerHashVal *hash_val; + + if (!scanner->config->case_sensitive) + { + register gchar *buffer; + register guint i, l; + + l = strlen (symbol); + buffer = g_new (gchar, l + 1); + for (i = 0; i < l; i++) + buffer[i] = to_lower (symbol[i]); + buffer[i] = 0; + hash_val = g_hash_table_lookup (scanner->symbol_table, buffer); + g_free (buffer); + } + else + hash_val = g_hash_table_lookup (scanner->symbol_table, (gchar*) symbol); + + return hash_val; +} + +static guchar +g_scanner_peek_next_char (GScanner *scanner) +{ + guchar fchar; + + if (scanner->text_len) + { + fchar = scanner->text[0]; + } + else if (scanner->input_fd >= 0) + { + if (scanner->peeked_char < 0) + { + register gint count; + + do + { + count = read (scanner->input_fd, &fchar, 1); + } + while (count == -1 && + (errno == EINTR || + errno == EAGAIN)); + + if (count != 1) + fchar = 0; + + scanner->peeked_char = fchar; + } + else + fchar = scanner->peeked_char; + } + else + fchar = 0; + + return fchar; +} + +static guchar +g_scanner_get_char (GScanner *scanner, + guint *line_p, + guint *position_p) +{ + guchar fchar; + + if (scanner->text_len) + { + fchar = *(scanner->text++); + scanner->text_len--; + } + else if (scanner->input_fd >= 0) + { + if (scanner->peeked_char < 0) + { + register gint count; + + do + { + count = read (scanner->input_fd, &fchar, 1); + } + while (count == -1 && + (errno == EINTR || + errno == EAGAIN)); + if (count != 1 || fchar == 0) + { + fchar = 0; + scanner->peeked_char = 0; + } + } + else + { + fchar = scanner->peeked_char; + if (fchar) + scanner->peeked_char = -1; + } + } + else + fchar = 0; + + if (fchar == '\n') + { + (*position_p) = 0; + (*line_p)++; + } + else if (fchar) + { + (*position_p)++; + } + + return fchar; +} + +void +g_scanner_unexp_token (GScanner *scanner, + GTokenType expected_token, + const gchar *identifier_spec, + const gchar *symbol_spec, + const gchar *symbol_name, + const gchar *message, + gint is_error) +{ + register gchar *token_string; + register guint token_string_len; + register gchar *expected_string; + register guint expected_string_len; + register gchar *message_prefix; + register gboolean print_unexp; + void (*msg_handler) (GScanner*, const gchar*, ...); + + g_return_if_fail (scanner != NULL); + + if (is_error) + msg_handler = g_scanner_error; + else + msg_handler = g_scanner_warn; + + if (!identifier_spec) + identifier_spec = "identifier"; + if (!symbol_spec) + symbol_spec = "symbol"; + + token_string_len = 56; + token_string = g_new (gchar, token_string_len + 1); + expected_string_len = 64; + expected_string = g_new (gchar, expected_string_len + 1); + print_unexp = TRUE; + + switch (scanner->token) + { + + case G_TOKEN_EOF: + g_snprintf (token_string, token_string_len, "end of file"); + break; + + default: /* 1 ... 255 */ + if (scanner->token >= 1 && scanner->token <= 255) + { + if ((scanner->token >= ' ' && scanner->token <= '~') || + strchr (scanner->config->cset_identifier_first, scanner->token) || + strchr (scanner->config->cset_identifier_nth, scanner->token)) + g_snprintf (token_string, expected_string_len, "character `%c'", scanner->token); + else + g_snprintf (token_string, expected_string_len, "character `\\%o'", scanner->token); + } + else + g_snprintf (token_string, token_string_len, "(unknown) token <%d>", scanner->token); + break; + + case G_TOKEN_ERROR: + print_unexp = FALSE; + expected_token = G_TOKEN_NONE; + switch (scanner->value.v_error) + { + case G_ERR_UNEXP_EOF: + g_snprintf (token_string, token_string_len, "scanner: unexpected end of file"); + break; + + case G_ERR_UNEXP_EOF_IN_STRING: + g_snprintf (token_string, token_string_len, "scanner: unterminated string constant"); + break; + + case G_ERR_UNEXP_EOF_IN_COMMENT: + g_snprintf (token_string, token_string_len, "scanner: unterminated comment"); + break; + + case G_ERR_NON_DIGIT_IN_CONST: + g_snprintf (token_string, token_string_len, "scanner: non digit in constant"); + break; + + case G_ERR_FLOAT_RADIX: + g_snprintf (token_string, token_string_len, "scanner: invalid radix for floating constant"); + break; + + case G_ERR_FLOAT_MALFORMED: + g_snprintf (token_string, token_string_len, "scanner: malformed floating constant"); + break; + + case G_ERR_DIGIT_RADIX: + g_snprintf (token_string, token_string_len, "scanner: digit is beyond radix"); + break; + + case G_ERR_UNKNOWN: + default: + g_snprintf (token_string, token_string_len, "scanner: unknown error"); + break; + } + break; + + case G_TOKEN_CHAR: + g_snprintf (token_string, token_string_len, "character `%c'", scanner->value.v_char); + break; + + case G_TOKEN_SYMBOL: + if (expected_token == G_TOKEN_SYMBOL) + print_unexp = FALSE; + if (symbol_name) + g_snprintf (token_string, + token_string_len, + "%s%s `%s'", + print_unexp ? "" : "invalid ", + symbol_spec, + symbol_name); + else + g_snprintf (token_string, + token_string_len, + "%s%s", + print_unexp ? "" : "invalid ", + symbol_spec); + break; + + case G_TOKEN_IDENTIFIER: + if (expected_token == G_TOKEN_IDENTIFIER) + print_unexp = FALSE; + g_snprintf (token_string, + token_string_len, + "%s%s `%s'", + print_unexp ? "" : "invalid ", + identifier_spec, + scanner->value.v_string); + break; + + case G_TOKEN_BINARY: + case G_TOKEN_OCTAL: + case G_TOKEN_INT: + case G_TOKEN_HEX: + g_snprintf (token_string, token_string_len, "number `%ld'", scanner->value.v_int); + break; + + case G_TOKEN_FLOAT: + g_snprintf (token_string, token_string_len, "number `%.3f'", scanner->value.v_float); + break; + + case G_TOKEN_STRING: + g_snprintf (token_string, + token_string_len, + "%sstring constant \"%s\"", + scanner->value.v_string[0] == 0 ? "empty " : "", + scanner->value.v_string); + token_string[token_string_len - 2] = '"'; + token_string[token_string_len - 1] = 0; + break; + + case G_TOKEN_COMMENT_SINGLE: + case G_TOKEN_COMMENT_MULTI: + g_snprintf (token_string, token_string_len, "comment"); + break; + + case G_TOKEN_NONE: + g_assert_not_reached (); + break; + } + + + switch (expected_token) + { + default: /* 1 ... 255 */ + if (expected_token >= 1 && expected_token <= 255) + { + if ((expected_token >= ' ' && expected_token <= '~') || + strchr (scanner->config->cset_identifier_first, expected_token) || + strchr (scanner->config->cset_identifier_nth, expected_token)) + g_snprintf (expected_string, expected_string_len, "character `%c'", expected_token); + else + g_snprintf (expected_string, expected_string_len, "character `\\%o'", expected_token); + } + else + g_snprintf (expected_string, expected_string_len, "(unknown) token <%d>", expected_token); + break; + + case G_TOKEN_INT: + g_snprintf (expected_string, expected_string_len, "number (integer)"); + break; + + case G_TOKEN_FLOAT: + g_snprintf (expected_string, expected_string_len, "number (float)"); + break; + + case G_TOKEN_STRING: + g_snprintf (expected_string, expected_string_len, "string constant"); + break; + + case G_TOKEN_SYMBOL: + g_snprintf (expected_string, + expected_string_len, + "%s%s", + scanner->token == G_TOKEN_SYMBOL ? "valid " : "", + symbol_spec); + break; + + case G_TOKEN_IDENTIFIER: + g_snprintf (expected_string, + expected_string_len, + "%s%s", + scanner->token == G_TOKEN_IDENTIFIER ? "valid " : "", + identifier_spec); + break; + + case G_TOKEN_NONE: + break; + } + + if (message && message[0] != 0) + message_prefix = " - "; + else + { + message_prefix = ""; + message = ""; + } + + if (expected_token != G_TOKEN_NONE) + { + if (print_unexp) + msg_handler (scanner, + "unexpected %s, expected %s%s%s", + token_string, + expected_string, + message_prefix, + message); + else + msg_handler (scanner, + "%s, expected %s%s%s", + token_string, + expected_string, + message_prefix, + message); + } + else + { + if (print_unexp) + msg_handler (scanner, + "unexpected %s%s%s", + token_string, + message_prefix, + message); + else + msg_handler (scanner, + "%s%s%s", + token_string, + message_prefix, + message); + } + + g_free (token_string); + g_free (expected_string); +} + +gint +g_scanner_stat_mode (const gchar *filename) +{ + struct stat *stat_buf; + gint st_mode; + + stat_buf = g_new0 (struct stat, 1); + + lstat (filename, stat_buf); + + st_mode = stat_buf->st_mode; + + g_free (stat_buf); + + return st_mode; +} + +static void +g_scanner_free_value (GTokenType *token_p, + GValue *value_p) +{ + switch (*token_p) + { + case G_TOKEN_STRING: + case G_TOKEN_IDENTIFIER: + case G_TOKEN_IDENTIFIER_NULL: + case G_TOKEN_COMMENT_SINGLE: + case G_TOKEN_COMMENT_MULTI: + g_free (value_p->v_string); + break; + + default: + break; + } + + *token_p = G_TOKEN_NONE; +} + +static void +g_scanner_get_token_i (GScanner *scanner, + GTokenType *token_p, + GValue *value_p, + guint *line_p, + guint *position_p) +{ + do + { + g_scanner_free_value (token_p, value_p); + g_scanner_get_token_ll (scanner, token_p, value_p, line_p, position_p); + } + while (((*token_p > 0 && *token_p < 256) && + strchr (scanner->config->cset_skip_characters, *token_p)) || + (*token_p == G_TOKEN_CHAR && + strchr (scanner->config->cset_skip_characters, value_p->v_char)) || + (*token_p == G_TOKEN_COMMENT_MULTI && + scanner->config->skip_comment_multi) || + (*token_p == G_TOKEN_COMMENT_SINGLE && + scanner->config->skip_comment_single)); + + switch (*token_p) + { + case G_TOKEN_IDENTIFIER: + if (scanner->config->identifier_2_string) + *token_p = G_TOKEN_STRING; + break; + + case G_TOKEN_SYMBOL: + if (scanner->config->symbol_2_token) + *token_p = (GTokenType) value_p->v_symbol; + break; + + case G_TOKEN_BINARY: + case G_TOKEN_OCTAL: + case G_TOKEN_HEX: + if (scanner->config->numbers_2_int) + *token_p = G_TOKEN_INT; + break; + + default: + break; + } + + if (*token_p == G_TOKEN_INT && + scanner->config->int_2_float) + { + *token_p = G_TOKEN_FLOAT; + value_p->v_float = value_p->v_int; + } + + errno = 0; +} + +static void +g_scanner_get_token_ll (GScanner *scanner, + GTokenType *token_p, + GValue *value_p, + guint *line_p, + guint *position_p) +{ + register GScannerConfig *config; + register gboolean in_comment_multi; + register gboolean in_comment_single; + register gboolean in_string_sq; + register gboolean in_string_dq; + static guchar ch; + register GTokenType token; + register GValue value; + register GString *gstring; + + config = scanner->config; + (*value_p).v_int = 0; + + if (scanner->token == G_TOKEN_EOF || + (!scanner->text_len && + (scanner->input_fd < 0 || + scanner->peeked_char == 0))) + { + *token_p = G_TOKEN_EOF; + return; + } + + in_comment_multi = FALSE; + in_comment_single = FALSE; + in_string_sq = FALSE; + in_string_dq = FALSE; + gstring = NULL; + + do + { + register gboolean dotted_float = FALSE; + + ch = g_scanner_get_char (scanner, line_p, position_p); + + value.v_int = 0; + token = G_TOKEN_NONE; + + /* this is *evil*, but needed ;( + * we first check for identifier first character, because it + * might interfere with other key chars like slashes or numbers + */ + if (config->scan_identifier && + ch && strchr (config->cset_identifier_first, ch)) + goto identifier_precedence; + + switch (ch) + { + register gboolean in_number; + static gchar *endptr; + + case 0: + token = G_TOKEN_EOF; + (*position_p)++; + ch = 0; + break; + + case '/': + if (!config->scan_comment_multi || + g_scanner_peek_next_char (scanner) != '*') + goto default_case; + g_scanner_get_char (scanner, line_p, position_p); + token = G_TOKEN_COMMENT_MULTI; + in_comment_multi = TRUE; + gstring = g_string_new (""); + while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0) + { + if (ch == '*' && g_scanner_peek_next_char (scanner) == '/') + { + g_scanner_get_char (scanner, line_p, position_p); + in_comment_multi = FALSE; + break; + } + else + gstring = g_string_append_c (gstring, ch); + } + ch = 0; + break; + + case '\'': + if (!config->scan_string_sq) + goto default_case; + token = G_TOKEN_STRING; + in_string_sq = TRUE; + gstring = g_string_new (""); + while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0) + { + if (ch == '\'') + { + in_string_sq = FALSE; + break; + } + else + gstring = g_string_append_c (gstring, ch); + } + ch = 0; + break; + + case '"': + if (!config->scan_string_dq) + goto default_case; + token = G_TOKEN_STRING; + in_string_dq = TRUE; + gstring = g_string_new (""); + while ((ch = g_scanner_get_char (scanner, line_p, position_p)) != 0) + { + if (ch == '"') + { + in_string_dq = FALSE; + break; + } + else + { + if (ch == '\\') + { + ch = g_scanner_get_char (scanner, line_p, position_p); + switch (ch) + { + register guint i; + register guint fchar; + + case 0: + break; + + case '\\': + gstring = g_string_append_c (gstring, '\\'); + break; + + case 'n': + gstring = g_string_append_c (gstring, '\n'); + break; + + case 't': + gstring = g_string_append_c (gstring, '\t'); + break; + + case 'r': + gstring = g_string_append_c (gstring, '\r'); + break; + + case 'b': + gstring = g_string_append_c (gstring, '\b'); + break; + + case 'f': + gstring = g_string_append_c (gstring, '\f'); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + i = ch - '0'; + fchar = g_scanner_peek_next_char (scanner); + if (fchar >= '0' && fchar <= '7') + { + ch = g_scanner_get_char (scanner, line_p, position_p); + i= i * 8 + ch - '0'; + fchar = g_scanner_peek_next_char (scanner); + if (fchar >= '0' && fchar <= '7') + { + ch = g_scanner_get_char (scanner, line_p, position_p); + i = i * 8 + ch - '0'; + } + } + gstring = g_string_append_c (gstring, i); + break; + + default: + gstring = g_string_append_c (gstring, ch); + break; + } + } + else + gstring = g_string_append_c (gstring, ch); + } + } + ch = 0; + break; + + case '.': + if (!config->scan_float) + goto default_case; + token = G_TOKEN_FLOAT; + dotted_float = TRUE; + ch = g_scanner_get_char (scanner, line_p, position_p); + goto number_parsing; + + case '$': + if (!config->scan_hex_dollar) + goto default_case; + token = G_TOKEN_HEX; + ch = g_scanner_get_char (scanner, line_p, position_p); + goto number_parsing; + + case '0': + if (config->scan_octal) + token = G_TOKEN_OCTAL; + else + token = G_TOKEN_INT; + ch = g_scanner_peek_next_char (scanner); + if (config->scan_hex && (ch == 'x' || ch == 'X')) + { + token = G_TOKEN_HEX; + g_scanner_get_char (scanner, line_p, position_p); + ch = g_scanner_get_char (scanner, line_p, position_p); + if (ch == 0) + { + token = G_TOKEN_ERROR; + value.v_error = G_ERR_UNEXP_EOF; + (*position_p)++; + break; + } + if (g_scanner_char_2_num (ch, 16) < 0) + { + token = G_TOKEN_ERROR; + value.v_error = G_ERR_DIGIT_RADIX; + ch = 0; + break; + } + } + else if (config->scan_binary && (ch == 'b' || ch == 'B')) + { + token = G_TOKEN_BINARY; + g_scanner_get_char (scanner, line_p, position_p); + ch = g_scanner_get_char (scanner, line_p, position_p); + if (ch == 0) + { + token = G_TOKEN_ERROR; + value.v_error = G_ERR_UNEXP_EOF; + (*position_p)++; + break; + } + if (g_scanner_char_2_num (ch, 10) < 0) + { + token = G_TOKEN_ERROR; + value.v_error = G_ERR_NON_DIGIT_IN_CONST; + ch = 0; + break; + } + } + else + ch = '0'; + /* fall through */ + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + number_parsing: + if (token == G_TOKEN_NONE) + token = G_TOKEN_INT; + + gstring = g_string_new (dotted_float ? "0." : ""); + gstring = g_string_append_c (gstring, ch); + in_number = TRUE; + while (in_number) + { + register gboolean is_E; + + is_E = (ch == 'e' || ch == 'E') && token == G_TOKEN_FLOAT; + ch = g_scanner_peek_next_char (scanner); + + if (g_scanner_char_2_num (ch, 36) >= 0 || + (config->scan_float && ch == '.') || + (is_E && ch == '+') || + (is_E && ch == '-') ) + ch = g_scanner_get_char (scanner, line_p, position_p); + else + in_number = FALSE; + + if (in_number) + switch (ch) + { + case '.': + if (token != G_TOKEN_INT && + token != G_TOKEN_OCTAL) + { + token = G_TOKEN_ERROR; + if (token == G_TOKEN_FLOAT) + value.v_error = G_ERR_FLOAT_MALFORMED; + else + value.v_error = G_ERR_FLOAT_RADIX; + in_number = FALSE; + } + else + { + token = G_TOKEN_FLOAT; + gstring = g_string_append_c (gstring, ch); + } + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + gstring = g_string_append_c (gstring, ch); + break; + + case '-': + case '+': + if (token != G_TOKEN_FLOAT) + { + token = G_TOKEN_ERROR; + value.v_error = G_ERR_NON_DIGIT_IN_CONST; + in_number = FALSE; + } + else + gstring = g_string_append_c (gstring, ch); + break; + + case 'e': + case 'E': + if ((token != G_TOKEN_HEX && !config->scan_float) || + (token != G_TOKEN_HEX && + token != G_TOKEN_OCTAL && + token != G_TOKEN_FLOAT && + token != G_TOKEN_INT)) + { + token = G_TOKEN_ERROR; + value.v_error = G_ERR_NON_DIGIT_IN_CONST; + in_number = FALSE; + } + else + { + if (token != G_TOKEN_HEX) + token = G_TOKEN_FLOAT; + gstring = g_string_append_c (gstring, ch); + } + break; + + default: + if (token != G_TOKEN_HEX) + { + token = G_TOKEN_ERROR; + value.v_error = G_ERR_NON_DIGIT_IN_CONST; + in_number = FALSE; + } + else + gstring = g_string_append_c (gstring, ch); + break; + } + } + endptr = NULL; + switch (token) + { + case G_TOKEN_BINARY: + value.v_binary = strtol (gstring->str, &endptr, 2); + break; + + case G_TOKEN_OCTAL: + value.v_octal = strtol (gstring->str, &endptr, 8); + break; + + case G_TOKEN_INT: + value.v_int = strtol (gstring->str, &endptr, 10); + break; + + case G_TOKEN_FLOAT: + value.v_float = g_strtod (gstring->str, &endptr); + break; + + case G_TOKEN_HEX: + value.v_hex = strtol (gstring->str, &endptr, 16); + break; + + default: + break; + } + if (endptr && *endptr) + { + token = G_TOKEN_ERROR; + if (*endptr == 'e' || *endptr == 'E') + value.v_error = G_ERR_NON_DIGIT_IN_CONST; + else + value.v_error = G_ERR_DIGIT_RADIX; + } + g_string_free (gstring, TRUE); + gstring = NULL; + ch = 0; + break; + + default: + default_case: + if (config->cpair_comment_single && + ch == config->cpair_comment_single[0]) + { + token = G_TOKEN_COMMENT_SINGLE; + in_comment_single = TRUE; + gstring = g_string_new (""); + while ((ch = g_scanner_get_char (scanner, + line_p, + position_p)) != 0) + { + if (ch == config->cpair_comment_single[1]) + { + in_comment_single = FALSE; + ch = 0; + break; + } + + gstring = g_string_append_c (gstring, ch); + ch = 0; + } + } + else if (config->scan_identifier && ch && + strchr (config->cset_identifier_first, ch)) + { + identifier_precedence: + + if (config->cset_identifier_nth && ch && + strchr (config->cset_identifier_nth, + g_scanner_peek_next_char (scanner))) + { + token = G_TOKEN_IDENTIFIER; + gstring = g_string_new (""); + gstring = g_string_append_c (gstring, ch); + do + { + ch = g_scanner_get_char (scanner, line_p, position_p); + gstring = g_string_append_c (gstring, ch); + ch = g_scanner_peek_next_char (scanner); + } + while (ch && strchr (config->cset_identifier_nth, ch)); + ch = 0; + } + else if (config->scan_identifier_1char) + { + token = G_TOKEN_IDENTIFIER; + value.v_identifier = g_new0 (gchar, 2); + value.v_identifier[0] = ch; + ch = 0; + } + } + if (ch) + { + if (config->char_2_token) + token = ch; + else + { + token = G_TOKEN_CHAR; + value.v_char = ch; + } + ch = 0; + } + break; + } + g_assert (ch == 0 && token != G_TOKEN_NONE); + } + while (ch != 0); + + if (in_comment_multi || + in_comment_single || + in_string_sq || + in_string_dq) + { + token = G_TOKEN_ERROR; + if (gstring) + { + g_string_free (gstring, TRUE); + gstring = NULL; + } + (*position_p)++; + if (in_comment_multi || in_comment_single) + value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT; + else if (in_string_sq || in_string_dq) + value.v_error = G_ERR_UNEXP_EOF_IN_STRING; + } + + if (gstring) + { + value.v_string = gstring->str; + g_string_free (gstring, FALSE); + gstring = NULL; + } + + if (token == G_TOKEN_IDENTIFIER && + config->scan_symbols) + { + register GScannerHashVal *hash_val; + + hash_val = g_scanner_lookup_internal (scanner, value.v_identifier); + + if (hash_val) + { + g_free (value.v_identifier); + token = G_TOKEN_SYMBOL; + value.v_symbol = hash_val->value; + } + } + + if (token == G_TOKEN_IDENTIFIER && + config->scan_identifier_NULL && + strlen (value.v_identifier) == 4) + { + gchar *null_upper = "NULL"; + gchar *null_lower = "null"; + + if (scanner->config->case_sensitive) + { + if (value.v_identifier[0] == null_upper[0] && + value.v_identifier[1] == null_upper[1] && + value.v_identifier[2] == null_upper[2] && + value.v_identifier[3] == null_upper[3]) + token = G_TOKEN_IDENTIFIER_NULL; + } + else + { + if ((value.v_identifier[0] == null_upper[0] || + value.v_identifier[0] == null_lower[0]) && + (value.v_identifier[1] == null_upper[1] || + value.v_identifier[1] == null_lower[1]) && + (value.v_identifier[2] == null_upper[2] || + value.v_identifier[2] == null_lower[2]) && + (value.v_identifier[3] == null_upper[3] || + value.v_identifier[3] == null_lower[3])) + token = G_TOKEN_IDENTIFIER_NULL; + } + } + + *token_p = token; + *value_p = value; +} |