Starting over with the C99 grammar for the preprocessor.

This is a fresh start with a much simpler approach for the flex/bison portions of the preprocessor. It isn't functional yet (it produces no output), but it can at least read all of our test cases without any parse errors. The grammar here is based on the grammar provided for the preprocessor in the C99 specification.
author: Carl Worth <cworth@cworth.org> 2010-05-25 13:09:03 -0700
committer: Carl Worth <cworth@cworth.org> 2010-05-25 14:38:15 -0700
commit: 3ff81670848abb29b92e78f45080ad36cc85001c (patch)
tree: 199c9ebeaf91d3275bc09a5bce272e2ac4b1ab23 /glcpp-parse.y
parent: 00f1ec421edf73516fdcfbbdb651f13eeefe8f08 (diff)
1 file changed, 66 insertions, 335 deletions
diff --git a/glcpp-parse.y b/glcpp-parse.y
index 2c0fe9a..ebb28ed 100644
--- a/glcpp-parse.y
+++ b/glcpp-parse.y
@@ -119,366 +119,97 @@ glcpp_parser_lex (glcpp_parser_t *parser);
 %parse-param {glcpp_parser_t *parser}
 %lex-param {glcpp_parser_t *parser}
 
-%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF
-%type <ival> punctuator
-%type <imaxval> expression INTEGER
-%type <str> content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO
-%type <argument_list> argument_list
-%type <string_list> macro parameter_list
-%type <token> TOKEN argument_word argument_word_or_comma
-%type <token_list> argument argument_or_comma replacement_list pp_tokens
-%left OR
-%left AND
-%left '|'
-%left '^'
-%left '&'
-%left EQUAL NOT_EQUAL
-%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL
-%left LEFT_SHIFT RIGHT_SHIFT
-%left '+' '-'
-%left '*' '/' '%'
-%right UNARY
-
-/* Hard to remove shift/reduce conflicts documented as follows:
- *
- * 1. '(' after FUNC_MACRO name which is correctly resolved to shift
- *    to form macro invocation rather than reducing directly to
- *    content.
- *
- * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to
- *    shift to form macro invocation rather than reducing directly to
- *    argument.
- *
- * 3. Similarly again now that we added argument_or_comma as well.
- */
-%expect 3
+%token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF
+%token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE
+
+	/* Stale tokens, kept only to allow the rest of the code to compile. */
+%token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO
 
 %%
 
-	 /* We do all printing at the input level. */
 input:
-	/* empty */ {
-		parser->just_printed_separator = 1;
-	}
-|	input content {
-		int is_token;
-		int skipping = 0;
-
-		if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP)
-			skipping = 1;
-
-		if ($2 && strlen ($2) && ! skipping) {
-			int c = $2[0];
-			int is_not_separator = ((c >= 'a' && c <= 'z') ||
-						(c >= 'A' && c <= 'Z') ||
-						(c >= 'A' && c <= 'Z') ||
-						(c >= '0' && c <= '9') ||
-						(c == '_'));
-
-			if (! parser->just_printed_separator && is_not_separator)
-			{
-				printf (" ");
-			}
-			printf ("%s", $2);
-
-			if (is_not_separator)
-				parser->just_printed_separator = 0;
-			else
-				parser->just_printed_separator = 1;
-		}
-
-		if ($2)
-			talloc_free ($2);
-
-		if (parser->need_newline) {
-			printf ("\n");
-			parser->just_printed_separator = 1;
-			parser->need_newline = 0;
-		}
-	}
-;
-
-content:
-	IDENTIFIER {
-		$$ = $1;
-	}
-|	IDENTIFIER_FINALIZED {
-		$$ = $1;
-	}
-|	TOKEN {
-		$$ = $1.value;
-	}
-|	FUNC_MACRO {
-		$$ = $1;
-	}
-|	directive {
-		$$ = talloc_strdup (parser, "\n");
-	}
-|	punctuator {
-		$$ = talloc_asprintf (parser, "%c", $1);
-	}
-|	macro {
-		$$ = NULL;
-	}
+	/* empty */
+|	input line
 ;
 
-punctuator:
-	'('	{ $$ = '('; }
-|	')'	{ $$ = ')'; }
-|	','	{ $$ = ','; }
-	;
-
-macro:
-	FUNC_MACRO '(' argument_list ')' {
-		_expand_function_macro (parser, $1, $3);
-	}
-|	OBJ_MACRO {
-		_expand_object_macro (parser, $1);
-		talloc_free ($1);
-	}
+line:
+	control_line
+|	text_line
+|	HASH non_directive
 ;
 
-argument_list:
-	/* empty */ {
-		$$ = _argument_list_create (parser);
-	}
-|	argument {
-		$$ = _argument_list_create (parser);
-		_argument_list_append ($$, $1);
-	}
-|	argument_list ',' argument {
-		_argument_list_append ($1, $3);
-		$$ = $1;
-	}
-;
-
-argument:
-	argument_word {
-		$$ = _token_list_create (parser);
-		_token_list_append ($$, $1.type, $1.value);
-	}
-|	argument argument_word {
-		_token_list_append ($1, $2.type, $2.value);
-		talloc_free ($2.value);
-		$$ = $1;
-	}
-|	argument '(' argument_or_comma ')' {
-		_token_list_append ($1, '(', "(");
-		_token_list_append_list ($1, $3);
-		_token_list_append ($1, ')', ")");
-		$$ = $1;
-	}
+control_line:
+	HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE
+|	HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE
+|	HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE
+|	HASH_UNDEF IDENTIFIER NEWLINE
+|	HASH NEWLINE
 ;
 
-argument_word:
-	IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-|	IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-|	TOKEN { $$ = $1; }
-|	FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-|	macro {	$$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
+identifier_list:
+	IDENTIFIER
+|	identifier_list ',' IDENTIFIER
 ;
 
-	/* XXX: The body of argument_or_comma is the same as the body
-	 * of argument, but with "argument" and "argument_word"
-	 * changed to "argument_or_comma" and
-	 * "argument_word_or_comma". It would be nice to have less
-	 * redundancy here, but I'm not sure how.
-	 *
-	 * It would also be nice to have a less ugly grammar to have
-	 * to implement, but such is the C preprocessor.
-	 */
-argument_or_comma:
-	argument_word_or_comma {
-		$$ = _token_list_create (parser);
-		_token_list_append ($$, $1.type, $1.value);
-	}
-|	argument_or_comma argument_word_or_comma {
-		_token_list_append ($1, $2.type, $2.value);
-		$$ = $1;
-	}
-|	argument_or_comma '(' argument_or_comma ')' {
-		_token_list_append ($1, '(', "(");
-		_token_list_append_list ($1, $3);
-		_token_list_append ($1, ')', ")");
-		$$ = $1;
-	}
+text_line:
+	NEWLINE
+|	pp_tokens NEWLINE
 ;
 
-argument_word_or_comma:
-	IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-|	IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-|	TOKEN { $$ = $1; }
-|	FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-|	macro {	$$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
-|	',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); }
+non_directive:
+	pp_tokens NEWLINE
 ;
 
-directive:
-	DEFINE IDENTIFIER NEWLINE {
-		token_list_t *list = _token_list_create (parser);
-		_define_object_macro (parser, $2, list);
-	}
-|	DEFINE IDENTIFIER SPACE replacement_list NEWLINE {
-		_define_object_macro (parser, $2, $4);
-	}
-|	DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE {
-		_define_function_macro (parser, $2, $4, $6);
-	}
-|	IF expression NEWLINE {
-		_glcpp_parser_skip_stack_push_if (parser, $2);
-	}
-|	IFDEF IDENTIFIER NEWLINE {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
-		talloc_free ($2);
-		_glcpp_parser_skip_stack_push_if (parser, macro != NULL);
-	}
-|	IFNDEF IDENTIFIER NEWLINE {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
-		talloc_free ($2);
-		_glcpp_parser_skip_stack_push_if (parser, macro == NULL);
-	}
-|	ELIF expression NEWLINE {
-		_glcpp_parser_skip_stack_change_if (parser, "#elif", $2);
-	}
-|	ELSE {
-		_glcpp_parser_skip_stack_change_if (parser, "else", 1);
-	}
-|	ENDIF {
-		_glcpp_parser_skip_stack_pop (parser);
-	}
-|	UNDEF IDENTIFIER {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
-		if (macro) {
-			/* XXX: Need hash table to support a real way
-			 * to remove an element rather than prefixing
-			 * a new node with data of NULL like this. */
-			hash_table_insert (parser->defines, NULL, $2);
-			talloc_free (macro);
-		}
-		talloc_free ($2);
-	}
+replacement_list:
+	/* empty */
+|	pp_tokens
 ;
 
-expression:
-	INTEGER {
-		$$ = $1;
-	}
-|	expression OR expression {
-		$$ = $1 || $3;
-	}
-|	expression AND expression {
-		$$ = $1 && $3;
-	}
-|	expression '|' expression {
-		$$ = $1 | $3;
-	}
-|	expression '^' expression {
-		$$ = $1 ^ $3;
-	}
-|	expression '&' expression {
-		$$ = $1 & $3;
-	}
-|	expression NOT_EQUAL expression {
-		$$ = $1 != $3;
-	}
-|	expression EQUAL expression {
-		$$ = $1 == $3;
-	}
-|	expression GREATER_OR_EQUAL expression {
-		$$ = $1 >= $3;
-	}
-|	expression LESS_OR_EQUAL expression {
-		$$ = $1 <= $3;
-	}
-|	expression '>' expression {
-		$$ = $1 > $3;
-	}
-|	expression '<' expression {
-		$$ = $1 < $3;
-	}
-|	expression RIGHT_SHIFT expression {
-		$$ = $1 >> $3;
-	}
-|	expression LEFT_SHIFT expression {
-		$$ = $1 << $3;
-	}
-|	expression '-' expression {
-		$$ = $1 - $3;
-	}
-|	expression '+' expression {
-		$$ = $1 + $3;
-	}
-|	expression '%' expression {
-		$$ = $1 % $3;
-	}
-|	expression '/' expression {
-		$$ = $1 / $3;
-	}
-|	expression '*' expression {
-		$$ = $1 * $3;
-	}
-|	'!' expression %prec UNARY {
-		$$ = ! $2;
-	}
-|	'~' expression %prec UNARY {
-		$$ = ~ $2;
-	}
-|	'-' expression %prec UNARY {
-		$$ = - $2;
-	}
-|	'+' expression %prec UNARY {
-		$$ = + $2;
-	}
-|	DEFINED IDENTIFIER %prec UNARY {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
-		talloc_free ($2);
-		if (macro)
-			$$ = 1;
-		else
-			$$ = 0;
-	}
-|	'(' expression ')' {
-		$$ = $2;
-	}
+pp_tokens:
+	preprocessing_token
+|	pp_tokens preprocessing_token
 ;
 
-parameter_list:
-	/* empty */ {
-		$$ = _string_list_create (parser);
-	}
-|	IDENTIFIER {
-		$$ = _string_list_create (parser);
-		_string_list_append_item ($$, $1);
-		talloc_free ($1);
-	}
-|	parameter_list ',' IDENTIFIER {
-		_string_list_append_item ($1, $3);
-		talloc_free ($3);
-		$$ = $1;
-	}
+preprocessing_token:
+	IDENTIFIER
+|	punctuator
+|	OTHER
 ;
 
-replacement_list:
-	/* empty */ {
-		$$ = _token_list_create (parser);
-	}
-|	pp_tokens {
-		$$ = $1;
-	}
+punctuator:
+	'['
+|	']'
+|	'('
+|	')'
+|	'{'
+|	'}'
+|	'.'
+|	'&'
+|	'*'
+|	'+'
+|	'-'
+|	'~'
+|	'!'
+|	'/'
+|	'%'
+|	LEFT_SHIFT
+|	RIGHT_SHIFT
+|	'<'
+|	'>'
+|	LESS_OR_EQUAL
+|	GREATER_OR_EQUAL
+|	EQUAL
+|	NOT_EQUAL
+|	'^'
+|	'|'
+|	AND
+|	OR
+|	';'
+|	','
+|	PASTE
 ;
 
 
-pp_tokens:
-	TOKEN {
-		$$ = _token_list_create (parser);
-		_token_list_append ($$, $1.type, $1.value);
-	}
-|	pp_tokens TOKEN {
-	_token_list_append ($1, $2.type, $2.value);
-		$$ = $1;
-	}
-;
-
 %%
 
 string_list_t *
author	Carl Worth <cworth@cworth.org>	2010-05-25 13:09:03 -0700
committer	Carl Worth <cworth@cworth.org>	2010-05-25 14:38:15 -0700
commit	3ff81670848abb29b92e78f45080ad36cc85001c (patch)
tree	199c9ebeaf91d3275bc09a5bce272e2ac4b1ab23 /glcpp-parse.y
parent	00f1ec421edf73516fdcfbbdb651f13eeefe8f08 (diff)