Merge branch 'take-2'

The take-2 branch started over with a new grammar based directly on the grammar from the C99 specification. It doesn't try to capture things like balanced sets of parentheses for macro arguments in the grammar. Instead, it merely captures things as token lists and then performs operations like parsing arguments and expanding macros on those lists. We merge it here since it's currently behaving better (passing the entire test suite). But the code base has proven quite fragile. Several of the recently added test cases required additional special cases in the take-2 branch while working trivially on master. So this merge point may be useful in the future, since we might end up with a cleaner code base by coming back to the state before this merge and fixing it, rather than accepting all the fragile imperative/list-munging code from the take-2 branch.
author: Carl Worth <cworth@cworth.org> 2010-05-29 06:03:32 -0700
committer: Carl Worth <cworth@cworth.org> 2010-05-29 06:03:40 -0700
commit: 96d3994881832201db7edd8a0a6f4b34655649d3 (patch)
tree: 2f599ca455e3fa16e0488516708e9871f07bbdb9
parent: ae3fb09cd20fc189d68f0c2a63cc74dd584d7ee1 (diff)
parent: 75ef1c75dd47a0b4054a767fd94f7c3cf68d2331 (diff)
8 files changed, 1106 insertions, 616 deletions
diff --git a/.gitignore b/.gitignore
index d67bd38..b88f0cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ glcpp-parse.h
 *~
 tests/*.expected
 tests/*.gcc
+tests/*.glcpp
 tests/*.out
diff --git a/Makefile b/Makefile
index 8811612..0c06aa8 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ CFLAGS = -g
 override CFLAGS += -Wall -Wextra -Wwrite-strings -Wswitch-enum -Wno-unused
 
 glcpp: glcpp.o glcpp-lex.o glcpp-parse.o hash_table.o xtalloc.o
-	gcc -o $@ -ltalloc $^
+	gcc -o $@ -ltalloc -lm $^
 
 %.c %.h: %.y
 	bison --debug --defines=$*.h --output=$*.c $^
diff --git a/README b/README
index ba833a4..ab42a3f 100644
--- a/README
+++ b/README
@@ -12,3 +12,19 @@ preprocessors". To fill in these details, I've been using the C99
 standard (for which I had a convenient copy) as available from:
 
 http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
+
+Known limitations
+-----------------
+Macro invocations cannot include embedded newlines.
+
+The __LINE__, __FILE__, and __VERSION__ macros are not yet supported.
+
+The argument of the 'defined' operator cannot yet include enclosing
+parentheses.
+
+The #error, #pragma, #extension, #version, and #line directives are
+not yet supported.
+
+A file that ends with a function-like macro name as the last
+non-whitespace token will result in a parse error (when it should
+instead be passed through as is).
+\ No newline at end of file
diff --git a/glcpp-lex.l b/glcpp-lex.l
index ee1f6e3..52269c6 100644
--- a/glcpp-lex.l
+++ b/glcpp-lex.l
@@ -32,21 +32,14 @@
 %option reentrant noyywrap
 %option extra-type="glcpp_parser_t *"
 
-%x ST_DEFINE
-%x ST_DEFINE_OBJ_OR_FUNC
-%x ST_DEFINE_PARAMETER
-%x ST_DEFINE_VALUE
-%x ST_IF
-%x ST_UNDEF
-%x ST_UNDEF_END
-
 SPACE		[[:space:]]
 NONSPACE	[^[:space:]]
 NEWLINE		[\n]
 HSPACE		[ \t]
 HASH		^{HSPACE}*#{HSPACE}*
 IDENTIFIER	[_a-zA-Z][_a-zA-Z0-9]*
-TOKEN		[^[:space:](),]+
+PUNCTUATION	[][(){}.&*~!/%<>^|;,=+-]
+OTHER		[^][(){}.&*~!/%<>^|;,=#[:space:]+-]+
 
 DECIMAL_INTEGER		[1-9][0-9]*[uU]?
 OCTAL_INTEGER		0[0-7]*[uU]?
@@ -54,210 +47,123 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 
 %%
 
-{HASH}if{HSPACE}* {
-	BEGIN ST_IF;
-	return IF;
+{HASH}define{HSPACE}+/{IDENTIFIER}"(" {
+	yyextra->space_tokens = 0;
+	return HASH_DEFINE_FUNC;
 }
 
-{HASH}elif{HSPACE}* {
-	BEGIN ST_IF;
-	return ELIF;
+{HASH}define {
+	yyextra->space_tokens = 0;
+	return HASH_DEFINE_OBJ;
 }
 
-<ST_IF>{DECIMAL_INTEGER} {
-	yylval.ival = strtoll (yytext, NULL, 10);
-	return INTEGER;
+{HASH}undef {
+	yyextra->space_tokens = 0;
+	return HASH_UNDEF;
 }
 
-<ST_IF>{OCTAL_INTEGER} {
-	yylval.ival = strtoll (yytext + 1, NULL, 8);
-	return INTEGER;
+{HASH}if {
+	yyextra->space_tokens = 0;
+	return HASH_IF;
 }
 
-<ST_IF>{HEXADECIMAL_INTEGER} {
-	yylval.ival = strtoll (yytext + 2, NULL, 16);
-	return INTEGER;
+{HASH}elif {
+	yyextra->space_tokens = 0;
+	return HASH_ELIF;
 }
 
-<ST_IF>"defined" {
-	return DEFINED;
+{HASH}else {
+	yyextra->space_tokens = 0;
+	return HASH_ELSE;
 }
 
-<ST_IF>"<<" {
-	return LEFT_SHIFT;
+{HASH}endif {
+	yyextra->space_tokens = 0;
+	return HASH_ENDIF;
 }
 
-<ST_IF>">>" {
-	return RIGHT_SHIFT;
+{HASH} {
+	yyextra->space_tokens = 0;
+	return HASH;
 }
 
-<ST_IF>"<=" {
-	return LESS_OR_EQUAL;
-}
-
-<ST_IF>">=" {
-	return GREATER_OR_EQUAL;
-}
-
-<ST_IF>"==" {
-	return EQUAL;
-}
-
-<ST_IF>"!=" {
-	return NOT_EQUAL;
-}
-
-<ST_IF>"&&" {
-	return AND;
-}
-
-<ST_IF>"||" {
-	return OR;
-}
-
-<ST_IF>[-+*/%<>&^|()~] {
-	return yytext[0];
+{DECIMAL_INTEGER} {
+	yylval.str = xtalloc_strdup (yyextra, yytext);
+	return INTEGER_STRING;
 }
 
-<ST_IF>{IDENTIFIER} {
+{OCTAL_INTEGER} {
 	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return IDENTIFIER;
+	return INTEGER_STRING;
 }
 
-<ST_IF>{HSPACE}+
-
-<ST_IF>\n {
-	BEGIN INITIAL;
-	return NEWLINE;
+{HEXADECIMAL_INTEGER} {
+	yylval.str = xtalloc_strdup (yyextra, yytext);
+	return INTEGER_STRING;
 }
 
-{HASH}endif{HSPACE}* {
-	return ENDIF;
+"<<"  {
+	return LEFT_SHIFT;
 }
 
-{HASH}else{HSPACE}* {
-	return ELSE;
+">>" {
+	return RIGHT_SHIFT;
 }
 
-{HASH}undef{HSPACE}* {
-	BEGIN ST_UNDEF;
-	return UNDEF;
+"<=" {
+	return LESS_OR_EQUAL;
 }
 
-<ST_UNDEF>{IDENTIFIER} {
-	BEGIN ST_UNDEF_END;
-	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return IDENTIFIER;
+">=" {
+	return GREATER_OR_EQUAL;
 }
 
-<ST_UNDEF_END>{HSPACE}*
-
-<ST_UNDEF_END>\n {
-	BEGIN INITIAL;
+"==" {
+	return EQUAL;
 }
 
-	/* We use the ST_DEFINE and ST_DEFVAL states so that we can
-	 * pass a space token, (yes, a token for whitespace!), since
-	 * the preprocessor specification requires distinguishing
-	 * "#define foo()" from "#define foo ()".
-	 */
-{HASH}define{HSPACE}* {
-	BEGIN ST_DEFINE;
-	return DEFINE;
+"!=" {
+	return NOT_EQUAL;
 }
 
-<ST_DEFINE>{IDENTIFIER}	{
-	BEGIN ST_DEFINE_OBJ_OR_FUNC;
-	yylval.str = xtalloc_strdup (yyextra, yytext);
-	return IDENTIFIER;
+"&&" {
+	return AND;
 }
 
-<ST_DEFINE_OBJ_OR_FUNC>\n {
-	BEGIN INITIAL;
-	return NEWLINE;
+"||" {
+	return OR;
 }
 
-<ST_DEFINE_OBJ_OR_FUNC>{HSPACE}+ {
-	BEGIN ST_DEFINE_VALUE;
-	return SPACE;
+"##" {
+	return PASTE;
 }
 
-<ST_DEFINE_OBJ_OR_FUNC>"(" {
-	BEGIN ST_DEFINE_PARAMETER;
-	return '(';
+"defined" {
+	return DEFINED;
 }
 
-<ST_DEFINE_PARAMETER>{IDENTIFIER} {
+{IDENTIFIER} {
 	yylval.str = xtalloc_strdup (yyextra, yytext);
 	return IDENTIFIER;
 }
 
-<ST_DEFINE_PARAMETER>"," {
-	return ',';
-}
-
-<ST_DEFINE_PARAMETER>")" {
-	BEGIN ST_DEFINE_VALUE;
-	return ')';
-}
-
-<ST_DEFINE_PARAMETER>{HSPACE}+
-
-<ST_DEFINE_VALUE>{TOKEN} {
-	yylval.token.type = TOKEN;
-	yylval.token.value = xtalloc_strdup (yyextra, yytext);
-	return TOKEN;
-}
-
-<ST_DEFINE_VALUE>[(),] {
-	yylval.token.type = TOKEN;
-	yylval.token.value = xtalloc_strdup (yyextra, yytext);
-	return TOKEN;
-}
-
-<ST_DEFINE_VALUE>{HSPACE}+
-
-<ST_DEFINE_VALUE>\n {
-	BEGIN INITIAL;
-	return NEWLINE;
+{PUNCTUATION} {
+	return yytext[0];
 }
 
-{IDENTIFIER} {
-	int parameter_index;
+{OTHER}+ {
 	yylval.str = xtalloc_strdup (yyextra, yytext);
-	switch (glcpp_parser_classify_token (yyextra, yylval.str,
-					     &parameter_index))
-	{
-		case TOKEN_CLASS_IDENTIFIER:
-			return IDENTIFIER;
-		break;
-		case TOKEN_CLASS_IDENTIFIER_FINALIZED:
-			return IDENTIFIER_FINALIZED;
-		break;
-		case TOKEN_CLASS_FUNC_MACRO:
-			return FUNC_MACRO;
-		break;
-		case TOKEN_CLASS_OBJ_MACRO:
-			return OBJ_MACRO;
-		break;
-
-	}
-}
-
-[(),]	{
-	return yytext[0];
+	return OTHER;
 }
 
-{TOKEN} {
-	yylval.token.type = TOKEN;
-	yylval.token.value = xtalloc_strdup (yyextra, yytext);
-	return TOKEN;
+{HSPACE}+ {
+	if (yyextra->space_tokens) {
+		return SPACE;
+	}
 }
 
 \n {
-	yyextra->need_newline = 1;
+	return NEWLINE;
 }
 
-{HSPACE}+
-
 %%
diff --git a/glcpp-parse.y b/glcpp-parse.y
index 2c0fe9a..f4c834e 100644
--- a/glcpp-parse.y
+++ b/glcpp-parse.y
@@ -25,69 +25,88 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
+#include <inttypes.h>
 
 #include "glcpp.h"
 
-void
+static void
 yyerror (void *scanner, const char *error);
 
-void
+static void
 _define_object_macro (glcpp_parser_t *parser,
 		      const char *macro,
 		      token_list_t *replacements);
 
-void
+static void
 _define_function_macro (glcpp_parser_t *parser,
 			const char *macro,
 			string_list_t *parameters,
 			token_list_t *replacements);
 
-void
-_expand_object_macro (glcpp_parser_t *parser, const char *identifier);
-
-void
-_expand_function_macro (glcpp_parser_t *parser,
-			const char *identifier,
-			argument_list_t *arguments);
-
-string_list_t *
+static string_list_t *
 _string_list_create (void *ctx);
 
-void
+static void
 _string_list_append_item (string_list_t *list, const char *str);
 
-void
+static void
 _string_list_append_list (string_list_t *list, string_list_t *tail);
 
-int
+static void
+_string_list_push (string_list_t *list, const char *str);
+
+static void
+_string_list_pop (string_list_t *list);
+
+static int
 _string_list_contains (string_list_t *list, const char *member, int *index);
 
-int
+static int
 _string_list_length (string_list_t *list);
 
-argument_list_t *
+static argument_list_t *
 _argument_list_create (void *ctx);
 
-void
+static void
 _argument_list_append (argument_list_t *list, token_list_t *argument);
 
-int
+static int
 _argument_list_length (argument_list_t *list);
 
-token_list_t *
+static token_list_t *
 _argument_list_member_at (argument_list_t *list, int index);
 
-token_list_t *
+/* Note: This function talloc_steal()s the str pointer. */
+static token_t *
+_token_create_str (void *ctx, int type, char *str);
+
+static token_t *
+_token_create_ival (void *ctx, int type, int ival);
+
+static token_list_t *
 _token_list_create (void *ctx);
 
-void
-_token_list_append (token_list_t *list, int type, const char *value);
+/* Note: This function adds a talloc_reference() to token.
+ *
+ * You may want to talloc_unlink any current reference if you no
+ * longer need it. */
+static void
+_token_list_append (token_list_t *list, token_t *token);
 
-void
+static void
 _token_list_append_list (token_list_t *list, token_list_t *tail);
 
 static void
-glcpp_parser_pop_expansion (glcpp_parser_t *parser);
+_glcpp_parser_evaluate_defined (glcpp_parser_t *parser,
+				token_list_t *list);
+
+static void
+_glcpp_parser_expand_token_list (glcpp_parser_t *parser,
+				 token_list_t *list);
+
+static void
+_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
+					 token_list_t *list);
 
 static void
 _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition);
@@ -95,7 +114,7 @@ _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, int condition);
 static void
 _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type,
 				    int condition);
-			
+
 static void
 _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser);
 
@@ -104,29 +123,21 @@ _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser);
 static int
 glcpp_parser_lex (glcpp_parser_t *parser);
 
-%}
+static void
+glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list);
 
-%union {
-	intmax_t imaxval;
-	int ival;
-	char *str;
-	argument_list_t *argument_list;
-	string_list_t *string_list;
-	token_t token;
-	token_list_t *token_list;
-}
+%}
 
 %parse-param {glcpp_parser_t *parser}
 %lex-param {glcpp_parser_t *parser}
 
-%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF
-%type <ival> punctuator
-%type <imaxval> expression INTEGER
-%type <str> content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO
-%type <argument_list> argument_list
-%type <string_list> macro parameter_list
-%type <token> TOKEN argument_word argument_word_or_comma
-%type <token_list> argument argument_or_comma replacement_list pp_tokens
+%token COMMA_FINAL DEFINED ELIF_EXPANDED HASH HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH_ELIF HASH_ELSE HASH_ENDIF HASH_IF HASH_IFDEF HASH_IFNDEF HASH_UNDEF IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING NEWLINE OTHER PLACEHOLDER SPACE
+%token PASTE
+%type <ival> expression INTEGER operator SPACE
+%type <str> IDENTIFIER INTEGER_STRING OTHER
+%type <string_list> identifier_list
+%type <token> preprocessing_token
+%type <token_list> pp_tokens replacement_list text_line
 %left OR
 %left AND
 %left '|'
@@ -139,228 +150,120 @@ glcpp_parser_lex (glcpp_parser_t *parser);
 %left '*' '/' '%'
 %right UNARY
 
-/* Hard to remove shift/reduce conflicts documented as follows:
- *
- * 1. '(' after FUNC_MACRO name which is correctly resolved to shift
- *    to form macro invocation rather than reducing directly to
- *    content.
- *
- * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to
- *    shift to form macro invocation rather than reducing directly to
- *    argument.
- *
- * 3. Similarly again now that we added argument_or_comma as well.
- */
-%expect 3
-
 %%
 
-	 /* We do all printing at the input level. */
 input:
-	/* empty */ {
-		parser->just_printed_separator = 1;
-	}
-|	input content {
-		int is_token;
-		int skipping = 0;
-
-		if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP)
-			skipping = 1;
-
-		if ($2 && strlen ($2) && ! skipping) {
-			int c = $2[0];
-			int is_not_separator = ((c >= 'a' && c <= 'z') ||
-						(c >= 'A' && c <= 'Z') ||
-						(c >= 'A' && c <= 'Z') ||
-						(c >= '0' && c <= '9') ||
-						(c == '_'));
-
-			if (! parser->just_printed_separator && is_not_separator)
-			{
-				printf (" ");
-			}
-			printf ("%s", $2);
-
-			if (is_not_separator)
-				parser->just_printed_separator = 0;
-			else
-				parser->just_printed_separator = 1;
-		}
-
-		if ($2)
-			talloc_free ($2);
+	/* empty */
+|	input line
+;
 
-		if (parser->need_newline) {
+line:
+	control_line {
+		if (parser->skip_stack == NULL ||
+		    parser->skip_stack->type == SKIP_NO_SKIP)
+		{
 			printf ("\n");
-			parser->just_printed_separator = 1;
-			parser->need_newline = 0;
 		}
 	}
-;
-
-content:
-	IDENTIFIER {
-		$$ = $1;
-	}
-|	IDENTIFIER_FINALIZED {
-		$$ = $1;
-	}
-|	TOKEN {
-		$$ = $1.value;
-	}
-|	FUNC_MACRO {
-		$$ = $1;
-	}
-|	directive {
-		$$ = talloc_strdup (parser, "\n");
-	}
-|	punctuator {
-		$$ = talloc_asprintf (parser, "%c", $1);
-	}
-|	macro {
-		$$ = NULL;
-	}
-;
-
-punctuator:
-	'('	{ $$ = '('; }
-|	')'	{ $$ = ')'; }
-|	','	{ $$ = ','; }
-	;
-
-macro:
-	FUNC_MACRO '(' argument_list ')' {
-		_expand_function_macro (parser, $1, $3);
-	}
-|	OBJ_MACRO {
-		_expand_object_macro (parser, $1);
+|	text_line {
+		if (parser->skip_stack == NULL ||
+		    parser->skip_stack->type == SKIP_NO_SKIP)
+		{
+			_glcpp_parser_print_expanded_token_list (parser, $1);
+			printf ("\n");
+		}
 		talloc_free ($1);
 	}
+|	expanded_line
+|	HASH non_directive
 ;
 
-argument_list:
-	/* empty */ {
-		$$ = _argument_list_create (parser);
-	}
-|	argument {
-		$$ = _argument_list_create (parser);
-		_argument_list_append ($$, $1);
-	}
-|	argument_list ',' argument {
-		_argument_list_append ($1, $3);
-		$$ = $1;
-	}
-;
-
-argument:
-	argument_word {
-		$$ = _token_list_create (parser);
-		_token_list_append ($$, $1.type, $1.value);
-	}
-|	argument argument_word {
-		_token_list_append ($1, $2.type, $2.value);
-		talloc_free ($2.value);
-		$$ = $1;
-	}
-|	argument '(' argument_or_comma ')' {
-		_token_list_append ($1, '(', "(");
-		_token_list_append_list ($1, $3);
-		_token_list_append ($1, ')', ")");
-		$$ = $1;
-	}
-;
-
-argument_word:
-	IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-|	IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-|	TOKEN { $$ = $1; }
-|	FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-|	macro {	$$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
-;
-
-	/* XXX: The body of argument_or_comma is the same as the body
-	 * of argument, but with "argument" and "argument_word"
-	 * changed to "argument_or_comma" and
-	 * "argument_word_or_comma". It would be nice to have less
-	 * redundancy here, but I'm not sure how.
-	 *
-	 * It would also be nice to have a less ugly grammar to have
-	 * to implement, but such is the C preprocessor.
-	 */
-argument_or_comma:
-	argument_word_or_comma {
-		$$ = _token_list_create (parser);
-		_token_list_append ($$, $1.type, $1.value);
-	}
-|	argument_or_comma argument_word_or_comma {
-		_token_list_append ($1, $2.type, $2.value);
-		$$ = $1;
+expanded_line:
+	IF_EXPANDED expression NEWLINE {
+		_glcpp_parser_skip_stack_push_if (parser, $2);
 	}
-|	argument_or_comma '(' argument_or_comma ')' {
-		_token_list_append ($1, '(', "(");
-		_token_list_append_list ($1, $3);
-		_token_list_append ($1, ')', ")");
-		$$ = $1;
+|	ELIF_EXPANDED expression NEWLINE {
+		_glcpp_parser_skip_stack_change_if (parser, "elif", $2);
 	}
 ;
 
-argument_word_or_comma:
-	IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-|	IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-|	TOKEN { $$ = $1; }
-|	FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-|	macro {	$$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
-|	',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); }
-;
-
-directive:
-	DEFINE IDENTIFIER NEWLINE {
-		token_list_t *list = _token_list_create (parser);
-		_define_object_macro (parser, $2, list);
+control_line:
+	HASH_DEFINE_OBJ	IDENTIFIER replacement_list NEWLINE {
+		_define_object_macro (parser, $2, $3);
 	}
-|	DEFINE IDENTIFIER SPACE replacement_list NEWLINE {
-		_define_object_macro (parser, $2, $4);
+|	HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE {
+		_define_function_macro (parser, $2, NULL, $5);
 	}
-|	DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE {
+|	HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE {
 		_define_function_macro (parser, $2, $4, $6);
 	}
-|	IF expression NEWLINE {
-		_glcpp_parser_skip_stack_push_if (parser, $2);
+|	HASH_UNDEF IDENTIFIER NEWLINE {
+		macro_t *macro = hash_table_find (parser->defines, $2);
+		if (macro) {
+			/* XXX: Need hash table to support a real way
+			 * to remove an element rather than prefixing
+			 * a new node with data of NULL like this. */
+			hash_table_insert (parser->defines, NULL, $2);
+			talloc_free (macro);
+		}
+		talloc_free ($2);
 	}
-|	IFDEF IDENTIFIER NEWLINE {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
+|	HASH_IF pp_tokens NEWLINE {
+		token_list_t *expanded;
+		token_t *token;
+
+		expanded = _token_list_create (parser);
+		token = _token_create_ival (parser, IF_EXPANDED, IF_EXPANDED);
+		_token_list_append (expanded, token);
+		talloc_unlink (parser, token);
+		_glcpp_parser_evaluate_defined (parser, $2);
+		_glcpp_parser_expand_token_list (parser, $2);
+		_token_list_append_list (expanded, $2);
+		glcpp_parser_lex_from (parser, expanded);
+	}
+|	HASH_IFDEF IDENTIFIER NEWLINE {
+		macro_t *macro = hash_table_find (parser->defines, $2);
 		talloc_free ($2);
 		_glcpp_parser_skip_stack_push_if (parser, macro != NULL);
 	}
-|	IFNDEF IDENTIFIER NEWLINE {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
+|	HASH_IFNDEF IDENTIFIER NEWLINE {
+		macro_t *macro = hash_table_find (parser->defines, $2);
 		talloc_free ($2);
 		_glcpp_parser_skip_stack_push_if (parser, macro == NULL);
 	}
-|	ELIF expression NEWLINE {
-		_glcpp_parser_skip_stack_change_if (parser, "#elif", $2);
+|	HASH_ELIF pp_tokens NEWLINE {
+		token_list_t *expanded;
+		token_t *token;
+
+		expanded = _token_list_create (parser);
+		token = _token_create_ival (parser, ELIF_EXPANDED, ELIF_EXPANDED);
+		_token_list_append (expanded, token);
+		talloc_unlink (parser, token);
+		_glcpp_parser_evaluate_defined (parser, $2);
+		_glcpp_parser_expand_token_list (parser, $2);
+		_token_list_append_list (expanded, $2);
+		glcpp_parser_lex_from (parser, expanded);
 	}
-|	ELSE {
+|	HASH_ELSE NEWLINE {
 		_glcpp_parser_skip_stack_change_if (parser, "else", 1);
 	}
-|	ENDIF {
+|	HASH_ENDIF NEWLINE {
 		_glcpp_parser_skip_stack_pop (parser);
 	}
-|	UNDEF IDENTIFIER {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
-		if (macro) {
-			/* XXX: Need hash table to support a real way
-			 * to remove an element rather than prefixing
-			 * a new node with data of NULL like this. */
-			hash_table_insert (parser->defines, NULL, $2);
-			talloc_free (macro);
-		}
-		talloc_free ($2);
-	}
+|	HASH NEWLINE
 ;
 
 expression:
-	INTEGER {
+	INTEGER_STRING {
+		if (strlen ($1) >= 3 && strncmp ($1, "0x", 2) == 0) {
+			$$ = strtoll ($1 + 2, NULL, 16);
+		} else if ($1[0] == '0') {
+			$$ = strtoll ($1, NULL, 8);
+		} else {
+			$$ = strtoll ($1, NULL, 10);
+		}
+	}
+|	INTEGER {
 		$$ = $1;
 	}
 |	expression OR expression {
@@ -429,56 +332,105 @@ expression:
 |	'+' expression %prec UNARY {
 		$$ = + $2;
 	}
-|	DEFINED IDENTIFIER %prec UNARY {
-		string_list_t *macro = hash_table_find (parser->defines, $2);
-		talloc_free ($2);
-		if (macro)
-			$$ = 1;
-		else
-			$$ = 0;
-	}
 |	'(' expression ')' {
 		$$ = $2;
 	}
 ;
 
-parameter_list:
-	/* empty */ {
-		$$ = _string_list_create (parser);
-	}
-|	IDENTIFIER {
+identifier_list:
+	IDENTIFIER {
 		$$ = _string_list_create (parser);
 		_string_list_append_item ($$, $1);
-		talloc_free ($1);
+		talloc_steal ($$, $1);
 	}
-|	parameter_list ',' IDENTIFIER {
-		_string_list_append_item ($1, $3);
-		talloc_free ($3);
-		$$ = $1;
+|	identifier_list ',' IDENTIFIER {
+		$$ = $1;	
+		_string_list_append_item ($$, $3);
+		talloc_steal ($$, $3);
 	}
 ;
 
+text_line:
+	NEWLINE { $$ = NULL; }
+|	pp_tokens NEWLINE
+;
+
+non_directive:
+	pp_tokens NEWLINE
+;
+
 replacement_list:
-	/* empty */ {
+	/* empty */ { $$ = NULL; }
+|	pp_tokens
+;
+
+pp_tokens:
+	preprocessing_token {
+		parser->space_tokens = 1;
 		$$ = _token_list_create (parser);
+		_token_list_append ($$, $1);
+		talloc_unlink (parser, $1);
 	}
-|	pp_tokens {
+|	pp_tokens preprocessing_token {
 		$$ = $1;
+		_token_list_append ($$, $2);
+		talloc_unlink (parser, $2);
 	}
 ;
 
-
-pp_tokens:
-	TOKEN {
-		$$ = _token_list_create (parser);
-		_token_list_append ($$, $1.type, $1.value);
+preprocessing_token:
+	IDENTIFIER {
+		$$ = _token_create_str (parser, IDENTIFIER, $1);
 	}
-|	pp_tokens TOKEN {
-	_token_list_append ($1, $2.type, $2.value);
-		$$ = $1;
+|	INTEGER_STRING {
+		$$ = _token_create_str (parser, INTEGER_STRING, $1);
+	}
+|	operator {
+		$$ = _token_create_ival (parser, $1, $1);
+	}
+|	OTHER {
+		$$ = _token_create_str (parser, OTHER, $1);
+	}
+|	SPACE {
+		$$ = _token_create_ival (parser, SPACE, SPACE);
 	}
 ;
 
+operator:
+	'['			{ $$ = '['; }
+|	']'			{ $$ = ']'; }
+|	'('			{ $$ = '('; }
+|	')'			{ $$ = ')'; }
+|	'{'			{ $$ = '{'; }
+|	'}'			{ $$ = '}'; }
+|	'.'			{ $$ = '.'; }
+|	'&'			{ $$ = '&'; }
+|	'*'			{ $$ = '*'; }
+|	'+'			{ $$ = '+'; }
+|	'-'			{ $$ = '-'; }
+|	'~'			{ $$ = '~'; }
+|	'!'			{ $$ = '!'; }
+|	'/'			{ $$ = '/'; }
+|	'%'			{ $$ = '%'; }
+|	LEFT_SHIFT		{ $$ = LEFT_SHIFT; }
+|	RIGHT_SHIFT		{ $$ = RIGHT_SHIFT; }
+|	'<'			{ $$ = '<'; }
+|	'>'			{ $$ = '>'; }
+|	LESS_OR_EQUAL		{ $$ = LESS_OR_EQUAL; }
+|	GREATER_OR_EQUAL	{ $$ = GREATER_OR_EQUAL; }
+|	EQUAL			{ $$ = EQUAL; }
+|	NOT_EQUAL		{ $$ = NOT_EQUAL; }
+|	'^'			{ $$ = '^'; }
+|	'|'			{ $$ = '|'; }
+|	AND			{ $$ = AND; }
+|	OR			{ $$ = OR; }
+|	';'			{ $$ = ';'; }
+|	','			{ $$ = ','; }
+|	'='			{ $$ = '='; }
+|	PASTE			{ $$ = PASTE; }
+|	DEFINED			{ $$ = DEFINED; }
+;
+
 %%
 
 string_list_t *
@@ -512,7 +464,7 @@ _string_list_append_item (string_list_t *list, const char *str)
 
 	node = xtalloc (list, string_node_t);
 	node->str = xtalloc_strdup (node, str);
-		
+
 	node->next = NULL;
 
 	if (list->head == NULL) {
@@ -524,6 +476,42 @@ _string_list_append_item (string_list_t *list, const char *str)
 	list->tail = node;
 }
 
+void
+_string_list_push (string_list_t *list, const char *str)
+{
+	string_node_t *node;
+
+	node = xtalloc (list, string_node_t);
+	node->str = xtalloc_strdup (node, str);
+	node->next = list->head;
+
+	if (list->tail == NULL) {
+		list->tail = node;
+	}
+	list->head = node;
+}
+
+void
+_string_list_pop (string_list_t *list)
+{
+	string_node_t *node;
+
+	node = list->head;
+
+	if (node == NULL) {
+		fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n");
+		exit (1);
+	}
+
+	list->head = node->next;
+	if (list->tail == node) {
+		assert (node->next == NULL);
+		list->tail = NULL;
+	}
+
+	talloc_free (node);
+}
+
 int
 _string_list_contains (string_list_t *list, const char *member, int *index)
 {
@@ -576,9 +564,6 @@ _argument_list_append (argument_list_t *list, token_list_t *argument)
 {
 	argument_node_t *node;
 
-	if (argument == NULL || argument->head == NULL)
-		return;
-
 	node = xtalloc (list, argument_node_t);
 	node->argument = argument;
 
@@ -630,6 +615,31 @@ _argument_list_member_at (argument_list_t *list, int index)
 	return NULL;
 }
 
+/* Note: This function talloc_steal()s the str pointer. */
+token_t *
+_token_create_str (void *ctx, int type, char *str)
+{
+	token_t *token;
+
+	token = xtalloc (ctx, token_t);
+	token->type = type;
+	token->value.str = talloc_steal (token, str);
+
+	return token;
+}
+
+token_t *
+_token_create_ival (void *ctx, int type, int ival)
+{
+	token_t *token;
+
+	token = xtalloc (ctx, token_t);
+	token->type = type;
+	token->value.ival = ival;
+
+	return token;
+}
+
 token_list_t *
 _token_list_create (void *ctx)
 {
@@ -638,18 +648,18 @@ _token_list_create (void *ctx)
 	list = xtalloc (ctx, token_list_t);
 	list->head = NULL;
 	list->tail = NULL;
+	list->non_space_tail = NULL;
 
 	return list;
 }
 
 void
-_token_list_append (token_list_t *list, int type, const char *value)
+_token_list_append (token_list_t *list, token_t *token)
 {
 	token_node_t *node;
 
 	node = xtalloc (list, token_node_t);
-	node->type = type;
-	node->value = xtalloc_strdup (list, value);
+	node->token = xtalloc_reference (list, token);
 
 	node->next = NULL;
 
@@ -660,11 +670,16 @@ _token_list_append (token_list_t *list, int type, const char *value)
 	}
 
 	list->tail = node;
+	if (token->type != SPACE)
+		list->non_space_tail = node;
 }
 
 void
 _token_list_append_list (token_list_t *list, token_list_t *tail)
 {
+	if (tail == NULL || tail->head == NULL)
+		return;
+
 	if (list->head == NULL) {
 		list->head = tail->head;
 	} else {
@@ -672,8 +687,191 @@ _token_list_append_list (token_list_t *list, token_list_t *tail)
 	}
 
 	list->tail = tail->tail;
+	list->non_space_tail = tail->non_space_tail;
+}
+
+token_list_t *
+_token_list_copy (void *ctx, token_list_t *other)
+{
+	token_list_t *copy;
+	token_node_t *node;
+
+	if (other == NULL)
+		return NULL;
+
+	copy = _token_list_create (ctx);
+	for (node = other->head; node; node = node->next)
+		_token_list_append (copy, node->token);
+
+	return copy;
 }
-		
+
+void
+_token_list_trim_trailing_space (token_list_t *list)
+{
+	token_node_t *tail, *next;
+
+	if (list->non_space_tail) {
+		tail = list->non_space_tail->next;
+		list->non_space_tail->next = NULL;
+		list->tail = list->non_space_tail;
+
+		while (tail) {
+			next = tail->next;
+			talloc_free (tail);
+			tail = next;
+		}
+	}
+}
+
+static void
+_token_print (token_t *token)
+{
+	if (token->type < 256) {
+		printf ("%c", token->type);
+		return;
+	}
+
+	switch (token->type) {
+	case INTEGER:
+		printf ("%" PRIxMAX, token->value.ival);
+		break;
+	case IDENTIFIER:
+	case INTEGER_STRING:
+	case OTHER:
+		printf ("%s", token->value.str);
+		break;
+	case SPACE:
+		printf (" ");
+		break;
+	case LEFT_SHIFT:
+		printf ("<<");
+		break;
+	case RIGHT_SHIFT:
+		printf (">>");
+		break;
+	case LESS_OR_EQUAL:
+		printf ("<=");
+		break;
+	case GREATER_OR_EQUAL:
+		printf (">=");
+		break;
+	case EQUAL:
+		printf ("==");
+		break;
+	case NOT_EQUAL:
+		printf ("!=");
+		break;
+	case AND:
+		printf ("&&");
+		break;
+	case OR:
+		printf ("||");
+		break;
+	case PASTE:
+		printf ("##");
+		break;
+	case COMMA_FINAL:
+		printf (",");
+		break;
+	case PLACEHOLDER:
+		/* Nothing to print. */
+		break;
+	default:
+		fprintf (stderr, "Error: Don't know how to print token type %d\n", token->type);
+		break;
+	}
+}
+
+/* Return a new token (talloc()ed off of 'token') formed by pasting
+ * 'token' and 'other'. Note that this function may return 'token' or
+ * 'other' directly rather than allocating anything new.
+ *
+ * Caution: Only very cursory error-checking is performed to see if
+ * the final result is a valid single token. */
+static token_t *
+_token_paste (token_t *token, token_t *other)
+{
+	/* Pasting a placeholder onto anything makes no change. */
+	if (other->type == PLACEHOLDER)
+		return token;
+
+	/* When 'token' is a placeholder, just return 'other'. */
+	if (token->type == PLACEHOLDER)
+		return other;
+
+	/* A very few single-character punctuators can be combined
+	 * with another to form a multi-character punctuator. */
+	switch (token->type) {
+	case '<':
+		if (other->type == '<')
+			return _token_create_ival (token, LEFT_SHIFT, LEFT_SHIFT);
+		else if (other->type == '=')
+			return _token_create_ival (token, LESS_OR_EQUAL, LESS_OR_EQUAL);
+		break;
+	case '>':
+		if (other->type == '>')
+			return _token_create_ival (token, RIGHT_SHIFT, RIGHT_SHIFT);
+		else if (other->type == '=')
+			return _token_create_ival (token, GREATER_OR_EQUAL, GREATER_OR_EQUAL);
+		break;
+	case '=':
+		if (other->type == '=')
+			return _token_create_ival (token, EQUAL, EQUAL);
+		break;
+	case '!':
+		if (other->type == '=')
+			return _token_create_ival (token, NOT_EQUAL, NOT_EQUAL);
+		break;
+	case '&':
+		if (other->type == '&')
+			return _token_create_ival (token, AND, AND);
+		break;
+	case '|':
+		if (other->type == '|')
+			return _token_create_ival (token, OR, OR);
+		break;
+	}
+
+	/* Two string-valued tokens can usually just be mashed
+	 * together.
+	 *
+	 * XXX: This isn't actually legitimate. Several things here
+	 * should result in a diagnostic since the result cannot be a
+	 * valid, single pre-processing token. For example, pasting
+	 * "123" and "abc" is not legal, but we don't catch that
+	 * here. */
+	if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING) &&
+	    (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING))
+	{
+		char *str;
+
+		str = xtalloc_asprintf (token, "%s%s",
+					token->value.str, other->value.str);
+		return _token_create_str (token, token->type, str);
+	}
+
+	printf ("Error: Pasting \"");
+	_token_print (token);
+	printf ("\" and \"");
+	_token_print (other);
+	printf ("\" does not give a valid preprocessing token.\n");
+
+	return token;
+}
+
+static void
+_token_list_print (token_list_t *list)
+{
+	token_node_t *node;
+
+	if (list == NULL)
+		return;
+
+	for (node = list->head; node; node = node->next)
+		_token_print (node->token);
+}
+
 void
 yyerror (void *scanner, const char *error)
 {
@@ -690,13 +888,17 @@ glcpp_parser_create (void)
 	glcpp_lex_init_extra (parser, &parser->scanner);
 	parser->defines = hash_table_ctor (32, hash_table_string_hash,
 					   hash_table_string_compare);
-	parser->expansions = NULL;
-
-	parser->just_printed_separator = 1;
-	parser->need_newline = 0;
+	parser->active = _string_list_create (parser);
+	parser->space_tokens = 1;
+	parser->newline_as_space = 0;
+	parser->in_control_line = 0;
+	parser->paren_count = 0;
 
 	parser->skip_stack = NULL;
 
+	parser->lex_from_list = NULL;
+	parser->lex_from_node = NULL;
+
 	return parser;
 }
 
@@ -709,8 +911,6 @@ glcpp_parser_parse (glcpp_parser_t *parser)
 void
 glcpp_parser_destroy (glcpp_parser_t *parser)
 {
-	if (parser->need_newline)
-		printf ("\n");
 	if (parser->skip_stack)
 		fprintf (stderr, "Error: Unterminated #if\n");
 	glcpp_lex_destroy (parser->scanner);
@@ -718,247 +918,577 @@ glcpp_parser_destroy (glcpp_parser_t *parser)
 	talloc_free (parser);
 }
 
-static int
-glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member)
+/* Replace any occurrences of DEFINED tokens in 'list' with either a
+ * '0' or '1' INTEGER token depending on whether the next token in the
+ * list is defined or not. */
+static void
+_glcpp_parser_evaluate_defined (glcpp_parser_t *parser,
+				token_list_t *list)
 {
-	expansion_node_t *node;
+	token_node_t *node, *next;
+	macro_t *macro;
+
+	if (list == NULL)
+		return;
 
-	for (node = parser->expansions; node; node = node->next) {
-		if (node->macro &&
-		    strcmp (node->macro->identifier, member) == 0)
+	for (node = list->head; node; node = node->next) {
+		if (node->token->type != DEFINED)
+			continue;
+		next = node->next;
+		while (next && next->token->type == SPACE)
+			next = next->next;
+		if (next == NULL || next->token->type != IDENTIFIER) {
+			fprintf (stderr, "Error: operator \"defined\" requires an identifier\n");
+			exit (1);
+		}
+		macro = hash_table_find (parser->defines,
+					 next->token->value.str);
+
+		node->token->type = INTEGER;
+		node->token->value.ival = (macro != NULL);
+		node->next = next->next;
+	}
+}
+	
+typedef enum function_status
+{
+	FUNCTION_STATUS_SUCCESS,
+	FUNCTION_NOT_A_FUNCTION,
+	FUNCTION_UNBALANCED_PARENTHESES
+} function_status_t;
+
+/* Find a set of function-like macro arguments by looking for a
+ * balanced set of parentheses.
+ *
+ * When called, 'node' should be the macro-name token: the opening
+ * parenthesis is looked for after it, (skipping any SPACE tokens).
+ * Upon successful return *last will be the last consumed node,
+ * (corresponding to the closing right parenthesis).
+ *
+ * Return values:
+ *
+ *   FUNCTION_STATUS_SUCCESS:
+ *
+ *	Successfully parsed a set of function arguments.	
+ *
+ *   FUNCTION_NOT_A_FUNCTION:
+ *
+ *	Macro name not followed by a '('. This is not an error, but
+ *	simply that the macro name should be treated as a non-macro.
+ *
+ *   FUNCTION_UNBALANCED_PARENTHESES:
+ *
+ *	Macro name is not followed by a balanced set of parentheses.
+ */
+static function_status_t
+_arguments_parse (argument_list_t *arguments,
+		  token_node_t *node,
+		  token_node_t **last)
+{
+	token_list_t *argument;
+	int paren_count;
+
+	node = node->next;
+
+	/* Ignore whitespace before first parenthesis. */
+	while (node && node->token->type == SPACE)
+		node = node->next;
+
+	if (node == NULL || node->token->type != '(')
+		return FUNCTION_NOT_A_FUNCTION;
+
+	node = node->next;
+
+	argument = _token_list_create (arguments);
+	_argument_list_append (arguments, argument);
+
+	for (paren_count = 1; node; node = node->next) {
+		if (node->token->type == '(')
 		{
-			return 1;
+			paren_count++;
+		}
+		else if (node->token->type == ')')
+		{
+			paren_count--;
+			if (paren_count == 0)
+				break;
+		}
+
+		if (node->token->type == ',' &&
+			 paren_count == 1)
+		{
+			_token_list_trim_trailing_space (argument);
+			argument = _token_list_create (arguments);
+			_argument_list_append (arguments, argument);
+		}
+		else {
+			if (argument->head == NULL) {
+				/* Don't treat initial whitespace as
+				 * part of the argument. */
+				if (node->token->type == SPACE)
+					continue;
+			}
+			_token_list_append (argument, node->token);
 		}
 	}
 
-	return 0;
+	if (paren_count)
+		return FUNCTION_UNBALANCED_PARENTHESES;
+
+	*last = node;
+
+	return FUNCTION_STATUS_SUCCESS;
 }
 
-token_class_t
-glcpp_parser_classify_token (glcpp_parser_t *parser,
-			     const char *identifier,
-			     int *parameter_index)
+/* This is a helper function that's essentially part of the
+ * implementation of _glcpp_parser_expand_node. It shouldn't be called
+ * except for by that function.
+ *
+ * Returns NULL if node is a simple token with no expansion, (that is,
+ * although 'node' corresponds to an identifier defined as a
+ * function-like macro, it is not followed with a parenthesized
+ * argument list).
+ *
+ * Compute the complete expansion of node (which is a function-like
+ * macro) and subsequent nodes which are arguments.
+ *
+ * Otherwise, returns the token list that results from the expansion
+ * and sets *last to the last node in the list that was consumed by
+ * the expansion. Specifically, *last will be set to the node of the
+ * closing right parenthesis.
+ */
+static token_list_t *
+_glcpp_parser_expand_function (glcpp_parser_t *parser,
+			       token_node_t *node,
+			       token_node_t **last)
+			       
 {
 	macro_t *macro;
+	const char *identifier;
+	argument_list_t *arguments;
+	function_status_t status;
+	token_list_t *substituted;
+	int parameter_index;
+
+	identifier = node->token->value.str;
 
-	/* Is this token a defined macro? */
 	macro = hash_table_find (parser->defines, identifier);
 
-	if (macro == NULL)
-		return TOKEN_CLASS_IDENTIFIER;
+	assert (macro->is_function);
 
-	/* Don't consider this a macro if we are already actively
-	 * expanding this macro. */
-	if (glcpp_parser_is_expanding (parser, identifier))
-		return TOKEN_CLASS_IDENTIFIER_FINALIZED;
+	arguments = _argument_list_create (parser);
+	status = _arguments_parse (arguments, node, last);
 
-	/* Definitely a macro. Just need to check if it's function-like. */
-	if (macro->is_function)
-		return TOKEN_CLASS_FUNC_MACRO;
-	else
-		return TOKEN_CLASS_OBJ_MACRO;
-}
+	switch (status) {
+	case FUNCTION_STATUS_SUCCESS:
+		break;
+	case FUNCTION_NOT_A_FUNCTION:
+		return NULL;
+	case FUNCTION_UNBALANCED_PARENTHESES:
+		return NULL;
+	}
 
-void
-_define_object_macro (glcpp_parser_t *parser,
-		      const char *identifier,
-		      token_list_t *replacements)
-{
-	macro_t *macro;
+	if (macro->replacements == NULL) {
+		talloc_free (arguments);
+		return _token_list_create (parser);
+	}
 
-	macro = xtalloc (parser, macro_t);
+	if (! ((_argument_list_length (arguments) == 
+		_string_list_length (macro->parameters)) ||
+	       (_string_list_length (macro->parameters) == 0 &&
+		_argument_list_length (arguments) == 1 &&
+		arguments->head->argument->head == NULL)))
+	{
+		fprintf (stderr,
+			 "Error: macro %s invoked with %d arguments (expected %d)\n",
+			 identifier,
+			 _argument_list_length (arguments),
+			 _string_list_length (macro->parameters));
+		return NULL;
+	}
 
-	macro->is_function = 0;
-	macro->parameters = NULL;
-	macro->identifier = talloc_strdup (macro, identifier);
-	macro->replacements = talloc_steal (macro, replacements);
+	/* Perform argument substitution on the replacement list. */
+	substituted = _token_list_create (arguments);
 
-	hash_table_insert (parser->defines, macro, identifier);
+	for (node = macro->replacements->head; node; node = node->next)
+	{
+		if (node->token->type == IDENTIFIER &&
+		    _string_list_contains (macro->parameters,
+					   node->token->value.str,
+					   &parameter_index))
+		{
+			token_list_t *argument;
+			argument = _argument_list_member_at (arguments,
+							     parameter_index);
+			/* Before substituting, we expand the argument
+			 * tokens, or append a placeholder token for
+			 * an empty argument. */
+			if (argument->head) {
+				_glcpp_parser_expand_token_list (parser,
+								 argument);
+				_token_list_append_list (substituted, argument);
+			} else {
+				token_t *new_token;
+
+				new_token = _token_create_ival (substituted,
+								PLACEHOLDER,
+								PLACEHOLDER);
+				_token_list_append (substituted, new_token);
+			}
+		} else {
+			_token_list_append (substituted, node->token);
+		}
+	}
+
+	/* After argument substitution, and before further expansion
+	 * below, implement token pasting. */
+
+	_token_list_trim_trailing_space (substituted);
+
+	node = substituted->head;
+	while (node)
+	{
+		token_node_t *next_non_space;
+
+		/* Look ahead for a PASTE token, skipping space. */
+		next_non_space = node->next;
+		while (next_non_space && next_non_space->token->type == SPACE)
+			next_non_space = next_non_space->next;
+
+		if (next_non_space == NULL)
+			break;
+
+		if (next_non_space->token->type != PASTE) {
+			node = next_non_space;
+			continue;
+		}
+
+		/* Now find the next non-space token after the PASTE. */
+		next_non_space = next_non_space->next;
+		while (next_non_space && next_non_space->token->type == SPACE)
+			next_non_space = next_non_space->next;
+
+		if (next_non_space == NULL) {
+			fprintf (stderr, "Error: '##' cannot appear at either end of a macro expansion\n");
+			return NULL;
+		}
+
+		node->token = _token_paste (node->token, next_non_space->token);
+		node->next = next_non_space->next;
+		if (next_non_space == substituted->tail)
+			substituted->tail = node;
+
+		node = node->next;
+	}
+
+	substituted->non_space_tail = substituted->tail;
+
+	_string_list_push (parser->active, identifier);
+	_glcpp_parser_expand_token_list (parser, substituted);
+	_string_list_pop (parser->active);
+
+	return substituted;
 }
 
-void
-_define_function_macro (glcpp_parser_t *parser,
-			const char *identifier,
-			string_list_t *parameters,
-			token_list_t *replacements)
+/* Compute the complete expansion of node, (and subsequent nodes after
+ * 'node' in the case that 'node' is a function-like macro and
+ * subsequent nodes are arguments).
+ *
+ * Returns NULL if node is a simple token with no expansion.
+ *
+ * Otherwise, returns the token list that results from the expansion
+ * and sets *last to the last node in the list that was consumed by
+ * the expansion. Specifically, *last will be set as follows:
+ *
+ *	As 'node' in the case of object-like macro expansion.
+ *
+ *	As the token of the closing right parenthesis in the case of
+ *	function-like macro expansion.
+ */
+static token_list_t *
+_glcpp_parser_expand_node (glcpp_parser_t *parser,
+			   token_node_t *node,
+			   token_node_t **last)
 {
+	token_t *token = node->token;
+	const char *identifier;
 	macro_t *macro;
+	token_list_t *expansion;
+
+	/* We only expand identifiers */
+	if (token->type != IDENTIFIER) {
+		/* We change any COMMA into a COMMA_FINAL to prevent
+		 * it being mistaken for an argument separator
+		 * later. */
+		if (token->type == ',') {
+			token->type = COMMA_FINAL;
+			token->value.ival = COMMA_FINAL;
+		}
 
-	macro = xtalloc (parser, macro_t);
+		return NULL;
+	}
 
-	macro->is_function = 1;
-	macro->parameters = talloc_steal (macro, parameters);
-	macro->identifier = talloc_strdup (macro, identifier);
-	macro->replacements = talloc_steal (macro, replacements);
+	/* Look up this identifier in the hash table. */
+	identifier = token->value.str;
+	macro = hash_table_find (parser->defines, identifier);
 
-	hash_table_insert (parser->defines, macro, identifier);
+	/* Not a macro, so no expansion needed. */
+	if (macro == NULL)
+		return NULL;
+
+	/* Finally, don't expand this macro if we're already actively
+	 * expanding it, (to avoid infinite recursion). */
+	if (_string_list_contains (parser->active, identifier, NULL)) {
+		/* We change the token type here from IDENTIFIER to
+		 * OTHER to prevent any future expansion of this
+		 * unexpanded token. */
+		char *str;
+		token_list_t *expansion;
+		token_t *final;
+
+		str = xtalloc_strdup (parser, token->value.str);
+		final = _token_create_str (parser, OTHER, str);
+		expansion = _token_list_create (parser);
+		_token_list_append (expansion, final);
+		*last = node;
+		return expansion;
+	}
+
+	if (! macro->is_function)
+	{
+		*last = node;
+
+		if (macro->replacements == NULL)
+			return _token_list_create (parser);
+
+		expansion = _token_list_copy (parser, macro->replacements);
+
+		_string_list_push (parser->active, identifier);
+		_glcpp_parser_expand_token_list (parser, expansion);
+		_string_list_pop (parser->active);
+
+		return expansion;
+	}
+
+	return _glcpp_parser_expand_function (parser, node, last);
 }
 
+/* Walk 'list', replacing each macro invocation with its expansion.
+ * Newly spliced-in nodes are walked again, so expansion continues
+ * until nothing more expands. The result is left in 'list' itself
+ * (its head and tail are updated); a NULL list is a no-op.
+ */
 static void
-_glcpp_parser_push_expansion (glcpp_parser_t *parser,
-			      macro_t *macro,
-			      token_node_t *replacements)
+_glcpp_parser_expand_token_list (glcpp_parser_t *parser,
+				 token_list_t *list)
 {
-	expansion_node_t *node;
+	token_node_t *node_prev;
+	token_node_t *node, *last;
+	token_list_t *expansion;
 
-	node = xtalloc (parser, expansion_node_t);
+	if (list == NULL)
+		return;
 
-	node->macro = macro;
-	node->replacements = replacements;
+	_token_list_trim_trailing_space (list);
 
-	node->next = parser->expansions;
-	parser->expansions = node;
+	node_prev = NULL;
+	node = list->head;
+
+	while (node) {
+		/* Find the expansion for node, which will replace all
+		 * nodes from node to last, inclusive. */
+		expansion = _glcpp_parser_expand_node (parser, node, &last);
+		if (expansion) {
+			/* Splice expansion in place of node..last. An
+			 * empty one just unlinks them; if 'last' was the
+			 * tail, node_prev (maybe NULL) becomes the tail. */
+			if (expansion->head) {
+				if (node_prev)
+					node_prev->next = expansion->head;
+				else
+					list->head = expansion->head;
+				expansion->tail->next = last->next;
+				if (last == list->tail)
+					list->tail = expansion->tail;
+			} else {
+				if (node_prev)
+					node_prev->next = last->next;
+				else
+					list->head = last->next;
+				if (last == list->tail)
+					list->tail = node_prev;
+			}
+		} else {
+			node_prev = node;
+		}
+		node = node_prev ? node_prev->next : list->head;
+	}
+
+	list->non_space_tail = list->tail;
 }
 
 static void
-glcpp_parser_pop_expansion (glcpp_parser_t *parser)
+_glcpp_parser_expand_token_list_onto (glcpp_parser_t *parser,
+				      token_list_t *list,
+				      token_list_t *result)
 {
-	expansion_node_t *node;
+	_glcpp_parser_expand_token_list (parser, list);
 
-	node = parser->expansions;
+	_token_list_append_list (result, list);
+}
 
-	if (node == NULL) {
-		fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n");
-		exit (1);
-	}
+void
+_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
+					 token_list_t *list)
+{
+	if (list == NULL)
+		return;
 
-	parser->expansions = node->next;
+	_glcpp_parser_expand_token_list (parser, list);
 
-	talloc_free (node);
+	_token_list_trim_trailing_space (list);
+
+	_token_list_print (list);
 }
 
 void
-_expand_object_macro (glcpp_parser_t *parser, const char *identifier)
+_define_object_macro (glcpp_parser_t *parser,
+		      const char *identifier,
+		      token_list_t *replacements)
 {
 	macro_t *macro;
 
-	macro = hash_table_find (parser->defines, identifier);
-	assert (! macro->is_function);
-	assert (! glcpp_parser_is_expanding (parser, identifier));
+	macro = xtalloc (parser, macro_t);
 
-	_glcpp_parser_push_expansion (parser, macro, macro->replacements->head);
+	macro->is_function = 0;
+	macro->parameters = NULL;
+	macro->identifier = talloc_strdup (macro, identifier);
+	macro->replacements = talloc_steal (macro, replacements);
+
+	hash_table_insert (parser->defines, macro, identifier);
 }
 
 void
-_expand_function_macro (glcpp_parser_t *parser,
+_define_function_macro (glcpp_parser_t *parser,
 			const char *identifier,
-			argument_list_t *arguments)
+			string_list_t *parameters,
+			token_list_t *replacements)
 {
 	macro_t *macro;
-	token_list_t *expanded;
-	token_node_t *i, *j;
-	int parameter_index;
-
-	macro = hash_table_find (parser->defines, identifier);
-	assert (macro->is_function);
-	assert (! glcpp_parser_is_expanding (parser, identifier));
-
-	if (_argument_list_length (arguments) !=
-	    _string_list_length (macro->parameters))
-	{
-		fprintf (stderr,
-			 "Error: macro %s invoked with %d arguments (expected %d)\n",
-			 identifier,
-			 _argument_list_length (arguments),
-			 _string_list_length (macro->parameters));
-		return;
-	}
 
-	expanded = _token_list_create (macro);
+	macro = xtalloc (parser, macro_t);
 
-	for (i = macro->replacements->head; i; i = i->next) {
-		if (_string_list_contains (macro->parameters, i->value,
-					   &parameter_index))
-		{
-			token_list_t *argument;
-			argument = _argument_list_member_at (arguments,
-							     parameter_index);
-			for (j = argument->head; j; j = j->next)
-			{
-				_token_list_append (expanded, j->type,
-						    j->value);
-			}
-		} else {
-			_token_list_append (expanded, i->type, i->value);
-		}
-	}
+	macro->is_function = 1;
+	macro->parameters = talloc_steal (macro, parameters);
+	macro->identifier = talloc_strdup (macro, identifier);
+	macro->replacements = talloc_steal (macro, replacements);
 
-	_glcpp_parser_push_expansion (parser, macro, expanded->head);
+	hash_table_insert (parser->defines, macro, identifier);
 }
 
 static int
 glcpp_parser_lex (glcpp_parser_t *parser)
 {
-	expansion_node_t *expansion;
-	token_node_t *replacements;
-	int parameter_index;
-	const char *token;
-	token_class_t class;
-
-    /* Who says C can't do efficient tail recursion? */
-    RECURSE:
-
-	expansion = parser->expansions;
+	token_node_t *node;
+	int ret;
+
+	if (parser->lex_from_list == NULL) {
+		ret = glcpp_lex (parser->scanner);
+
+		/* XXX: This ugly block of code exists for the sole
+		 * purpose of converting a NEWLINE token into a SPACE
+		 * token, but only in the case where we have seen a
+		 * function-like macro name, but have not yet seen its
+		 * closing parenthesis.
+		 *
+		 * There's perhaps a more compact way to do this with
+		 * mid-rule actions in the grammar.
+		 *
+		 * I'm definitely not pleased with the complexity of
+		 * this code here.
+		 */
+		if (parser->newline_as_space)
+		{
+			if (ret == '(') {
+				parser->paren_count++;
+			} else if (ret == ')') {
+				parser->paren_count--;
+				if (parser->paren_count == 0)
+					parser->newline_as_space = 0;
+			} else if (ret == NEWLINE) {
+				ret = SPACE;
+			} else if (ret != SPACE) {
+				if (parser->paren_count == 0)
+					parser->newline_as_space = 0;
+			}
+		}
+		else if (parser->in_control_line)
+		{
+			if (ret == NEWLINE)
+				parser->in_control_line = 0;
+		}
+		else if (ret == HASH_DEFINE_OBJ || ret == HASH_DEFINE_FUNC ||
+			   ret == HASH_UNDEF || ret == HASH_IF ||
+			   ret == HASH_IFDEF || ret == HASH_IFNDEF ||
+			   ret == HASH_ELIF || ret == HASH_ELSE ||
+			   ret == HASH_ENDIF || ret == HASH)
+		{
+			parser->in_control_line = 1;
+		}
+		else if (ret == IDENTIFIER)
+		{
+			macro_t *macro;
+			macro = hash_table_find (parser->defines,
+						 yylval.str);
+			if (macro && macro->is_function) {
+				parser->newline_as_space = 1;
+				parser->paren_count = 0;
+			}
+		}
 
-	if (expansion == NULL)
-		return glcpp_lex (parser->scanner);
+		return ret;
+	}
 
-	replacements = expansion->replacements;
+	node = parser->lex_from_node;
 
-	/* Pop expansion when replacements is exhausted. */
-	if (replacements == NULL) {
-		glcpp_parser_pop_expansion (parser);
-		goto RECURSE;
+	if (node == NULL) {
+		talloc_free (parser->lex_from_list);
+		parser->lex_from_list = NULL;
+		return NEWLINE;
 	}
 
-	expansion->replacements = replacements->next;
-
-	token = replacements->value;
+	yylval = node->token->value;
+	ret = node->token->type;
 
-	/* Implement token pasting. */
-	if (replacements->next && strcmp (replacements->next->value, "##") == 0) {
-		token_node_t *next_node;
+	parser->lex_from_node = node->next;
 
-		next_node = replacements->next->next;
+	return ret;
+}
 
-		if (next_node == NULL) {
-			fprintf (stderr, "Error: '##' cannot appear at the end of a macro expansion.\n");
-			exit (1);
-		}
+static void
+glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list)
+{
+	token_node_t *node;
 
-		token = xtalloc_asprintf (parser, "%s%s",
-					  token, next_node->value);
-		expansion->replacements = next_node->next;
-	}
+	assert (parser->lex_from_list == NULL);
 
+	/* Copy list, eliminating any space tokens. */
+	parser->lex_from_list = _token_list_create (parser);
 
-	if (strcmp (token, "(") == 0)
-		return '(';
-	else if (strcmp (token, ")") == 0)
-		return ')';
+	for (node = list->head; node; node = node->next) {
+		if (node->token->type == SPACE)
+			continue;
+		_token_list_append (parser->lex_from_list, node->token);
+	}
 
-	yylval.str = xtalloc_strdup (parser, token);
+	talloc_free (list);
 
-	/* Carefully refuse to expand any finalized identifier. */
-	if (replacements->type == IDENTIFIER_FINALIZED)
-		return IDENTIFIER_FINALIZED;
+	parser->lex_from_node = parser->lex_from_list->head;
 
-	switch (glcpp_parser_classify_token (parser, yylval.str,
-					     &parameter_index))
-	{
-	case TOKEN_CLASS_IDENTIFIER:
-		return IDENTIFIER;
-		break;
-	case TOKEN_CLASS_IDENTIFIER_FINALIZED:
-		return IDENTIFIER_FINALIZED;
-		break;
-	case TOKEN_CLASS_FUNC_MACRO:
-		return FUNC_MACRO;
-		break;
-	default:
-	case TOKEN_CLASS_OBJ_MACRO:
-		return OBJ_MACRO;
-		break;
+	/* It's possible the list consisted of nothing but whitespace. */
+	if (parser->lex_from_node == NULL) {
+		talloc_free (parser->lex_from_list);
+		parser->lex_from_list = NULL;
 	}
 }
 
@@ -1002,7 +1532,7 @@ _glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, const char *type,
 		parser->skip_stack->type = SKIP_TO_ENDIF;
 	}
 }
-			
+
 static void
 _glcpp_parser_skip_stack_pop (glcpp_parser_t *parser)
 {
diff --git a/glcpp.h b/glcpp.h
index 503731b..5c8c304 100644
--- a/glcpp.h
+++ b/glcpp.h
@@ -44,21 +44,36 @@ typedef struct string_list {
 	string_node_t *tail;
 } string_list_t;
 
-typedef struct token {
+typedef struct token token_t;
+typedef struct token_list token_list_t;
+
+typedef union YYSTYPE
+{
+	intmax_t ival;
+	char *str;
+	string_list_t *string_list;
+	token_t *token;
+	token_list_t *token_list;
+} YYSTYPE;
+
+# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+
+struct token {
 	int type;
-	char *value;
-} token_t;
+	YYSTYPE value;
+};
 
 typedef struct token_node {
-	int type;
-	const char *value;
+	token_t *token;
 	struct token_node *next;
 } token_node_t;
 
-typedef struct token_list {
+struct token_list {
 	token_node_t *head;
 	token_node_t *tail;
-} token_list_t;
+	token_node_t *non_space_tail;
+};
 
 typedef struct argument_node {
 	token_list_t *argument;
@@ -111,16 +126,16 @@ typedef struct skip_node {
 struct glcpp_parser {
 	yyscan_t scanner;
 	struct hash_table *defines;
-	expansion_node_t *expansions;
-	int just_printed_separator;
-	int need_newline;
+	string_list_t *active;
+	int space_tokens;
+	int newline_as_space;
+	int in_control_line;
+	int paren_count;
 	skip_node_t *skip_stack;
+	token_list_t *lex_from_list;
+	token_node_t *lex_from_node;
 };
 
-void
-glcpp_parser_push_expansion_argument (glcpp_parser_t *parser,
-				      int argument_index);
-
 glcpp_parser_t *
 glcpp_parser_create (void);
 
@@ -164,4 +179,10 @@ xtalloc_strndup (const void *t, const char *p, size_t n);
 char *
 xtalloc_asprintf (const void *t, const char *fmt, ...);
 
+void *
+_xtalloc_reference_loc (const void *context,
+			const void *ptr, const char *location);
+
+#define xtalloc_reference(ctx, ptr) (_TALLOC_TYPEOF(ptr))_xtalloc_reference_loc((ctx),(ptr), __location__)
+
 #endif
diff --git a/tests/glcpp-test b/tests/glcpp-test
index 022a236..ba398af 100755
--- a/tests/glcpp-test
+++ b/tests/glcpp-test
@@ -2,8 +2,9 @@
 
 for test in *.c; do
     echo "Testing $test"
-    ../glcpp < $test > $test.out
+    ../glcpp < $test > $test.glcpp
+    grep -v '^$' < $test.glcpp > $test.out || true
     gcc -E $test -o $test.gcc
-    grep -v '^#' < $test.gcc > $test.expected
-    diff -B -u $test.expected $test.out
+    grep -v '^#' < $test.gcc | grep -v '^$' > $test.expected || true
+    diff -u $test.expected $test.out
 done
diff --git a/xtalloc.c b/xtalloc.c
index e52d12a..656ac2d 100644
--- a/xtalloc.c
+++ b/xtalloc.c
@@ -82,3 +82,18 @@ xtalloc_asprintf (const void *t, const char *fmt, ...)
 	va_end(ap);
 	return ret;
 }
+
+/* _talloc_reference_loc wrapper that never returns NULL: on failure
+ * it prints "Out of memory." to stderr and exits with status 1. */
+void *
+_xtalloc_reference_loc (const void *context,
+			const void *ptr, const char *location)
+{
+	void *reference = _talloc_reference_loc (context, ptr, location);
+
+	if (reference == NULL) {
+		fprintf (stderr, "Out of memory.\n");
+		exit (1);
+	}
+	return reference;
+}
author	Carl Worth <cworth@cworth.org>	2010-05-29 06:03:32 -0700
committer	Carl Worth <cworth@cworth.org>	2010-05-29 06:03:40 -0700
commit	96d3994881832201db7edd8a0a6f4b34655649d3 (patch)
tree	2f599ca455e3fa16e0488516708e9871f07bbdb9
parent	ae3fb09cd20fc189d68f0c2a63cc74dd584d7ee1 (diff)
parent	75ef1c75dd47a0b4054a767fd94f7c3cf68d2331 (diff)