/// <summary>
/// Turns a parse tree into an abstract syntax tree, runs the semantic checks
/// on it and collects the result into a <see cref="Runnable"/>.
/// </summary>
/// <param name="parseTree">Parse tree to process. It is pruned in place.</param>
/// <param name="parseErrors">Errors found so far; every one is copied into the result.</param>
/// <param name="isValidParseTree">
/// When false, no AST is built and the result only carries <paramref name="parseErrors"/>.
/// </param>
/// <returns>
/// A runnable holding all errors and, for a valid tree, the declarations and statements.
/// </returns>
/// <exception cref="InvalidOperationException">
/// A declaration node has neither two nor three children after pruning.
/// </exception>
public Runnable ProcessParseTree(ParseTree parseTree, IEnumerable<Error> parseErrors, bool isValidParseTree)
{
    Runnable prog = new Runnable();
    foreach (Error err in parseErrors)
    {
        prog.errors.Add(err);
    }

    // can't construct AST if parse tree is bad
    if (!isValidParseTree)
    {
        return prog;
    }

    PruneToAst(parseTree);
    if (Program.debug)
    {
        Console.WriteLine(parseTree);
    }

    // AST is formed at this point, so do semantic checks
    CollectDeclarations(parseTree, prog);
    CheckIdentifiersDeclaredBeforeUse(parseTree, prog);

    // add statements to runnable
    ParseTree statementListNode = parseTree.Children[0] as ParseTree;
    foreach (IParseNode statementNode in statementListNode.Children)
    {
        prog.statements.Add(Statement.FromTreeNode(statementNode, terminals, nonterminals));
    }

    CheckForLoopControlVariables(prog);

    // typecheck each statement
    foreach (Statement stmt in prog.statements)
    {
        stmt.TypeCheck(prog);
    }

    return prog;
}

// Strips punctuation/keyword leaves, token-less leaves, childless subtrees and
// helper nonterminals from the parse tree, in that order.
private void PruneToAst(ParseTree parseTree)
{
    // first remove unnecessary symbols ; : .. ( ) := and leaves that never received a token
    String[] pruneTokens = { "(", ")", ";", ":", "..", ":=", "var", "in", "for", "end", "do" };
    Predicate<IParseNode> isUnnecessaryTerminal = n =>
        (n is ParseLeaf)
            ? (n as ParseLeaf).Token == null || pruneTokens.Contains((n as ParseLeaf).Token.Lexeme)
            : false;
    parseTree.RemoveNodes(isUnnecessaryTerminal);

    // remove any tree nodes with no children
    Predicate<IParseNode> isEmptyNonterminal = v =>
        (v is ParseTree) ? (v as ParseTree).Children.Count == 0 : false;
    parseTree.RemoveNodes(isEmptyNonterminal);

    // refactor
    // STMTS->STMTS_HEAD STMTS_TAIL to STMTS->(STMT)+
    // DECL->"var" <IDENT> ":" <TYPE> ASSIGN to DECL->"var" <IDENT> ":" <TYPE> [":=" <EXPR>]
    // EXPR->UNARY|OPND BINARY to EXPR-> unary_op OPND | OPND | OPND binary_op OPND
    // OPND-><INT>|<STRING>|<IDENT>|<EXPR> to just <INT>|<STRING>|<IDENT>|<EXPR>
    Nonterminal[] pruneVariables = new Nonterminal[]
    {
        nonterminals["statements_head"],
        nonterminals["statements_tail"],
        nonterminals["unary_operation"],
        nonterminals["binary_operation"],
        nonterminals["declaration_assignment"],
        nonterminals["operand"]
    };
    Predicate<IParseNode> isUnnecessaryNonterminal = n =>
        (n is ParseTree) ? pruneVariables.Contains((n as ParseTree).Nonterminal) : false;
    parseTree.RemoveNodes(isUnnecessaryNonterminal);
}

// Records every declaration node in prog.declarations; a repeated identifier
// produces a SemanticError and the first declaration is kept.
private void CollectDeclarations(ParseTree parseTree, Runnable prog)
{
    foreach (IParseNode node in parseTree.Nodes())
    {
        if (!(node is ParseTree))
        {
            continue;
        }

        ParseTree subtree = node as ParseTree;
        if (subtree.Nonterminal == nonterminals["declaration"])
        {
            // After pruning, child 0 is the identifier leaf and child 1 the type leaf.
            ParseLeaf idLeaf = (subtree.Children[0] as ParseLeaf);
            ParseLeaf typeLeaf = (subtree.Children[1] as ParseLeaf);
            Token idToken = idLeaf.Token;
            Token typeToken = typeLeaf.Token;

            string identifier = idToken.Lexeme;
            ValueType type = Value.TypeFromString(typeToken.Lexeme);

            Statement.DeclarationStmt declaration;
            switch (subtree.Children.Count)
            {
                case 2: // simple declaration
                    declaration = new Statement.DeclarationStmt(identifier, type, idToken);
                    break;
                case 3: // declaration with assignment
                    Expression expr = Expression.FromTreeNode(subtree.Children[2], terminals, nonterminals);
                    declaration = new Statement.DeclarationStmt(identifier, type, idToken, expr);
                    break;
                default:
                    throw new InvalidOperationException("BAD AST STRUCTURE");
            }

            if (prog.declarations.ContainsKey(identifier))
            {
                prog.errors.Add(new SemanticError(idToken, identifier + " multiply defined"));
            }
            else
            {
                prog.declarations[identifier] = declaration;
            }
        }
    }
}

// Reports identifier leaves that have no declaration at all, or whose text
// position precedes the position of their declaration.
private void CheckIdentifiersDeclaredBeforeUse(ParseTree parseTree, Runnable prog)
{
    foreach (IParseNode node in parseTree.Nodes())
    {
        if (!(node is ParseLeaf))
        {
            continue;
        }

        ParseLeaf leaf = node as ParseLeaf;
        Token leafToken = leaf.Token;
        if (leafToken.Type == tokenTypes["identifier"])
        {
            string identifier = leafToken.Lexeme;
            Position idPosition = leafToken.TextPosition;

            if (!prog.declarations.ContainsKey(identifier))
            {
                prog.errors.Add(new SemanticError(leafToken, identifier + " never defined"));
            }
            else if (idPosition.CompareTo(prog.declarations[identifier].Token.TextPosition) < 0)
            {
                prog.errors.Add(new SemanticError(leafToken, identifier + " not defined before use"));
            }
        }
    }
}

// Reports assignments, declarations and nested for-loops that reuse the control
// variable of ANY enclosing for-loop. Statements are walked with an explicit
// stack; scopeStack is kept in lock-step with stmtStack and holds, for each
// pending statement, the loops enclosing it (outermost first). Tracking every
// enclosing loop is what lets a nested loop's own control variable be checked.
private static void CheckForLoopControlVariables(Runnable prog)
{
    foreach (Statement stmt in prog.statements)
    {
        if (!(stmt is Statement.ForStmt))
        {
            continue;
        }

        Statement.ForStmt forStmt = stmt as Statement.ForStmt;
        Stack<Statement> stmtStack = new Stack<Statement>();
        Stack<List<Statement.ForStmt>> scopeStack = new Stack<List<Statement.ForStmt>>();

        List<Statement.ForStmt> topScope = new List<Statement.ForStmt>();
        topScope.Add(forStmt);
        foreach (Statement substmt in forStmt.Block)
        {
            stmtStack.Push(substmt);
            scopeStack.Push(topScope);
        }

        while (stmtStack.Count != 0)
        {
            Statement s = stmtStack.Pop();
            List<Statement.ForStmt> enclosing = scopeStack.Pop();

            if (s is Statement.AssignStmt)
            {
                Statement.AssignStmt assignment = s as Statement.AssignStmt;
                Statement.ForStmt owner = enclosing.Find(loop => assignment.Identifier == loop.Identifier);
                if (owner != null)
                {
                    prog.errors.Add(new SemanticError(assignment.Token, owner.Identifier + " cannot be modified inside for-loop"));
                }
            }
            else if (s is Statement.DeclarationStmt)
            {
                Statement.DeclarationStmt declaration = s as Statement.DeclarationStmt;
                Statement.ForStmt owner = enclosing.Find(loop => declaration.Identifier == loop.Identifier);
                if (owner != null)
                {
                    prog.errors.Add(new SemanticError(declaration.Token, owner.Identifier + " cannot be modified inside for-loop"));
                }
            }
            else if (s is Statement.ForStmt)
            {
                Statement.ForStmt nestedFor = s as Statement.ForStmt;
                Statement.ForStmt owner = enclosing.Find(loop => nestedFor.Identifier == loop.Identifier);
                if (owner != null)
                {
                    prog.errors.Add(new SemanticError(nestedFor.Token, owner.Identifier + " cannot be modified inside for-loop"));
                }

                // Statements in the nested block are guarded by the nested loop too.
                List<Statement.ForStmt> nestedScope = new List<Statement.ForStmt>(enclosing);
                nestedScope.Add(nestedFor);
                foreach (Statement substmt in nestedFor.Block)
                {
                    stmtStack.Push(substmt);
                    scopeStack.Push(nestedScope);
                }
            }
        }
    }
}
/// <summary>
/// Parses the given stream of tokens with a table-driven, stack-based parse
/// and returns the parse tree rooted at the start symbol.
/// </summary>
/// <remarks>
/// Resets <c>isValidParseTree</c> to true and <c>errors</c> to a new list on entry.
/// Tokens of type <c>TokenType.ERROR</c> are skipped and recorded as
/// <c>LexicalError</c>; a terminal mismatch or a missing table entry is recorded
/// as <c>SyntaxError</c> and handed to <c>Synchronize</c>.
/// NOTE(review): Synchronize is not visible here; presumably it performs error
/// recovery on both stacks and the token stream - confirm.
/// </remarks>
/// <param name="tokenSource">Tokens to parse.</param>
/// <returns>The parse tree built while consuming the tokens.</returns>
/// <exception cref="ArgumentNullException"><paramref name="tokenSource"/> is null.</exception>
public ParseTree Parse(IEnumerable<Token> tokenSource)
{
    if (tokenSource == null)
    {
        throw new ArgumentNullException("tokenSource");
    }

    isValidParseTree = true;
    errors = new List<Error>();

    Stack<ISymbol> symbolStack = new Stack<ISymbol>();
    symbolStack.Push(Terminal.EOF);
    symbolStack.Push(start);

    // treeStack mirrors symbolStack: the node on top belongs to the symbol on top.
    ParseTree parseTree = new ParseTree(start);
    Stack<IParseNode> treeStack = new Stack<IParseNode>();
    treeStack.Push(new ParseLeaf(Terminal.EOF));
    treeStack.Push(parseTree);

    // The enumerator is disposable; release it even if parsing throws.
    using (IEnumerator<Token> tokenStream = tokenSource.GetEnumerator())
    {
        tokenStream.MoveNext();

        while (symbolStack.Count > 0)
        {
            if (Program.debug)
            {
                Console.WriteLine("=========================================================");
                Console.WriteLine(" PARSE: Stack " + SymbolsToString(symbolStack));
                Console.WriteLine(" PARSE: expecting " + symbolStack.Peek());
                Console.WriteLine(" PARSE: token " + tokenStream.Current);
            }

            // ignore error tokens
            if (tokenStream.Current.Type == TokenType.ERROR)
            {
                if (Program.debug)
                    Console.WriteLine(" PARSE: skipping error token");
                errors.Add(new LexicalError(tokenStream.Current));
                tokenStream.MoveNext();
                continue;
            }

            if (symbolStack.Peek() is Terminal)
            {
                Terminal term = symbolStack.Peek() as Terminal;

                if (term == Terminal.EPSILON)
                {
                    // epsilon production was used, exclude from parse tree
                    if (Program.debug)
                        Console.WriteLine(" PARSE: ignore epsilon");
                    symbolStack.Pop();
                    treeStack.Pop();
                }
                else if (term.Matches(tokenStream.Current))
                {
                    // current token matches the top of the parse stack, add it to parse tree
                    if (Program.debug)
                        Console.WriteLine(" PARSE: Terminal match");
                    ParseLeaf leaf = treeStack.Peek() as ParseLeaf;
                    leaf.Token = tokenStream.Current;
                    tokenStream.MoveNext();
                    symbolStack.Pop();
                    treeStack.Pop();
                }
                else
                {
                    // current token does not match, recover from error
                    if (Program.debug)
                        Console.WriteLine(" PARSE: Error, Terminal mismatch");
                    errors.Add(new SyntaxError(tokenStream.Current));
                    Synchronize(symbolStack, treeStack, tokenStream);
                }
            }
            else // top of stack is a nonterminal
            {
                Nonterminal nonterminal = symbolStack.Pop() as Nonterminal;
                IParseNode popped = treeStack.Pop();
                ParseTree subtree = popped as ParseTree;

                ISymbol[] production = table.Get(nonterminal, tokenStream.Current);

                if (production == null)
                {
                    // cannot derive the current token from the nonterminal at the top of the stack
                    if (Program.debug)
                        Console.WriteLine(" PARSE: Error, No such production");

                    // restore both stacks before recovery so Synchronize sees the failing symbol
                    symbolStack.Push(nonterminal);
                    treeStack.Push(popped);

                    errors.Add(new SyntaxError(tokenStream.Current));
                    Synchronize(symbolStack, treeStack, tokenStream);
                }
                else
                {
                    // use the production specified by the parse table, add node to parse tree
                    if (Program.debug)
                        Console.WriteLine(" PARSE: Using production " + SymbolsToString(production));

                    // Push right-to-left so the leftmost symbol ends up on top;
                    // inserting at index 0 keeps the children in production order.
                    for (int i = production.Length - 1; i >= 0; i--)
                    {
                        IParseNode treeChild;
                        if (production[i] is Terminal)
                            treeChild = new ParseLeaf(production[i] as Terminal);
                        else
                            treeChild = new ParseTree(production[i] as Nonterminal);
                        subtree.Children.Insert(0, treeChild);
                        treeStack.Push(treeChild);
                        symbolStack.Push(production[i]);
                    }
                }
            }
        }
    }

    if (Program.debug)
        Console.WriteLine(parseTree);

    return parseTree;
}
/// <summary>
/// Parses the given stream of tokens: a symbol stack is expanded through the
/// parse table while a parallel node stack builds the parse tree.
/// </summary>
/// <remarks>
/// On entry <c>isValidParseTree</c> is set to true and <c>errors</c> is replaced
/// by an empty list. Error tokens are skipped and logged as <c>LexicalError</c>;
/// terminal mismatches and missing productions are logged as <c>SyntaxError</c>
/// and passed on to <c>Synchronize</c>.
/// NOTE(review): what Synchronize does to the stacks and the token stream is
/// not visible from this method - confirm before relying on it.
/// </remarks>
/// <param name="tokenSource">Tokens to parse.</param>
/// <returns>The parse tree rooted at the start symbol.</returns>
/// <exception cref="ArgumentNullException"><paramref name="tokenSource"/> is null.</exception>
public ParseTree Parse(IEnumerable <Token> tokenSource)
{
    if (tokenSource == null)
    {
        throw new ArgumentNullException("tokenSource");
    }

    isValidParseTree = true;
    errors           = new List <Error>();

    Stack <ISymbol> symbolStack = new Stack <ISymbol>();
    symbolStack.Push(Terminal.EOF);
    symbolStack.Push(start);

    // Every symbol on symbolStack has its tree node at the same depth of treeStack.
    ParseTree          parseTree = new ParseTree(start);
    Stack <IParseNode> treeStack = new Stack <IParseNode>();
    treeStack.Push(new ParseLeaf(Terminal.EOF));
    treeStack.Push(parseTree);

    // IEnumerator<T> is IDisposable: dispose it on every exit path.
    using (IEnumerator <Token> tokenStream = tokenSource.GetEnumerator())
    {
        tokenStream.MoveNext();

        while (symbolStack.Count > 0)
        {
            if (Program.debug)
            {
                Console.WriteLine("=========================================================");
                Console.WriteLine(" PARSE: Stack " + SymbolsToString(symbolStack));
                Console.WriteLine(" PARSE: expecting " + symbolStack.Peek());
                Console.WriteLine(" PARSE: token " + tokenStream.Current);
            }

            // ignore error tokens
            if (tokenStream.Current.Type == TokenType.ERROR)
            {
                if (Program.debug)
                {
                    Console.WriteLine(" PARSE: skipping error token");
                }
                errors.Add(new LexicalError(tokenStream.Current));
                tokenStream.MoveNext();
                continue;
            }

            if (symbolStack.Peek() is Terminal)
            {
                Terminal term = symbolStack.Peek() as Terminal;

                if (term == Terminal.EPSILON)
                {
                    // epsilon production was used, exclude from parse tree
                    if (Program.debug)
                    {
                        Console.WriteLine(" PARSE: ignore epsilon");
                    }
                    symbolStack.Pop();
                    treeStack.Pop();
                }
                else if (term.Matches(tokenStream.Current))
                {
                    // current token matches the top of the parse stack, add it to parse tree
                    if (Program.debug)
                    {
                        Console.WriteLine(" PARSE: Terminal match");
                    }
                    ParseLeaf leaf = treeStack.Peek() as ParseLeaf;
                    leaf.Token = tokenStream.Current;
                    tokenStream.MoveNext();
                    symbolStack.Pop();
                    treeStack.Pop();
                }
                else
                {
                    // current token does not match, recover from error
                    if (Program.debug)
                    {
                        Console.WriteLine(" PARSE: Error, Terminal mismatch");
                    }
                    errors.Add(new SyntaxError(tokenStream.Current));
                    Synchronize(symbolStack, treeStack, tokenStream);
                }
            }
            else // top of stack is a nonterminal
            {
                Nonterminal expanded = symbolStack.Pop() as Nonterminal;
                IParseNode  popped   = treeStack.Pop();
                ParseTree   subtree  = popped as ParseTree;

                ISymbol[] production = table.Get(expanded, tokenStream.Current);

                if (production == null)
                {
                    // cannot derive the current token from the nonterminal at the top of the stack
                    if (Program.debug)
                    {
                        Console.WriteLine(" PARSE: Error, No such production");
                    }

                    // put the symbol and its node back before handing over to recovery
                    symbolStack.Push(expanded);
                    treeStack.Push(popped);

                    errors.Add(new SyntaxError(tokenStream.Current));
                    Synchronize(symbolStack, treeStack, tokenStream);
                }
                else
                {
                    // use the production specified by the parse table, add node to parse tree
                    if (Program.debug)
                    {
                        Console.WriteLine(" PARSE: Using production " + SymbolsToString(production));
                    }

                    // Walk the production backwards: the first symbol must be on top
                    // of the stacks, and Insert(0, ...) restores left-to-right child order.
                    for (int i = production.Length - 1; i >= 0; i--)
                    {
                        IParseNode treeChild;
                        if (production[i] is Terminal)
                        {
                            treeChild = new ParseLeaf(production[i] as Terminal);
                        }
                        else
                        {
                            treeChild = new ParseTree(production[i] as Nonterminal);
                        }
                        subtree.Children.Insert(0, treeChild);
                        treeStack.Push(treeChild);
                        symbolStack.Push(production[i]);
                    }
                }
            }
        }
    }

    if (Program.debug)
    {
        Console.WriteLine(parseTree);
    }

    return(parseTree);
}