// can get production from nonterminal and terminal as usual
 public ISymbol[] Get(Nonterminal var, Terminal term)
 {
     if (!table.ContainsKey(var))
         return null;
     if (!table[var].ContainsKey(term))
         return null;
     return table[var][term];
 }
        public Parser(CFG grammar, Terminal syncTerm)
        {
            this.grammar = grammar;
            this.table = grammar.CreateLL1ParseTable();
            this.start = grammar.StartSymbol;
            this.syncTerm = syncTerm;

            if (table == null)
                throw new Exception("GRAMMAR NOT LL(1)");
        }
 // Add a production for some (nonterminal, terminal) pair to the table
 public void Add(Nonterminal var, Terminal term, ISymbol[] production)
 {
     if (!table.ContainsKey(var))
         table[var] = new Dictionary<Terminal, ISymbol[]>();
     table[var][term] = production;
 }
        private MiniPL()
        {
            // Create NFA-type things for tokens using regular operations
            Regex reBlockCommentStart = Regex.Concat("/*");
            Regex reBlockCommentEnd = Regex.Concat("*/");

            Regex reLineComment = Regex.Concat("//").Concat(Regex.Not('\n').Star());

            Regex reWhitespace = Regex.Union(" \t\r\n").Star().Union(reLineComment);

            Regex reString = Regex.Char('"').Concat(Regex.Char('\\').Concat(Regex.Any()).Union(Regex.Not('"', '\\')).Star()).Concat(Regex.Char('"'));
            Regex reBinaryOperator = Regex.Union("+-*/<=&");
            Regex reUnaryOperator = Regex.Char('!');
            Regex reKeyword = Regex.Union(Regex.Concat("var"), Regex.Concat("for"), Regex.Concat("end"), Regex.Concat("in"),
                                          Regex.Concat("do"), Regex.Concat("read"), Regex.Concat("print"), Regex.Concat("assert"));
            Regex reType = Regex.Union(Regex.Concat("bool"), Regex.Concat("int"), Regex.Concat("string"));

            Regex reParenRight = Regex.Char(')'),
                  reParenLeft = Regex.Char('('),
                  reColon = Regex.Char(':'),
                  reSemicolon = Regex.Char(';'),
                  reAssignment = Regex.Concat(":="),
                  reDots = Regex.Concat("..");

            Regex reIdentifier = Regex.Union(Regex.Range('A', 'Z'), Regex.Range('a', 'z'))
                                      .Concat(Regex.Union(Regex.Range('A', 'Z'), Regex.Range('a', 'z'), Regex.Range('0', '9'), Regex.Char('_')).Star());
            Regex reInteger = Regex.Range('0', '9').Plus();

            // Define token types
            tokenTypes["block_comment_start"] = new TokenType("block_comment_start", reBlockCommentStart);
            tokenTypes["block_comment_end"] = new TokenType("block_comment_end", reBlockCommentEnd);
            tokenTypes["int"] = new TokenType("int", reInteger);
            tokenTypes["whitespace"] = new TokenType("whitespace", reWhitespace, priority: TokenType.Priority.Whitespace);
            tokenTypes["string"] = new TokenType("string", reString);
            tokenTypes["binary_op"] = new TokenType("binary op", reBinaryOperator);
            tokenTypes["unary_op"] = new TokenType("unary op", reUnaryOperator);
            tokenTypes["keyword"] = new TokenType("keyword", reKeyword, priority: TokenType.Priority.Keyword);
            tokenTypes["type"] = new TokenType("type", reType, priority: TokenType.Priority.Keyword);
            tokenTypes["left_paren"] = new TokenType("left paren", reParenLeft);
            tokenTypes["right_paren"] = new TokenType("right paren", reParenRight);
            tokenTypes["colon"] = new TokenType("colon", reColon);
            tokenTypes["semicolon"] = new TokenType("semicolon", reSemicolon);
            tokenTypes["assignment"] = new TokenType("assignment", reAssignment);
            tokenTypes["dots"] = new TokenType("dots", reDots);
            tokenTypes["identifier"] = new TokenType("identifier", reIdentifier);

            // create combined automaton and scanner object
            TokenAutomaton automaton = TokenType.CombinedAutomaton(tokenTypes.Values.ToArray());
            scanner = new Scanner(automaton, tokenTypes["block_comment_start"], tokenTypes["block_comment_end"]);

            // Define nonterminal variables of CFG
            nonterminals["program"] = new Nonterminal("PROG");
            nonterminals["statements"] = new Nonterminal("STMTS");
            nonterminals["statements_head"] = new Nonterminal("STMTS_HEAD");
            nonterminals["statements_tail"] = new Nonterminal("STMTS_TAIL");
            nonterminals["statement"] = new Nonterminal("STMT");
            nonterminals["declaration"] = new Nonterminal("DECL");
            nonterminals["declaration_assignment"] = new Nonterminal("DECL_ASSIGN");
            nonterminals["expression"] = new Nonterminal("EXPR");
            nonterminals["unary_operation"] = new Nonterminal("UNARY_OP");
            nonterminals["binary_operation"] = new Nonterminal("BINARY_OP");
            nonterminals["operand"] = new Nonterminal("OPND");

            // Define terminal variables of CFG
            terminals["identifier"] = new Terminal(tokenTypes["identifier"]);
            terminals["assert"] = new Terminal("assert");
            terminals["print"] = new Terminal("print");
            terminals["read"] = new Terminal("read");
            terminals["for"] = new Terminal("for");
            terminals["in"] = new Terminal("in");
            terminals["end"] = new Terminal("end");
            terminals["do"] = new Terminal("do");
            terminals["var"] = new Terminal("var");
            terminals["type"] = new Terminal(tokenTypes["type"]);
            terminals["string"] = new Terminal(tokenTypes["string"]);
            terminals["int"] = new Terminal(tokenTypes["int"]);
            terminals[")"] = new Terminal(")");
            terminals["("] = new Terminal("(");
            terminals[".."] = new Terminal("..");
            terminals[":="] = new Terminal(":=");
            terminals[":"] = new Terminal(":");
            terminals[";"] = new Terminal(";");
            terminals["binary_operator"] = new Terminal(tokenTypes["binary_op"]);
            terminals["unary_operator"] = new Terminal(tokenTypes["unary_op"]);

            // Create the Mini-PL grammar
            grammar = new CFG(nonterminals["program"], terminals.Values, nonterminals.Values);

            // define production rules for the grammar
            grammar.AddProductionRule(nonterminals["program"], new ISymbol[] { nonterminals["statements"] });
            grammar.AddProductionRule(nonterminals["statements"], new ISymbol[] { nonterminals["statements_head"], nonterminals["statements_tail"] });
            grammar.AddProductionRule(nonterminals["statements_head"], new ISymbol[] { nonterminals["statement"], terminals[";"] });
            grammar.AddProductionRule(nonterminals["statements_tail"], new ISymbol[] { nonterminals["statements_head"], nonterminals["statements_tail"] });
            grammar.AddProductionRule(nonterminals["statements_tail"], new ISymbol[] { Terminal.EPSILON });

            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { nonterminals["declaration"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["identifier"], terminals[":="], nonterminals["expression"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["for"], terminals["identifier"], terminals["in"], nonterminals["expression"], terminals[".."], nonterminals["expression"], terminals["do"],
                                                                                   nonterminals["statements"], terminals["end"], terminals["for"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["read"], terminals["identifier"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["print"], nonterminals["expression"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["assert"], terminals["("], nonterminals["expression"], terminals[")"] });

            grammar.AddProductionRule(nonterminals["declaration"], new ISymbol[] { terminals["var"], terminals["identifier"], terminals[":"], terminals["type"], nonterminals["declaration_assignment"] });
            grammar.AddProductionRule(nonterminals["declaration_assignment"], new ISymbol[] { terminals[":="], nonterminals["expression"] });
            grammar.AddProductionRule(nonterminals["declaration_assignment"], new ISymbol[] { Terminal.EPSILON });

            grammar.AddProductionRule(nonterminals["expression"], new ISymbol[] { nonterminals["unary_operation"] });
            grammar.AddProductionRule(nonterminals["expression"], new ISymbol[] { nonterminals["operand"], nonterminals["binary_operation"] });

            grammar.AddProductionRule(nonterminals["unary_operation"], new ISymbol[] { terminals["unary_operator"], nonterminals["operand"] });

            grammar.AddProductionRule(nonterminals["binary_operation"], new ISymbol[] { terminals["binary_operator"], nonterminals["operand"] });
            grammar.AddProductionRule(nonterminals["binary_operation"], new ISymbol[] { Terminal.EPSILON });

            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["int"] });
            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["string"] });
            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["identifier"] });
            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["("], nonterminals["expression"], terminals[")"] });

            // use ; as synchronizing token for Mini-PL
            parser = new Parser(grammar, terminals[";"]);
        }
 public ParseLeaf(Terminal terminal)
 {
     this.terminal = terminal;
 }
Exemple #6
0
        // Parse the given stream of tokens
        public ParseTree Parse(IEnumerable <Token> tokenSource)
        {
            isValidParseTree = true;
            errors           = new List <Error>();

            Stack <ISymbol> symbolStack = new Stack <ISymbol>();

            symbolStack.Push(Terminal.EOF);
            symbolStack.Push(start);

            ParseTree          parseTree = new ParseTree(start);
            Stack <IParseNode> treeStack = new Stack <IParseNode>();

            treeStack.Push(new ParseLeaf(Terminal.EOF));
            treeStack.Push(parseTree);

            IEnumerator <Token> tokenStream = tokenSource.GetEnumerator();

            tokenStream.MoveNext();

            while (symbolStack.Count > 0)
            {
                if (Program.debug)
                {
                    Console.WriteLine("=========================================================");
                    Console.WriteLine("  PARSE: Stack " + SymbolsToString(symbolStack));
                    Console.WriteLine("  PARSE: expecting " + symbolStack.Peek());
                    Console.WriteLine("  PARSE: token " + tokenStream.Current);
                }

                // ignore error tokens
                if (tokenStream.Current.Type == TokenType.ERROR)
                {
                    if (Program.debug)
                    {
                        Console.WriteLine("  PARSE: skipping error token");
                    }
                    errors.Add(new LexicalError(tokenStream.Current));
                    tokenStream.MoveNext();
                    continue;
                }

                if (symbolStack.Peek() is Terminal)
                {
                    Terminal  term = symbolStack.Peek() as Terminal;
                    ParseLeaf leaf = treeStack.Peek() as ParseLeaf;

                    if (term == Terminal.EPSILON)
                    {
                        // epsilon production was used, exclude from parse tree
                        if (Program.debug)
                        {
                            Console.WriteLine("  PARSE: ignore epsilon");
                        }
                        symbolStack.Pop();
                        treeStack.Pop();
                    }
                    else if (term.Matches(tokenStream.Current))
                    {
                        // current token matches the top of the parse stack, add it to parse tree
                        if (Program.debug)
                        {
                            Console.WriteLine("  PARSE: Terminal match");
                        }
                        leaf.Token = tokenStream.Current;
                        tokenStream.MoveNext();
                        symbolStack.Pop();
                        treeStack.Pop();
                    }
                    else
                    {
                        // current token does no match, recover from error
                        if (Program.debug)
                        {
                            Console.WriteLine("  PARSE: Error, Terminal mismatch");
                        }
                        errors.Add(new SyntaxError(tokenStream.Current));
                        Synchronize(symbolStack, treeStack, tokenStream);
                    }
                }
                else // top of stack is a nonterminal
                {
                    Nonterminal var     = symbolStack.Pop() as Nonterminal;
                    IParseNode  popped  = treeStack.Pop();
                    ParseTree   subtree = popped as ParseTree;

                    ISymbol[] production = table.Get(var, tokenStream.Current);

                    if (production == null)
                    {
                        // cannot derive the current token from the nonterminal at the top of the stack
                        if (Program.debug)
                        {
                            Console.WriteLine("  PARSE: Error, No such production");
                        }

                        symbolStack.Push(var);
                        treeStack.Push(popped);

                        errors.Add(new SyntaxError(tokenStream.Current));
                        Synchronize(symbolStack, treeStack, tokenStream);
                    }
                    else
                    {
                        // use the production specified by the parse table, add node to parse tree
                        if (Program.debug)
                        {
                            Console.WriteLine("  PARSE: Using production " + SymbolsToString(production));
                        }

                        for (int i = production.Length - 1; i >= 0; i--)
                        {
                            IParseNode treeChild;
                            if (production[i] is Terminal)
                            {
                                treeChild = new ParseLeaf(production[i] as Terminal);
                            }
                            else
                            {
                                treeChild = new ParseTree(production[i] as Nonterminal);
                            }
                            subtree.Children.Insert(0, treeChild);
                            treeStack.Push(treeChild);
                            symbolStack.Push(production[i]);
                        }
                    }
                }
            }

            if (Program.debug)
            {
                Console.WriteLine(parseTree);
            }

            return(parseTree);
        }
Exemple #7
0
        // Uses phrase-level error recovery with First and Follow sets to recover from bad state
        private void Synchronize(Stack <ISymbol> symbolStack, Stack <IParseNode> treeStack, IEnumerator <Token> tokenStream)
        {
            // Loop until good state found (return statements) or no more symbols on stack.
            sync : while (symbolStack.Count > 0)
            {
                if (Program.debug)
                {
                    Console.WriteLine("  PARSE: Synchronize token " + tokenStream.Current + " symbol " + symbolStack.Peek());
                }

                // no recovery from end of file, empty the parse stack
                if (tokenStream.Current.Type == TokenType.EOF)
                {
                    while (symbolStack.Count > 0)
                    {
                        isValidParseTree = false;
                        symbolStack.Pop();
                        treeStack.Pop();
                    }
                    if (Program.debug)
                    {
                        Console.WriteLine("PARSE: Unexpected EOF");
                    }
                    return;
                }

                if (symbolStack.Peek() is Terminal)
                {
                    Terminal t = symbolStack.Peek() as Terminal;
                    if (t.Matches(tokenStream.Current))
                    {
                        // good state reached
                        if (Program.debug)
                        {
                            Console.WriteLine("  PARSE: Token matches");
                        }
                        return;
                    }

                    if (syncTerm.Matches(tokenStream.Current))
                    {
                        // special case: we have a synchronising token like ';' that we do not want to skip over
                        // so remove symbol from parse stack
                        if (Program.debug)
                        {
                            Console.WriteLine("  PARSE: Discarding symbol " + symbolStack.Peek());
                        }
                        isValidParseTree = false;
                        symbolStack.Pop();
                        treeStack.Pop();
                        continue;
                    }

                    // normal case: discard token and continue with recovery
                    if (Program.debug)
                    {
                        Console.WriteLine("  PARSE: Discard token");
                    }
                    if (!tokenStream.MoveNext())
                    {
                        throw new Exception("OUT OF TOKENS");
                    }
                    continue;
                }
                else
                {
                    Nonterminal v = symbolStack.Peek() as Nonterminal;
                    if (table.Get(v, tokenStream.Current) != null)
                    {
                        // good state reached (current token in First set of symbol at top of stack)
                        if (Program.debug)
                        {
                            Console.WriteLine("  PARSE: Valid production exists");
                        }
                        return;
                    }

                    // if current terminal could be matched by something in Follow set of
                    // symbol at top of stack, then skip the current symbol
                    foreach (ISymbol sym in grammar.Follow(v))
                    {
                        if (sym is Terminal)
                        {
                            Terminal followTerm = sym as Terminal;
                            if (followTerm.Matches(tokenStream.Current))
                            {
                                if (Program.debug)
                                {
                                    Console.WriteLine("  PARSE: Discarding symbol " + symbolStack.Peek());
                                }
                                isValidParseTree = false;
                                symbolStack.Pop();
                                treeStack.Pop();
                                goto sync;
                            }
                        }
                        if (sym is Nonterminal)
                        {
                            Nonterminal followVar = sym as Nonterminal;
                            if (table.Get(followVar, tokenStream.Current) != null)
                            {
                                if (Program.debug)
                                {
                                    Console.WriteLine("  PARSE: Discarding symbol " + symbolStack.Peek());
                                }
                                isValidParseTree = false;
                                symbolStack.Pop();
                                treeStack.Pop();
                                goto sync;
                            }
                        }
                    }

                    // default action: skip current terminal
                    if (Program.debug)
                    {
                        Console.WriteLine("  PARSE: Discard token");
                    }
                    if (!tokenStream.MoveNext())
                    {
                        throw new Exception("OUT OF TOKENS");
                    }
                }
            }
        }