Example #1
0
        public Parser(CFG grammar, Terminal syncTerm)
        {
            this.grammar = grammar;
            this.table = grammar.CreateLL1ParseTable();
            this.start = grammar.StartSymbol;
            this.syncTerm = syncTerm;

            if (table == null)
                throw new Exception("GRAMMAR NOT LL(1)");
        }
Example #2
0
        public Parser(CFG grammar, Terminal syncTerm)
        {
            this.grammar  = grammar;
            this.table    = grammar.CreateLL1ParseTable();
            this.start    = grammar.StartSymbol;
            this.syncTerm = syncTerm;

            if (table == null)
            {
                throw new Exception("GRAMMAR NOT LL(1)");
            }
        }
Example #3
0
        private MiniPL()
        {
            // Create NFA-type things for tokens using regular operations
            Regex reBlockCommentStart = Regex.Concat("/*");
            Regex reBlockCommentEnd = Regex.Concat("*/");

            Regex reLineComment = Regex.Concat("//").Concat(Regex.Not('\n').Star());

            Regex reWhitespace = Regex.Union(" \t\r\n").Star().Union(reLineComment);

            Regex reString = Regex.Char('"').Concat(Regex.Char('\\').Concat(Regex.Any()).Union(Regex.Not('"', '\\')).Star()).Concat(Regex.Char('"'));
            Regex reBinaryOperator = Regex.Union("+-*/<=&");
            Regex reUnaryOperator = Regex.Char('!');
            Regex reKeyword = Regex.Union(Regex.Concat("var"), Regex.Concat("for"), Regex.Concat("end"), Regex.Concat("in"),
                                          Regex.Concat("do"), Regex.Concat("read"), Regex.Concat("print"), Regex.Concat("assert"));
            Regex reType = Regex.Union(Regex.Concat("bool"), Regex.Concat("int"), Regex.Concat("string"));

            Regex reParenRight = Regex.Char(')'),
                  reParenLeft = Regex.Char('('),
                  reColon = Regex.Char(':'),
                  reSemicolon = Regex.Char(';'),
                  reAssignment = Regex.Concat(":="),
                  reDots = Regex.Concat("..");

            Regex reIdentifier = Regex.Union(Regex.Range('A', 'Z'), Regex.Range('a', 'z'))
                                      .Concat(Regex.Union(Regex.Range('A', 'Z'), Regex.Range('a', 'z'), Regex.Range('0', '9'), Regex.Char('_')).Star());
            Regex reInteger = Regex.Range('0', '9').Plus();

            // Define token types
            tokenTypes["block_comment_start"] = new TokenType("block_comment_start", reBlockCommentStart);
            tokenTypes["block_comment_end"] = new TokenType("block_comment_end", reBlockCommentEnd);
            tokenTypes["int"] = new TokenType("int", reInteger);
            tokenTypes["whitespace"] = new TokenType("whitespace", reWhitespace, priority: TokenType.Priority.Whitespace);
            tokenTypes["string"] = new TokenType("string", reString);
            tokenTypes["binary_op"] = new TokenType("binary op", reBinaryOperator);
            tokenTypes["unary_op"] = new TokenType("unary op", reUnaryOperator);
            tokenTypes["keyword"] = new TokenType("keyword", reKeyword, priority: TokenType.Priority.Keyword);
            tokenTypes["type"] = new TokenType("type", reType, priority: TokenType.Priority.Keyword);
            tokenTypes["left_paren"] = new TokenType("left paren", reParenLeft);
            tokenTypes["right_paren"] = new TokenType("right paren", reParenRight);
            tokenTypes["colon"] = new TokenType("colon", reColon);
            tokenTypes["semicolon"] = new TokenType("semicolon", reSemicolon);
            tokenTypes["assignment"] = new TokenType("assignment", reAssignment);
            tokenTypes["dots"] = new TokenType("dots", reDots);
            tokenTypes["identifier"] = new TokenType("identifier", reIdentifier);

            // create combined automaton and scanner object
            TokenAutomaton automaton = TokenType.CombinedAutomaton(tokenTypes.Values.ToArray());
            scanner = new Scanner(automaton, tokenTypes["block_comment_start"], tokenTypes["block_comment_end"]);

            // Define nonterminal variables of CFG
            nonterminals["program"] = new Nonterminal("PROG");
            nonterminals["statements"] = new Nonterminal("STMTS");
            nonterminals["statements_head"] = new Nonterminal("STMTS_HEAD");
            nonterminals["statements_tail"] = new Nonterminal("STMTS_TAIL");
            nonterminals["statement"] = new Nonterminal("STMT");
            nonterminals["declaration"] = new Nonterminal("DECL");
            nonterminals["declaration_assignment"] = new Nonterminal("DECL_ASSIGN");
            nonterminals["expression"] = new Nonterminal("EXPR");
            nonterminals["unary_operation"] = new Nonterminal("UNARY_OP");
            nonterminals["binary_operation"] = new Nonterminal("BINARY_OP");
            nonterminals["operand"] = new Nonterminal("OPND");

            // Define terminal variables of CFG
            terminals["identifier"] = new Terminal(tokenTypes["identifier"]);
            terminals["assert"] = new Terminal("assert");
            terminals["print"] = new Terminal("print");
            terminals["read"] = new Terminal("read");
            terminals["for"] = new Terminal("for");
            terminals["in"] = new Terminal("in");
            terminals["end"] = new Terminal("end");
            terminals["do"] = new Terminal("do");
            terminals["var"] = new Terminal("var");
            terminals["type"] = new Terminal(tokenTypes["type"]);
            terminals["string"] = new Terminal(tokenTypes["string"]);
            terminals["int"] = new Terminal(tokenTypes["int"]);
            terminals[")"] = new Terminal(")");
            terminals["("] = new Terminal("(");
            terminals[".."] = new Terminal("..");
            terminals[":="] = new Terminal(":=");
            terminals[":"] = new Terminal(":");
            terminals[";"] = new Terminal(";");
            terminals["binary_operator"] = new Terminal(tokenTypes["binary_op"]);
            terminals["unary_operator"] = new Terminal(tokenTypes["unary_op"]);

            // Create the Mini-PL grammar
            grammar = new CFG(nonterminals["program"], terminals.Values, nonterminals.Values);

            // define production rules for the grammar
            grammar.AddProductionRule(nonterminals["program"], new ISymbol[] { nonterminals["statements"] });
            grammar.AddProductionRule(nonterminals["statements"], new ISymbol[] { nonterminals["statements_head"], nonterminals["statements_tail"] });
            grammar.AddProductionRule(nonterminals["statements_head"], new ISymbol[] { nonterminals["statement"], terminals[";"] });
            grammar.AddProductionRule(nonterminals["statements_tail"], new ISymbol[] { nonterminals["statements_head"], nonterminals["statements_tail"] });
            grammar.AddProductionRule(nonterminals["statements_tail"], new ISymbol[] { Terminal.EPSILON });

            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { nonterminals["declaration"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["identifier"], terminals[":="], nonterminals["expression"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["for"], terminals["identifier"], terminals["in"], nonterminals["expression"], terminals[".."], nonterminals["expression"], terminals["do"],
                                                                                   nonterminals["statements"], terminals["end"], terminals["for"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["read"], terminals["identifier"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["print"], nonterminals["expression"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["assert"], terminals["("], nonterminals["expression"], terminals[")"] });

            grammar.AddProductionRule(nonterminals["declaration"], new ISymbol[] { terminals["var"], terminals["identifier"], terminals[":"], terminals["type"], nonterminals["declaration_assignment"] });
            grammar.AddProductionRule(nonterminals["declaration_assignment"], new ISymbol[] { terminals[":="], nonterminals["expression"] });
            grammar.AddProductionRule(nonterminals["declaration_assignment"], new ISymbol[] { Terminal.EPSILON });

            grammar.AddProductionRule(nonterminals["expression"], new ISymbol[] { nonterminals["unary_operation"] });
            grammar.AddProductionRule(nonterminals["expression"], new ISymbol[] { nonterminals["operand"], nonterminals["binary_operation"] });

            grammar.AddProductionRule(nonterminals["unary_operation"], new ISymbol[] { terminals["unary_operator"], nonterminals["operand"] });

            grammar.AddProductionRule(nonterminals["binary_operation"], new ISymbol[] { terminals["binary_operator"], nonterminals["operand"] });
            grammar.AddProductionRule(nonterminals["binary_operation"], new ISymbol[] { Terminal.EPSILON });

            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["int"] });
            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["string"] });
            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["identifier"] });
            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["("], nonterminals["expression"], terminals[")"] });

            // use ; as synchronizing token for Mini-PL
            parser = new Parser(grammar, terminals[";"]);
        }