Ejemplo n.º 1
0
        /// <summary>
        /// Assembles the complete Mini-PL language definition: the token patterns and
        /// token types, the combined token automaton and the scanner built on it, the
        /// grammar symbols and production rules, and finally the parser.
        /// </summary>
        private MiniPL()
        {
            // ------------------------------------------------------------------
            // Lexical patterns, composed from the basic regular operations
            // ------------------------------------------------------------------
            Regex blockCommentOpenPattern  = Regex.Concat("/*");
            Regex blockCommentClosePattern = Regex.Concat("*/");

            // line comment: "//" followed by any run of characters other than newline
            var lineCommentMarker = Regex.Concat("//");
            var restOfLine = Regex.Not('\n').Star();
            Regex lineCommentPattern = lineCommentMarker.Concat(restOfLine);

            // whitespace: a run of blank characters, or a line comment
            // NOTE(review): Star() presumably also admits the empty string here - confirm
            // that the scanner never produces zero-length whitespace tokens.
            var blankRun = Regex.Union(" \t\r\n").Star();
            Regex whitespacePattern = blankRun.Union(lineCommentPattern);

            // string literal: '"', then any mix of backslash escapes (backslash + any
            // character) and characters that are neither '"' nor backslash, then '"'
            var openingQuote = Regex.Char('"');
            var escapeSequence = Regex.Char('\\').Concat(Regex.Any());
            var stringContents = escapeSequence.Union(Regex.Not('"', '\\')).Star();
            var unterminatedString = openingQuote.Concat(stringContents);
            Regex stringPattern = unterminatedString.Concat(Regex.Char('"'));

            Regex binaryOperatorPattern = Regex.Union("+-*/<=&");
            Regex unaryOperatorPattern = Regex.Char('!');

            Regex keywordPattern = Regex.Union(
                Regex.Concat("var"),
                Regex.Concat("for"),
                Regex.Concat("end"),
                Regex.Concat("in"),
                Regex.Concat("do"),
                Regex.Concat("read"),
                Regex.Concat("print"),
                Regex.Concat("assert"));

            Regex typePattern = Regex.Union(
                Regex.Concat("bool"),
                Regex.Concat("int"),
                Regex.Concat("string"));

            // punctuation
            Regex rightParenPattern = Regex.Char(')');
            Regex leftParenPattern = Regex.Char('(');
            Regex colonPattern = Regex.Char(':');
            Regex semicolonPattern = Regex.Char(';');
            Regex assignmentPattern = Regex.Concat(":=");
            Regex dotsPattern = Regex.Concat("..");

            // identifier: a letter followed by any number of letters, digits or underscores
            var identifierStart = Regex.Union(Regex.Range('A', 'Z'), Regex.Range('a', 'z'));
            var identifierRest = Regex.Union(Regex.Range('A', 'Z'), Regex.Range('a', 'z'), Regex.Range('0', '9'), Regex.Char('_')).Star();
            Regex identifierPattern = identifierStart.Concat(identifierRest);

            // integer literal: one or more decimal digits
            Regex integerPattern = Regex.Range('0', '9').Plus();

            // ------------------------------------------------------------------
            // Token types (registration order is kept as-is, because
            // tokenTypes.Values is what feeds the combined automaton below)
            // ------------------------------------------------------------------
            tokenTypes["block_comment_start"] = new TokenType("block_comment_start", blockCommentOpenPattern);
            tokenTypes["block_comment_end"] = new TokenType("block_comment_end", blockCommentClosePattern);
            tokenTypes["int"] = new TokenType("int", integerPattern);
            tokenTypes["whitespace"] = new TokenType("whitespace", whitespacePattern, priority: TokenType.Priority.Whitespace);
            tokenTypes["string"] = new TokenType("string", stringPattern);
            tokenTypes["binary_op"] = new TokenType("binary op", binaryOperatorPattern);
            tokenTypes["unary_op"] = new TokenType("unary op", unaryOperatorPattern);
            // keywords and type names share the Keyword priority
            // (presumably so they win over the identifier pattern - confirm in TokenType)
            tokenTypes["keyword"] = new TokenType("keyword", keywordPattern, priority: TokenType.Priority.Keyword);
            tokenTypes["type"] = new TokenType("type", typePattern, priority: TokenType.Priority.Keyword);
            tokenTypes["left_paren"] = new TokenType("left paren", leftParenPattern);
            tokenTypes["right_paren"] = new TokenType("right paren", rightParenPattern);
            tokenTypes["colon"] = new TokenType("colon", colonPattern);
            tokenTypes["semicolon"] = new TokenType("semicolon", semicolonPattern);
            tokenTypes["assignment"] = new TokenType("assignment", assignmentPattern);
            tokenTypes["dots"] = new TokenType("dots", dotsPattern);
            tokenTypes["identifier"] = new TokenType("identifier", identifierPattern);

            // ------------------------------------------------------------------
            // Scanner: one automaton combining every token type, plus the two
            // token types that delimit block comments
            // ------------------------------------------------------------------
            var registeredTokenTypes = tokenTypes.Values.ToArray();
            TokenAutomaton automaton = TokenType.CombinedAutomaton(registeredTokenTypes);

            var blockCommentOpen = tokenTypes["block_comment_start"];
            var blockCommentClose = tokenTypes["block_comment_end"];
            scanner = new Scanner(automaton, blockCommentOpen, blockCommentClose);

            // ------------------------------------------------------------------
            // Nonterminal symbols of the grammar
            // ------------------------------------------------------------------
            nonterminals["program"] = new Nonterminal("PROG");
            nonterminals["statements"] = new Nonterminal("STMTS");
            nonterminals["statements_head"] = new Nonterminal("STMTS_HEAD");
            nonterminals["statements_tail"] = new Nonterminal("STMTS_TAIL");
            nonterminals["statement"] = new Nonterminal("STMT");
            nonterminals["declaration"] = new Nonterminal("DECL");
            nonterminals["declaration_assignment"] = new Nonterminal("DECL_ASSIGN");
            nonterminals["expression"] = new Nonterminal("EXPR");
            nonterminals["unary_operation"] = new Nonterminal("UNARY_OP");
            nonterminals["binary_operation"] = new Nonterminal("BINARY_OP");
            nonterminals["operand"] = new Nonterminal("OPND");

            // ------------------------------------------------------------------
            // Terminal symbols of the grammar: some are bound to a whole token
            // type, the others to one exact lexeme
            // ------------------------------------------------------------------
            terminals["identifier"] = new Terminal(tokenTypes["identifier"]);
            terminals["assert"] = new Terminal("assert");
            terminals["print"] = new Terminal("print");
            terminals["read"] = new Terminal("read");
            terminals["for"] = new Terminal("for");
            terminals["in"] = new Terminal("in");
            terminals["end"] = new Terminal("end");
            terminals["do"] = new Terminal("do");
            terminals["var"] = new Terminal("var");
            terminals["type"] = new Terminal(tokenTypes["type"]);
            terminals["string"] = new Terminal(tokenTypes["string"]);
            terminals["int"] = new Terminal(tokenTypes["int"]);
            terminals[")"] = new Terminal(")");
            terminals["("] = new Terminal("(");
            terminals[".."] = new Terminal("..");
            terminals[":="] = new Terminal(":=");
            terminals[":"] = new Terminal(":");
            terminals[";"] = new Terminal(";");
            terminals["binary_operator"] = new Terminal(tokenTypes["binary_op"]);
            terminals["unary_operator"] = new Terminal(tokenTypes["unary_op"]);

            // Short local handles for the symbols, so the production rules below read
            // close to their BNF form.
            var program = nonterminals["program"];
            var statements = nonterminals["statements"];
            var statementsHead = nonterminals["statements_head"];
            var statementsTail = nonterminals["statements_tail"];
            var statement = nonterminals["statement"];
            var declaration = nonterminals["declaration"];
            var declarationAssignment = nonterminals["declaration_assignment"];
            var expression = nonterminals["expression"];
            var unaryOperation = nonterminals["unary_operation"];
            var binaryOperation = nonterminals["binary_operation"];
            var operand = nonterminals["operand"];

            var identifier = terminals["identifier"];
            var kwAssert = terminals["assert"];
            var kwPrint = terminals["print"];
            var kwRead = terminals["read"];
            var kwFor = terminals["for"];
            var kwIn = terminals["in"];
            var kwEnd = terminals["end"];
            var kwDo = terminals["do"];
            var kwVar = terminals["var"];
            var typeName = terminals["type"];
            var stringLiteral = terminals["string"];
            var intLiteral = terminals["int"];
            var closeParen = terminals[")"];
            var openParen = terminals["("];
            var rangeDots = terminals[".."];
            var assign = terminals[":="];
            var colon = terminals[":"];
            var semicolon = terminals[";"];
            var binaryOperator = terminals["binary_operator"];
            var unaryOperator = terminals["unary_operator"];

            // ------------------------------------------------------------------
            // The Mini-PL grammar, with PROG as the start symbol
            // ------------------------------------------------------------------
            grammar = new CFG(program, terminals.Values, nonterminals.Values);

            // PROG       -> STMTS
            grammar.AddProductionRule(program, new ISymbol[] { statements });
            // STMTS      -> STMTS_HEAD STMTS_TAIL
            grammar.AddProductionRule(statements, new ISymbol[] { statementsHead, statementsTail });
            // STMTS_HEAD -> STMT ";"
            grammar.AddProductionRule(statementsHead, new ISymbol[] { statement, semicolon });
            // STMTS_TAIL -> STMTS_HEAD STMTS_TAIL | epsilon
            grammar.AddProductionRule(statementsTail, new ISymbol[] { statementsHead, statementsTail });
            grammar.AddProductionRule(statementsTail, new ISymbol[] { Terminal.EPSILON });

            // STMT -> DECL
            grammar.AddProductionRule(statement, new ISymbol[] { declaration });
            // STMT -> identifier ":=" EXPR
            grammar.AddProductionRule(statement, new ISymbol[] { identifier, assign, expression });
            // STMT -> "for" identifier "in" EXPR ".." EXPR "do" STMTS "end" "for"
            grammar.AddProductionRule(statement, new ISymbol[]
            {
                kwFor, identifier, kwIn, expression, rangeDots, expression, kwDo,
                statements, kwEnd, kwFor
            });
            // STMT -> "read" identifier
            grammar.AddProductionRule(statement, new ISymbol[] { kwRead, identifier });
            // STMT -> "print" EXPR
            grammar.AddProductionRule(statement, new ISymbol[] { kwPrint, expression });
            // STMT -> "assert" "(" EXPR ")"
            grammar.AddProductionRule(statement, new ISymbol[] { kwAssert, openParen, expression, closeParen });

            // DECL        -> "var" identifier ":" type DECL_ASSIGN
            grammar.AddProductionRule(declaration, new ISymbol[] { kwVar, identifier, colon, typeName, declarationAssignment });
            // DECL_ASSIGN -> ":=" EXPR | epsilon
            grammar.AddProductionRule(declarationAssignment, new ISymbol[] { assign, expression });
            grammar.AddProductionRule(declarationAssignment, new ISymbol[] { Terminal.EPSILON });

            // EXPR -> UNARY_OP | OPND BINARY_OP
            grammar.AddProductionRule(expression, new ISymbol[] { unaryOperation });
            grammar.AddProductionRule(expression, new ISymbol[] { operand, binaryOperation });

            // UNARY_OP -> unary_operator OPND
            grammar.AddProductionRule(unaryOperation, new ISymbol[] { unaryOperator, operand });

            // BINARY_OP -> binary_operator OPND | epsilon
            grammar.AddProductionRule(binaryOperation, new ISymbol[] { binaryOperator, operand });
            grammar.AddProductionRule(binaryOperation, new ISymbol[] { Terminal.EPSILON });

            // OPND -> int | string | identifier | "(" EXPR ")"
            grammar.AddProductionRule(operand, new ISymbol[] { intLiteral });
            grammar.AddProductionRule(operand, new ISymbol[] { stringLiteral });
            grammar.AddProductionRule(operand, new ISymbol[] { identifier });
            grammar.AddProductionRule(operand, new ISymbol[] { openParen, expression, closeParen });

            // ";" serves as the parser's synchronizing token for Mini-PL
            parser = new Parser(grammar, semicolon);
        }