Пример #1
0
        public void Scanner_BlockCommentTest()
        {
            Regex a = Regex.Char('a').Star();
            Regex b = Regex.Char('b').Star();
            // block comment regex /\*([^*]|(\*+[^*/]))\*+/
            Regex blockComment = Regex.Concat("/*")
                                    .Concat(Regex.Not('*')
                                        .Union(Regex.Char('*').Plus()
                                                .Concat(Regex.Not('*','/')))
                                        .Star())
                                    .Concat(Regex.Char('*').Plus().Concat(Regex.Char('/')));
            Regex whitespace = Regex.Union(" \t\n").Star();
            TokenType ttWhitespace = new TokenType("Whitespace", whitespace, priority: TokenType.Priority.Whitespace);
            TokenType ttA = new TokenType("a*", a);
            TokenType ttB = new TokenType("b*", b);
            TokenType ttBlockComment = new TokenType("line comment", blockComment, priority: TokenType.Priority.Whitespace);
            TokenAutomaton automaton = TokenType.CombinedAutomaton(ttA, ttB, ttBlockComment, ttWhitespace);

            string text = "/* aaa */   \n" +
                          "bbb         \n" +
                          "/* aaa */   \n" +
                          "bbb         \n" +
                          "/* aaa      \n" +
                          "   aaa */   \n" +
                          "bbb         \n" +
                          "/*          \n" +
                          " * aaa      \n" +
                          " */         \n" +
                          "bbb         \n" +
                          "/***        \n" +
                          " * aaa      \n" +
                          " ***/       \n" +
                          "bbb         \n" +
                          "/*/ aaa /*/ \n" +
                          "/*****/     \n" +
                          "/*///*/     \n" +
                          "bbb         \n" +
                          "/***        \n" +
                          " * aaa      \n" +
                          " */         \n" +
                          "bbb        ";

            Scanner sc = new Scanner(automaton);
            IEnumerable<Token> tokenEnumerable = sc.Tokenize(text, yieldEOF: false);
            List<Token> tokens = new List<Token>(tokenEnumerable);

            string[] expectedTokens = { "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb" };
            Assert.AreEqual(expectedTokens.Length, tokens.Count);
            for (int i = 0; i < expectedTokens.Length; i++)
            {
                Assert.AreEqual(expectedTokens[i], tokens[i].Lexeme);
                Assert.AreEqual(ttB, tokens[i].Type);
            }
        }
Пример #2
0
 private List<Token> GetTokens(TokenAutomaton automaton, string text)
 {
     Scanner sc = new Scanner(automaton);
     IEnumerable<Token> tokens = sc.Tokenize(text, yieldEOF:false);
     return new List<Token>(tokens);
 }
Пример #3
0
        public void Scanner_NestedCommentHackTest3()
        {
            Regex a = Regex.Char('a').Star();
            Regex b = Regex.Char('b').Star();
            // hacky solution that allows for nested comments
            Regex reBlockCommentStart = Regex.Concat("/*");
            Regex reBlockCommentEnd = Regex.Concat("*/");
            Regex whitespace = Regex.Union(" \t\n").Star();
            TokenType ttWhitespace = new TokenType("Whitespace", whitespace, priority: TokenType.Priority.Whitespace);
            TokenType ttA = new TokenType("a*", a);
            TokenType ttB = new TokenType("b*", b);
            TokenType ttBlockCommentStart = new TokenType("block comment start", reBlockCommentStart);
            TokenType ttBlockCommentEnd = new TokenType("block comment end", reBlockCommentEnd);
            TokenAutomaton automaton = TokenType.CombinedAutomaton(ttA, ttB, ttBlockCommentStart, ttBlockCommentEnd, ttWhitespace);

            string text = "bbb /* aaa";

            Scanner sc = new Scanner(automaton, ttBlockCommentStart, ttBlockCommentEnd);
            IEnumerable<Token> tokenEnumerable = sc.Tokenize(text, yieldEOF: false);
            List<Token> tokens = new List<Token>(tokenEnumerable);

            string[] expectedTokens = { "bbb" };
            Assert.AreEqual(expectedTokens.Length, tokens.Count);
            for (int i = 0; i < expectedTokens.Length; i++)
            {
                Assert.AreEqual(expectedTokens[i], tokens[i].Lexeme);
                Assert.AreEqual(ttB, tokens[i].Type);
            }
        }
Пример #4
0
        private MiniPL()
        {
            // Create NFA-type things for tokens using regular operations
            Regex reBlockCommentStart = Regex.Concat("/*");
            Regex reBlockCommentEnd = Regex.Concat("*/");

            Regex reLineComment = Regex.Concat("//").Concat(Regex.Not('\n').Star());

            Regex reWhitespace = Regex.Union(" \t\r\n").Star().Union(reLineComment);

            Regex reString = Regex.Char('"').Concat(Regex.Char('\\').Concat(Regex.Any()).Union(Regex.Not('"', '\\')).Star()).Concat(Regex.Char('"'));
            Regex reBinaryOperator = Regex.Union("+-*/<=&");
            Regex reUnaryOperator = Regex.Char('!');
            Regex reKeyword = Regex.Union(Regex.Concat("var"), Regex.Concat("for"), Regex.Concat("end"), Regex.Concat("in"),
                                          Regex.Concat("do"), Regex.Concat("read"), Regex.Concat("print"), Regex.Concat("assert"));
            Regex reType = Regex.Union(Regex.Concat("bool"), Regex.Concat("int"), Regex.Concat("string"));

            Regex reParenRight = Regex.Char(')'),
                  reParenLeft = Regex.Char('('),
                  reColon = Regex.Char(':'),
                  reSemicolon = Regex.Char(';'),
                  reAssignment = Regex.Concat(":="),
                  reDots = Regex.Concat("..");

            Regex reIdentifier = Regex.Union(Regex.Range('A', 'Z'), Regex.Range('a', 'z'))
                                      .Concat(Regex.Union(Regex.Range('A', 'Z'), Regex.Range('a', 'z'), Regex.Range('0', '9'), Regex.Char('_')).Star());
            Regex reInteger = Regex.Range('0', '9').Plus();

            // Define token types
            tokenTypes["block_comment_start"] = new TokenType("block_comment_start", reBlockCommentStart);
            tokenTypes["block_comment_end"] = new TokenType("block_comment_end", reBlockCommentEnd);
            tokenTypes["int"] = new TokenType("int", reInteger);
            tokenTypes["whitespace"] = new TokenType("whitespace", reWhitespace, priority: TokenType.Priority.Whitespace);
            tokenTypes["string"] = new TokenType("string", reString);
            tokenTypes["binary_op"] = new TokenType("binary op", reBinaryOperator);
            tokenTypes["unary_op"] = new TokenType("unary op", reUnaryOperator);
            tokenTypes["keyword"] = new TokenType("keyword", reKeyword, priority: TokenType.Priority.Keyword);
            tokenTypes["type"] = new TokenType("type", reType, priority: TokenType.Priority.Keyword);
            tokenTypes["left_paren"] = new TokenType("left paren", reParenLeft);
            tokenTypes["right_paren"] = new TokenType("right paren", reParenRight);
            tokenTypes["colon"] = new TokenType("colon", reColon);
            tokenTypes["semicolon"] = new TokenType("semicolon", reSemicolon);
            tokenTypes["assignment"] = new TokenType("assignment", reAssignment);
            tokenTypes["dots"] = new TokenType("dots", reDots);
            tokenTypes["identifier"] = new TokenType("identifier", reIdentifier);

            // create combined automaton and scanner object
            TokenAutomaton automaton = TokenType.CombinedAutomaton(tokenTypes.Values.ToArray());
            scanner = new Scanner(automaton, tokenTypes["block_comment_start"], tokenTypes["block_comment_end"]);

            // Define nonterminal variables of CFG
            nonterminals["program"] = new Nonterminal("PROG");
            nonterminals["statements"] = new Nonterminal("STMTS");
            nonterminals["statements_head"] = new Nonterminal("STMTS_HEAD");
            nonterminals["statements_tail"] = new Nonterminal("STMTS_TAIL");
            nonterminals["statement"] = new Nonterminal("STMT");
            nonterminals["declaration"] = new Nonterminal("DECL");
            nonterminals["declaration_assignment"] = new Nonterminal("DECL_ASSIGN");
            nonterminals["expression"] = new Nonterminal("EXPR");
            nonterminals["unary_operation"] = new Nonterminal("UNARY_OP");
            nonterminals["binary_operation"] = new Nonterminal("BINARY_OP");
            nonterminals["operand"] = new Nonterminal("OPND");

            // Define terminal variables of CFG
            terminals["identifier"] = new Terminal(tokenTypes["identifier"]);
            terminals["assert"] = new Terminal("assert");
            terminals["print"] = new Terminal("print");
            terminals["read"] = new Terminal("read");
            terminals["for"] = new Terminal("for");
            terminals["in"] = new Terminal("in");
            terminals["end"] = new Terminal("end");
            terminals["do"] = new Terminal("do");
            terminals["var"] = new Terminal("var");
            terminals["type"] = new Terminal(tokenTypes["type"]);
            terminals["string"] = new Terminal(tokenTypes["string"]);
            terminals["int"] = new Terminal(tokenTypes["int"]);
            terminals[")"] = new Terminal(")");
            terminals["("] = new Terminal("(");
            terminals[".."] = new Terminal("..");
            terminals[":="] = new Terminal(":=");
            terminals[":"] = new Terminal(":");
            terminals[";"] = new Terminal(";");
            terminals["binary_operator"] = new Terminal(tokenTypes["binary_op"]);
            terminals["unary_operator"] = new Terminal(tokenTypes["unary_op"]);

            // Create the Mini-PL grammar
            grammar = new CFG(nonterminals["program"], terminals.Values, nonterminals.Values);

            // define production rules for the grammar
            grammar.AddProductionRule(nonterminals["program"], new ISymbol[] { nonterminals["statements"] });
            grammar.AddProductionRule(nonterminals["statements"], new ISymbol[] { nonterminals["statements_head"], nonterminals["statements_tail"] });
            grammar.AddProductionRule(nonterminals["statements_head"], new ISymbol[] { nonterminals["statement"], terminals[";"] });
            grammar.AddProductionRule(nonterminals["statements_tail"], new ISymbol[] { nonterminals["statements_head"], nonterminals["statements_tail"] });
            grammar.AddProductionRule(nonterminals["statements_tail"], new ISymbol[] { Terminal.EPSILON });

            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { nonterminals["declaration"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["identifier"], terminals[":="], nonterminals["expression"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["for"], terminals["identifier"], terminals["in"], nonterminals["expression"], terminals[".."], nonterminals["expression"], terminals["do"],
                                                                                   nonterminals["statements"], terminals["end"], terminals["for"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["read"], terminals["identifier"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["print"], nonterminals["expression"] });
            grammar.AddProductionRule(nonterminals["statement"], new ISymbol[] { terminals["assert"], terminals["("], nonterminals["expression"], terminals[")"] });

            grammar.AddProductionRule(nonterminals["declaration"], new ISymbol[] { terminals["var"], terminals["identifier"], terminals[":"], terminals["type"], nonterminals["declaration_assignment"] });
            grammar.AddProductionRule(nonterminals["declaration_assignment"], new ISymbol[] { terminals[":="], nonterminals["expression"] });
            grammar.AddProductionRule(nonterminals["declaration_assignment"], new ISymbol[] { Terminal.EPSILON });

            grammar.AddProductionRule(nonterminals["expression"], new ISymbol[] { nonterminals["unary_operation"] });
            grammar.AddProductionRule(nonterminals["expression"], new ISymbol[] { nonterminals["operand"], nonterminals["binary_operation"] });

            grammar.AddProductionRule(nonterminals["unary_operation"], new ISymbol[] { terminals["unary_operator"], nonterminals["operand"] });

            grammar.AddProductionRule(nonterminals["binary_operation"], new ISymbol[] { terminals["binary_operator"], nonterminals["operand"] });
            grammar.AddProductionRule(nonterminals["binary_operation"], new ISymbol[] { Terminal.EPSILON });

            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["int"] });
            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["string"] });
            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["identifier"] });
            grammar.AddProductionRule(nonterminals["operand"], new ISymbol[] { terminals["("], nonterminals["expression"], terminals[")"] });

            // use ; as synchronizing token for Mini-PL
            parser = new Parser(grammar, terminals[";"]);
        }