/// <summary>
/// Sets up the Mini-PL front end: token regular expressions, token types,
/// the scanner, the context-free grammar with its production rules, and the parser.
/// </summary>
private MiniPL() {
    // ---- Regular expressions for the individual tokens ----
    Regex blockCommentStartPattern = Regex.Concat("/*");
    Regex blockCommentEndPattern = Regex.Concat("*/");

    // "//" followed by any run of characters other than a newline
    var lineCommentMarker = Regex.Concat("//");
    var restOfLine = Regex.Not('\n').Star();
    Regex lineCommentPattern = lineCommentMarker.Concat(restOfLine);

    // a run of blanks, or a line comment
    Regex whitespacePattern = Regex.Union(" \t\r\n").Star().Union(lineCommentPattern);

    // a double-quoted string: each element is either a backslash followed by
    // any character, or a character that is neither '"' nor '\'
    var openingQuote = Regex.Char('"');
    var escapeSequence = Regex.Char('\\').Concat(Regex.Any());
    var stringContents = escapeSequence.Union(Regex.Not('"', '\\')).Star();
    Regex stringPattern = openingQuote.Concat(stringContents).Concat(Regex.Char('"'));

    Regex binaryOperatorPattern = Regex.Union("+-*/<=&");
    Regex unaryOperatorPattern = Regex.Char('!');

    Regex keywordPattern = Regex.Union(
        Regex.Concat("var"),
        Regex.Concat("for"),
        Regex.Concat("end"),
        Regex.Concat("in"),
        Regex.Concat("do"),
        Regex.Concat("read"),
        Regex.Concat("print"),
        Regex.Concat("assert"));

    Regex typePattern = Regex.Union(
        Regex.Concat("bool"),
        Regex.Concat("int"),
        Regex.Concat("string"));

    Regex rightParenPattern = Regex.Char(')');
    Regex leftParenPattern = Regex.Char('(');
    Regex colonPattern = Regex.Char(':');
    Regex semicolonPattern = Regex.Char(';');
    Regex assignmentPattern = Regex.Concat(":=");
    Regex dotsPattern = Regex.Concat("..");

    // a letter followed by any number of letters, digits or underscores
    var identifierFirstChar = Regex.Union(Regex.Range('A', 'Z'), Regex.Range('a', 'z'));
    var identifierTail = Regex.Union(
        Regex.Range('A', 'Z'),
        Regex.Range('a', 'z'),
        Regex.Range('0', '9'),
        Regex.Char('_')).Star();
    Regex identifierPattern = identifierFirstChar.Concat(identifierTail);

    // one or more decimal digits
    Regex integerPattern = Regex.Range('0', '9').Plus();

    // ---- Token types (insertion order is kept as-is; the values are
    //      handed to CombinedAutomaton below) ----
    tokenTypes["block_comment_start"] = new TokenType("block_comment_start", blockCommentStartPattern);
    tokenTypes["block_comment_end"] = new TokenType("block_comment_end", blockCommentEndPattern);
    tokenTypes["int"] = new TokenType("int", integerPattern);
    tokenTypes["whitespace"] = new TokenType("whitespace", whitespacePattern, priority: TokenType.Priority.Whitespace);
    tokenTypes["string"] = new TokenType("string", stringPattern);
    tokenTypes["binary_op"] = new TokenType("binary op", binaryOperatorPattern);
    tokenTypes["unary_op"] = new TokenType("unary op", unaryOperatorPattern);
    tokenTypes["keyword"] = new TokenType("keyword", keywordPattern, priority: TokenType.Priority.Keyword);
    tokenTypes["type"] = new TokenType("type", typePattern, priority: TokenType.Priority.Keyword);
    tokenTypes["left_paren"] = new TokenType("left paren", leftParenPattern);
    tokenTypes["right_paren"] = new TokenType("right paren", rightParenPattern);
    tokenTypes["colon"] = new TokenType("colon", colonPattern);
    tokenTypes["semicolon"] = new TokenType("semicolon", semicolonPattern);
    tokenTypes["assignment"] = new TokenType("assignment", assignmentPattern);
    tokenTypes["dots"] = new TokenType("dots", dotsPattern);
    tokenTypes["identifier"] = new TokenType("identifier", identifierPattern);

    // ---- Scanner: one automaton combining every token type, plus the
    //      block comment start/end token types ----
    TokenAutomaton combinedAutomaton = TokenType.CombinedAutomaton(tokenTypes.Values.ToArray());
    scanner = new Scanner(
        combinedAutomaton,
        tokenTypes["block_comment_start"],
        tokenTypes["block_comment_end"]);

    // ---- Nonterminal symbols of the grammar ----
    nonterminals["program"] = new Nonterminal("PROG");
    nonterminals["statements"] = new Nonterminal("STMTS");
    nonterminals["statements_head"] = new Nonterminal("STMTS_HEAD");
    nonterminals["statements_tail"] = new Nonterminal("STMTS_TAIL");
    nonterminals["statement"] = new Nonterminal("STMT");
    nonterminals["declaration"] = new Nonterminal("DECL");
    nonterminals["declaration_assignment"] = new Nonterminal("DECL_ASSIGN");
    nonterminals["expression"] = new Nonterminal("EXPR");
    nonterminals["unary_operation"] = new Nonterminal("UNARY_OP");
    nonterminals["binary_operation"] = new Nonterminal("BINARY_OP");
    nonterminals["operand"] = new Nonterminal("OPND");

    // ---- Terminal symbols of the grammar ----
    terminals["identifier"] = new Terminal(tokenTypes["identifier"]);
    terminals["assert"] = new Terminal("assert");
    terminals["print"] = new Terminal("print");
    terminals["read"] = new Terminal("read");
    terminals["for"] = new Terminal("for");
    terminals["in"] = new Terminal("in");
    terminals["end"] = new Terminal("end");
    terminals["do"] = new Terminal("do");
    terminals["var"] = new Terminal("var");
    terminals["type"] = new Terminal(tokenTypes["type"]);
    terminals["string"] = new Terminal(tokenTypes["string"]);
    terminals["int"] = new Terminal(tokenTypes["int"]);
    terminals[")"] = new Terminal(")");
    terminals["("] = new Terminal("(");
    terminals[".."] = new Terminal("..");
    terminals[":="] = new Terminal(":=");
    terminals[":"] = new Terminal(":");
    terminals[";"] = new Terminal(";");
    terminals["binary_operator"] = new Terminal(tokenTypes["binary_op"]);
    terminals["unary_operator"] = new Terminal(tokenTypes["unary_op"]);

    // Short local handles for the registered symbols so the production
    // rules below read like the grammar itself.
    var prog = nonterminals["program"];
    var stmts = nonterminals["statements"];
    var stmtsHead = nonterminals["statements_head"];
    var stmtsTail = nonterminals["statements_tail"];
    var stmt = nonterminals["statement"];
    var decl = nonterminals["declaration"];
    var declAssign = nonterminals["declaration_assignment"];
    var expr = nonterminals["expression"];
    var unaryOp = nonterminals["unary_operation"];
    var binaryOp = nonterminals["binary_operation"];
    var opnd = nonterminals["operand"];

    var identifier = terminals["identifier"];
    var kwAssert = terminals["assert"];
    var kwPrint = terminals["print"];
    var kwRead = terminals["read"];
    var kwFor = terminals["for"];
    var kwIn = terminals["in"];
    var kwEnd = terminals["end"];
    var kwDo = terminals["do"];
    var kwVar = terminals["var"];
    var typeName = terminals["type"];
    var stringLiteral = terminals["string"];
    var intLiteral = terminals["int"];
    var rightParen = terminals[")"];
    var leftParen = terminals["("];
    var dots = terminals[".."];
    var assign = terminals[":="];
    var colon = terminals[":"];
    var semicolon = terminals[";"];
    var binaryOperator = terminals["binary_operator"];
    var unaryOperator = terminals["unary_operator"];

    // ---- The grammar, with PROG as the first constructor argument ----
    grammar = new CFG(prog, terminals.Values, nonterminals.Values);

    // PROG -> STMTS
    grammar.AddProductionRule(prog, new ISymbol[] { stmts });

    // STMTS -> STMTS_HEAD STMTS_TAIL
    grammar.AddProductionRule(stmts, new ISymbol[] { stmtsHead, stmtsTail });

    // STMTS_HEAD -> STMT ;
    grammar.AddProductionRule(stmtsHead, new ISymbol[] { stmt, semicolon });

    // STMTS_TAIL -> STMTS_HEAD STMTS_TAIL | epsilon
    grammar.AddProductionRule(stmtsTail, new ISymbol[] { stmtsHead, stmtsTail });
    grammar.AddProductionRule(stmtsTail, new ISymbol[] { Terminal.EPSILON });

    // STMT -> DECL
    grammar.AddProductionRule(stmt, new ISymbol[] { decl });
    // STMT -> identifier := EXPR
    grammar.AddProductionRule(stmt, new ISymbol[] { identifier, assign, expr });
    // STMT -> for identifier in EXPR .. EXPR do STMTS end for
    grammar.AddProductionRule(stmt, new ISymbol[] {
        kwFor, identifier, kwIn, expr, dots, expr, kwDo, stmts, kwEnd, kwFor
    });
    // STMT -> read identifier
    grammar.AddProductionRule(stmt, new ISymbol[] { kwRead, identifier });
    // STMT -> print EXPR
    grammar.AddProductionRule(stmt, new ISymbol[] { kwPrint, expr });
    // STMT -> assert ( EXPR )
    grammar.AddProductionRule(stmt, new ISymbol[] { kwAssert, leftParen, expr, rightParen });

    // DECL -> var identifier : type DECL_ASSIGN
    grammar.AddProductionRule(decl, new ISymbol[] { kwVar, identifier, colon, typeName, declAssign });

    // DECL_ASSIGN -> := EXPR | epsilon
    grammar.AddProductionRule(declAssign, new ISymbol[] { assign, expr });
    grammar.AddProductionRule(declAssign, new ISymbol[] { Terminal.EPSILON });

    // EXPR -> UNARY_OP | OPND BINARY_OP
    grammar.AddProductionRule(expr, new ISymbol[] { unaryOp });
    grammar.AddProductionRule(expr, new ISymbol[] { opnd, binaryOp });

    // UNARY_OP -> unary_operator OPND
    grammar.AddProductionRule(unaryOp, new ISymbol[] { unaryOperator, opnd });

    // BINARY_OP -> binary_operator OPND | epsilon
    grammar.AddProductionRule(binaryOp, new ISymbol[] { binaryOperator, opnd });
    grammar.AddProductionRule(binaryOp, new ISymbol[] { Terminal.EPSILON });

    // OPND -> int | string | identifier | ( EXPR )
    grammar.AddProductionRule(opnd, new ISymbol[] { intLiteral });
    grammar.AddProductionRule(opnd, new ISymbol[] { stringLiteral });
    grammar.AddProductionRule(opnd, new ISymbol[] { identifier });
    grammar.AddProductionRule(opnd, new ISymbol[] { leftParen, expr, rightParen });

    // ---- Parser: ';' is passed as the synchronizing token for Mini-PL ----
    parser = new Parser(grammar, semicolon);
}