/// <summary>
/// Builds a lexer and a recursive parser for arithmetic expressions
/// (numbers, <c>+ - * /</c> and parentheses), parses
/// <c>"1 * 5 + 2 * 3 / 5 - 3"</c> and checks the computed value.
/// </summary>
public void ParseTest()
{
    LexicalRule digit = LexicalRule.Range('0', '9');
    LexerBuilder lexb = new LexerBuilder();

    // Lexemes. The whitespace lexeme is defined for its effect on the builder only;
    // its handle is not needed by the grammar below.
    // NOTE(review): the `true` argument presumably marks the lexeme as skipped — confirm against LexerBuilder.
    lexb.DefineLexeme(0, true, LexicalRule.Chars(" \n\t\r").Repeat());
    var number = lexb.DefineLexeme(1, digit.Repeat() + (LexicalRule.Char('.') + digit.Repeat() | LexicalRule.Empty));
    var plus = lexb.DefineLexeme(2, LexicalRule.Char('+'));
    var minus = lexb.DefineLexeme(2, LexicalRule.Char('-'));
    var times = lexb.DefineLexeme(2, LexicalRule.Char('*'));
    var divide = lexb.DefineLexeme(2, LexicalRule.Char('/'));
    var bra = lexb.DefineLexeme(3, LexicalRule.Char('('));
    var ket = lexb.DefineLexeme(3, LexicalRule.Char(')'));

    // Terminal parsing rules.
    var plu = plus.GetParsingRule();
    var min = minus.GetParsingRule();
    var mul = times.GetParsingRule();
    var div = divide.GetParsingRule();
    var br = bra.GetParsingRule();
    var ke = ket.GetParsingRule();
    // The lexer always uses '.' as the decimal separator, so parse culture-independently.
    var num = number.GetParsingRule(
        i => double.Parse(i.Text, System.Globalization.CultureInfo.InvariantCulture));

    ParsingRuleContainer<double> expr = new ParsingRuleContainer<double>();
    ParsingRuleContainer<double> term = new ParsingRuleContainer<double>();
    ParsingRuleContainer<double> factor = new ParsingRuleContainer<double>();

    // expr := term (('+' term) | ('-' term))*   -- subtraction is folded in as adding the negation.
    expr.Content =
        term.Concat(
            (plu.Concat(term, (t, y) => y) | min.Concat(term, (t, y) => -y))
                .Repeat(i => i.Sum()),
            (x, y) => x + y)
        | term;

    // term := factor (('*' factor) | ('/' factor))*   -- division is folded in as multiplying by 1/y.
    // The operands on the right are `factor`, not `term`: recursing into `term` here would make
    // "a / b * c" evaluate as a / (b * c) instead of (a / b) * c.
    term.Content =
        factor.Concat(
            (mul.Concat(factor, (s, y) => y) | div.Concat(factor, (s, y) => 1 / y))
                // Seeded Aggregate yields 1 for an empty sequence, so no separate emptiness check is needed.
                .Repeat(t => t.Aggregate(1.0, (x, y) => x * y)),
            (x, y) => x * y)
        | factor;

    // factor := '(' expr ')' | number
    factor.Content = br.Concat(expr, (s, x) => x).Concat(ke, (x, s) => x) | num;

    string str = "1 * 5 + 2 * 3 / 5 - 3";
    BranchedLexer lexer = lexb.GetBranchedLexer(str);

    double r;
    // NOTE(review): the return value of TryParse is not checked here; if it reports success
    // as a bool, asserting on it would give a clearer failure than the value comparison below.
    expr.TryParse(lexer, out r);

    // The parser computes x * (1 / y) and x + sum(...), which is not the same floating-point
    // operation order as the expression below, so compare with a tolerance.
    Assert.AreEqual(1.0 * 5.0 + 2.0 * 3.0 / 5.0 - 3.0, r, 1e-9);
}
/// <summary>
/// Tokenizes <c>test_data/1.input.txt</c> with a small script-like lexer and compares the
/// token dump (one <c>Token.ToString()</c> per line) against <c>test_data/1.expected.txt</c>.
/// On mismatch the actual dump is written to <c>test_data/1.actual.txt</c> and the test fails.
/// </summary>
public void LexerTest()
{
    LexicalRule letter = LexicalRule.Range('A', 'Z') | LexicalRule.Range('a', 'z');
    LexicalRule digit = LexicalRule.Range('0', '9');

    LexerBuilder lexb = new LexerBuilder();

    // The lexemes are registered for their effect on the builder; the returned handles are
    // not used by this test. Registration order is kept exactly as before.
    // NOTE(review): the `true` argument presumably marks a lexeme as skipped — confirm against LexerBuilder.

    // Whitespace.
    lexb.DefineLexeme(0, true, LexicalRule.Chars(" \n\t\r").Repeat());
    // Identifier: a letter followed by letters or digits.
    lexb.DefineLexeme(1, letter + (letter | digit).Repeat());
    // Keywords.
    lexb.DefineLexeme(2,
        LexicalRule.Literal("var")
        | LexicalRule.Literal("function")
        | LexicalRule.Literal("new")
        | LexicalRule.Literal("this")
        | LexicalRule.Literal("for")
        | LexicalRule.Literal("return"));
    // Number with an optional fractional part.
    lexb.DefineLexeme(3, digit.Repeat() + (LexicalRule.Char('.') + digit.Repeat() | LexicalRule.Empty));
    // Operators.
    lexb.DefineLexeme(4, LexicalRule.Literal("++"));
    lexb.DefineLexeme(4, LexicalRule.Chars("+-*/^=<>"));
    // Single-quoted string.
    lexb.DefineLexeme(5,
        LexicalRule.Char('\'')
        + (LexicalRule.NotChar('\'') | LexicalRule.Literal(@"\'")).Repeat()
        + LexicalRule.Char('\''));
    // Brackets and delimiters.
    lexb.DefineLexeme(6, LexicalRule.Chars("()[]{}"));
    lexb.DefineLexeme(7, LexicalRule.Chars(",;:"));
    // Line comment: "//" up to and including a line-break character.
    lexb.DefineLexeme(10, true,
        LexicalRule.Literal("//")
        + LexicalRule.NotChars("\n\r").Repeat()
        + LexicalRule.Chars("\n\r"));
    // Block comment: "/*" ... "*/".
    lexb.DefineLexeme(10, true,
        LexicalRule.Literal("/*")
        + (LexicalRule.Char('/') | LexicalRule.Char('*').Repeat() + LexicalRule.NotChars("/*")).Repeat()
        + LexicalRule.Char('*')
        + LexicalRule.Char('/'));

    var input = System.IO.File.ReadAllText("test_data/1.input.txt");
    var expected = System.IO.File.ReadAllText("test_data/1.expected.txt");

    // Dump every token until the lexer returns null.
    var sb = new System.Text.StringBuilder();
    BranchedLexer blexer = lexb.GetBranchedLexer(input);
    Token t;
    while ((t = blexer.Read()) != null)
    {
        sb.AppendLine(t.ToString());
    }
    string actual = sb.ToString();

    if (expected != actual)
    {
        // Keep the actual output next to the expected file so the two can be diffed.
        System.IO.File.WriteAllText("test_data/1.actual.txt", actual);
        Assert.Fail("Lexer output differs from test_data/1.expected.txt; actual output written to test_data/1.actual.txt.");
    }
}
/// <summary>
/// Creates the <c>LexerBuilder</c> and registers every lexeme of the language:
/// whitespace, comments, identifiers, keywords, reserved ("future") words,
/// numeric and string literals, and operators/delimiters. Each lexeme handle is
/// stored in the corresponding <c>lex_*</c> field.
/// </summary>
/// <remarks>
/// The order of the <c>DefineLexeme</c> calls is kept exactly as it was.
/// NOTE(review): definition order may affect tie-breaking between rules (e.g. identifier vs. keyword) — confirm.
/// </remarks>
private void InitializeLexer()
{
    lexb = new LexerBuilder();

    // ---- Whitespace -------------------------------------------------------
    // NOTE(review): the `true` argument presumably marks a lexeme as skipped — confirm against LexerBuilder.
    L l_newline = L.Chars("\u000D\u000A\u2028\u2029");
    L l_whitespace = L.Chars("\u0009\u000B\u000C")
        | L.CharWhen(c => CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator);
    lex_white = lexb.DefineLexeme("blank", true, l_whitespace | l_newline);

    // ---- Comments ---------------------------------------------------------
    // A line comment ends at a line-break character or at a NUL character
    // (presumably an end-of-input sentinel — confirm).
    L l_singleLineComment = L.Literal("//")
        + L.NotChars("\n\r").Repeat()
        + (L.Chars("\n\r") | L.CharWhen(c => c == '\0'));
    // NOTE(review): this rule requires exactly one '*' directly before the closing '/',
    // so a comment ending in "**/" looks like it would not match — confirm.
    L l_delimitedComment = L.Literal("/*")
        + (L.Char('/') | L.Char('*').Repeat() + L.NotChars("/*")).Repeat()
        + L.Char('*')
        + L.Char('/');
    lex_comment = lexb.DefineLexeme("comment", true, l_singleLineComment | l_delimitedComment);

    // ---- Identifiers ------------------------------------------------------
    L l_letter = L.CharWhen(c => char.IsLetter(c));
    L l_digit = L.CharWhen(c => char.IsDigit(c));
    L l_identifier = (l_letter | L.Char('_')) + (l_letter | l_digit | L.Char('_')).Repeat();
    lex_identifer = lexb.DefineLexeme("identifier", l_identifier);

    // ---- Keywords ---------------------------------------------------------
    lex_kw_break = lexb.DefineLexeme("kw_break", L.Literal("break"));
    lex_kw_case = lexb.DefineLexeme("kw_case", L.Literal("case"));
    lex_kw_continue = lexb.DefineLexeme("kw_continue", L.Literal("continue"));
    lex_kw_default = lexb.DefineLexeme("kw_default", L.Literal("default"));
    lex_kw_do = lexb.DefineLexeme("kw_do", L.Literal("do"));
    lex_kw_else = lexb.DefineLexeme("kw_else", L.Literal("else"));
    lex_kw_false = lexb.DefineLexeme("kw_false", L.Literal("false"));
    lex_kw_for = lexb.DefineLexeme("kw_for", L.Literal("for"));
    lex_kw_goto = lexb.DefineLexeme("kw_goto", L.Literal("goto"));
    lex_kw_if = lexb.DefineLexeme("kw_if", L.Literal("if"));
    lex_kw_is = lexb.DefineLexeme("kw_is", L.Literal("is"));
    lex_kw_new = lexb.DefineLexeme("kw_new", L.Literal("new"));
    lex_kw_null = lexb.DefineLexeme("kw_null", L.Literal("null"));
    lex_kw_return = lexb.DefineLexeme("kw_return", L.Literal("return"));
    lex_kw_switch = lexb.DefineLexeme("kw_switch", L.Literal("switch"));
    lex_kw_this = lexb.DefineLexeme("kw_this", L.Literal("this"));
    lex_kw_true = lexb.DefineLexeme("kw_true", L.Literal("true"));
    lex_kw_while = lexb.DefineLexeme("kw_while", L.Literal("while"));
    lex_kw_class = lexb.DefineLexeme("kw_class", L.Literal("class"));
    lex_kw_var = lexb.DefineLexeme("kw_var", L.Literal("var"));
    lex_kw_function = lexb.DefineLexeme("kw_function", L.Literal("function"));
    lex_kw_super = lexb.DefineLexeme("kw_super", L.Literal("super"));
    lex_kw_extends = lexb.DefineLexeme("kw_extends", L.Literal("extends"));
    lex_kw_public = lexb.DefineLexeme("kw_public", L.Literal("public"));
    lex_kw_private = lexb.DefineLexeme("kw_private", L.Literal("private"));
    lex_kw_static = lexb.DefineLexeme("kw_static", L.Literal("static"));
    lex_kw_constructor = lexb.DefineLexeme("kw_constructor", L.Literal("constructor"));
    lex_kw_protected = lexb.DefineLexeme("kw_protected", L.Literal("protected"));
    lex_kw_get = lexb.DefineLexeme("kw_get", L.Literal("get"));
    lex_kw_set = lexb.DefineLexeme("kw_set", L.Literal("set"));

    // Words grouped under one "kw_future" lexeme.
    // "unchecked" was previously misspelled "uncheck", which left the real word unreserved.
    lex_kw_future = lexb.DefineLexeme("kw_future",
        L.Literal("abstract") | L.Literal("as") | L.Literal("base") | L.Literal("bool")
        | L.Literal("byte") | L.Literal("catch") | L.Literal("char") | L.Literal("checked")
        | L.Literal("const") | L.Literal("decimal") | L.Literal("delegate") | L.Literal("double")
        | L.Literal("enum") | L.Literal("event") | L.Literal("explicit") | L.Literal("extern")
        | L.Literal("finally") | L.Literal("fixed") | L.Literal("float") | L.Literal("foreach")
        | L.Literal("implicit") | L.Literal("in") | L.Literal("int") | L.Literal("interface")
        | L.Literal("internal") | L.Literal("lock") | L.Literal("long") | L.Literal("namespace")
        | L.Literal("object") | L.Literal("operator") | L.Literal("out") | L.Literal("override")
        | L.Literal("params") | L.Literal("readonly") | L.Literal("ref") | L.Literal("sbyte")
        | L.Literal("sealed") | L.Literal("short") | L.Literal("sizeof") | L.Literal("stackalloc")
        | L.Literal("string") | L.Literal("struct") | L.Literal("throw") | L.Literal("try")
        | L.Literal("typeof") | L.Literal("uint") | L.Literal("ulong") | L.Literal("unchecked")
        | L.Literal("unsafe") | L.Literal("ushort") | L.Literal("using") | L.Literal("virtual")
        | L.Literal("void") | L.Literal("volatile"));

    // ---- Numeric literals -------------------------------------------------
    // NOTE(review): Repeat(1, 0) presumably means "at least one, no upper bound" — confirm.
    L l_num_int = l_digit.Repeat(1, 0);
    L l_num_dec = L.Char('.') + l_digit.Repeat(1, 0);
    L l_num_ind = L.Chars("eE") + L.Chars("+-").Optional() + l_digit.Repeat(1, 0);
    // (integer [fraction] | fraction) [exponent]
    L l_num = (l_num_int + l_num_dec.Optional() | l_num_dec).Concat(l_num_ind.Optional());
    lex_li_num = lexb.DefineLexeme("li_number", l_num);

    // ---- String literals --------------------------------------------------
    // NOTE(review): NotChar(quote) also accepts a backslash, so it overlaps with the
    // backslash-escape alternative; confirm the lexer's match strategy treats an escaped
    // quote as intended.
    L l_string1 = L.Char('\'') + (L.NotChar('\'') | L.Char('\\') + L.AnyChar).Repeat() + L.Char('\'');
    lex_li_string1 = lexb.DefineLexeme("li_string1", l_string1);
    L l_string = L.Char('\"') + (L.NotChar('\"') | L.Char('\\') + L.AnyChar).Repeat() + L.Char('\"');
    lex_li_string = lexb.DefineLexeme("li_string", l_string);

    // ---- Operators and delimiters -----------------------------------------
    lex_op_and = lexb.DefineLexeme("op_&", L.Char('&'));
    lex_op_andAlso = lexb.DefineLexeme("op_&&", L.Literal("&&"));
    // Previously defined as "op_+=" / "+=", duplicating lex_op_plusAssign and leaving "&=" without a lexeme.
    lex_op_andAssign = lexb.DefineLexeme("op_&=", L.Literal("&="));
    lex_op_assign = lexb.DefineLexeme("op_=", L.Char('='));
    lex_op_colon = lexb.DefineLexeme("op_:", L.Char(':'));
    lex_op_comma = lexb.DefineLexeme("op_,", L.Char(','));
    lex_op_decrement = lexb.DefineLexeme("op_--", L.Literal("--"));
    lex_op_divide = lexb.DefineLexeme("op_/", L.Char('/'));
    lex_op_divideAssign = lexb.DefineLexeme("op_/=", L.Literal("/="));
    lex_op_dot = lexb.DefineLexeme("op_.", L.Char('.'));
    lex_op_equal = lexb.DefineLexeme("op_==", L.Literal("=="));
    lex_op_greater = lexb.DefineLexeme("op_>", L.Char('>'));
    lex_op_greaterEqual = lexb.DefineLexeme("op_>=", L.Literal(">="));
    lex_op_increment = lexb.DefineLexeme("op_++", L.Literal("++"));
    lex_op_inverse = lexb.DefineLexeme("op_~", L.Char('~'));
    lex_op_leftBrace = lexb.DefineLexeme("op_{", L.Char('{'));
    lex_op_leftBracket = lexb.DefineLexeme("op_[", L.Char('['));
    lex_op_leftParenthesis = lexb.DefineLexeme("op_(", L.Char('('));
    lex_op_less = lexb.DefineLexeme("op_<", L.Char('<'));
    lex_op_lessEqual = lexb.DefineLexeme("op_<=", L.Literal("<="));
    lex_op_minus = lexb.DefineLexeme("op_-", L.Char('-'));
    lex_op_minusAssign = lexb.DefineLexeme("op_-=", L.Literal("-="));
    lex_op_mod = lexb.DefineLexeme("op_%", L.Char('%'));
    lex_op_modAssign = lexb.DefineLexeme("op_%=", L.Literal("%="));
    lex_op_multiply = lexb.DefineLexeme("op_*", L.Char('*'));
    lex_op_mutiplyAssign = lexb.DefineLexeme("op_*=", L.Literal("*="));
    lex_op_not = lexb.DefineLexeme("op_!", L.Char('!'));
    lex_op_notEqual = lexb.DefineLexeme("op_!=", L.Literal("!="));
    lex_op_or = lexb.DefineLexeme("op_|", L.Char('|'));
    lex_op_orAssign = lexb.DefineLexeme("op_|=", L.Literal("|="));
    lex_op_orElse = lexb.DefineLexeme("op_||", L.Literal("||"));
    lex_op_plus = lexb.DefineLexeme("op_+", L.Char('+'));
    lex_op_plusAssign = lexb.DefineLexeme("op_+=", L.Literal("+="));
    lex_op_lambda = lexb.DefineLexeme("op_=>", L.Literal("=>"));
    lex_op_question = lexb.DefineLexeme("op_?", L.Char('?'));
    lex_op_rightBrace = lexb.DefineLexeme("op_}", L.Char('}'));
    lex_op_rightBracket = lexb.DefineLexeme("op_]", L.Char(']'));
    lex_op_rightParenthesis = lexb.DefineLexeme("op_)", L.Char(')'));
    lex_op_semicolon = lexb.DefineLexeme("op_;", L.Char(';'));
    lex_op_shiftLeft = lexb.DefineLexeme("op_<<", L.Literal("<<"));
    lex_op_shiftLeftAssign = lexb.DefineLexeme("op_<<=", L.Literal("<<="));
    lex_op_shiftRight = lexb.DefineLexeme("op_>>", L.Literal(">>"));
    lex_op_shiftRightAssign = lexb.DefineLexeme("op_>>=", L.Literal(">>="));
    lex_op_xor = lexb.DefineLexeme("op_^", L.Char('^'));
    lex_op_xorAssign = lexb.DefineLexeme("op_^=", L.Literal("^="));
    lex_op_null = lexb.DefineLexeme("op_??", L.Literal("??"));
}