public void ParserFuncTest()
{
    Lexicon test = new Lexicon();

    var ID = test.DefaultLexer.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = test.DefaultLexer.DefineToken(RE.Range('0', '9').Many1());
    var GREATER = test.DefaultLexer.DefineToken(RE.Symbol('>'));
    var WHITESPACE = test.DefaultLexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')));

    var p1 = from i in ID
             from g in GREATER
             from g2 in GREATER.AsParser(l => l.PrefixTrivia.Count == 0)
             from n in NUM
             select "A";

    var p2 = from i in ID
             from g in GREATER
             from g2 in GREATER
             from n in NUM
             select "B";

    var parser1 = p1 | p2;

    var info = test.CreateScannerInfo();
    ForkableScannerBuilder builder = new ForkableScannerBuilder(info);
    builder.SetTriviaTokens(WHITESPACE.Index);

    var errorManager = new CompilationErrorManager();
    var context = new ParserContext(errorManager, 1, 2);
    context.DefineDefaultCompilationErrorInfo(0);

    ParserRunner<string> runner = new ParserRunner<string>(parser1, context);

    string source1 = "abc >> 123";
    var sr1 = new SourceReader(new StringReader(source1));
    ForkableScanner scanner1 = builder.Create(sr1);

    var result1 = runner.Run(scanner1);
    Assert.AreEqual("A", result1);
    Assert.AreEqual(0, errorManager.Errors.Count);

    string source2 = "abc > > 123";
    var sr2 = new SourceReader(new StringReader(source2));
    ForkableScanner scanner2 = builder.Create(sr2);

    var result2 = runner.Run(scanner2);
    Assert.AreEqual("B", result2);
    Assert.AreEqual(0, errorManager.Errors.Count);
}
public void ThrowExceptionAtEosTest()
{
    Lexicon lexicon = new Lexicon();
    Lexer global = lexicon.Lexer;

    var ID = global.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));

    ScannerInfo info = lexicon.CreateScannerInfo();
    Scanner scanner = new Scanner(info);

    string source = "aaa bbb ccc";
    StringReader sr = new StringReader(source);
    scanner.SetSource(new SourceReader(sr));
    scanner.SetTriviaTokens(WHITESPACE.Index, ERROR.Index);
    scanner.ThrowAtReadingAfterEndOfStream = true;

    Lexeme l1 = scanner.Read();
    Assert.AreEqual(ID.Index, l1.TokenIndex);
    Assert.AreEqual("aaa", l1.ToContentString());
    Assert.AreEqual(0, l1.PrefixTrivia.Count);

    Lexeme l2 = scanner.Read();
    Assert.AreEqual(ID.Index, l2.TokenIndex);
    Assert.AreEqual("bbb", l2.ToContentString());
    Assert.AreEqual(1, l2.PrefixTrivia.Count);

    Lexeme l3 = scanner.Read();
    Assert.AreEqual(ID.Index, l3.TokenIndex);
    Assert.AreEqual("ccc", l3.ToContentString());
    Assert.AreEqual(1, l3.PrefixTrivia.Count);

    Lexeme leof = scanner.Read();
    Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
    Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
    Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);

    try
    {
        Lexeme leof2 = scanner.Read();
        Assert.Fail("The Read above should throw an exception");
    }
    catch (ScannerException)
    {
        Assert.True(true);
    }
}
protected override void OnDefineLexer(Lexicon lexicon, ICollection<Token> triviaTokens)
{
    var lex = lexicon.Lexer;

    //lex
    LEFTPH = lex.DefineToken(RE.Symbol('('));
    RIGHTPH = lex.DefineToken(RE.Symbol(')'));
    COMMA = lex.DefineToken(RE.Symbol(','));
    LETTER = lex.DefineToken(RE.Range('a', 'z') | RE.Range('A', 'Z'), "ID");
}
public void ForkableScannerTest()
{
    Lexicon lexicon = new Lexicon();
    var A = lexicon.DefaultLexer.DefineToken(RE.Range('a', 'z'));
    ScannerInfo si = lexicon.CreateScannerInfo();

    string source = "abcdefghijklmnopqrstuvwxyz";

    ForkableScannerBuilder fsBuilder = new ForkableScannerBuilder(si);
    ForkableScanner fscanner = fsBuilder.Create(new SourceReader(new StringReader(source)));

    var l1 = fscanner.Read();
    Assert.AreEqual("a", l1.Value);
    var l2 = fscanner.Read();
    Assert.AreEqual("b", l2.Value);

    //fork
    ForkableScanner fscanner2 = fscanner.Fork();

    for (int i = 2; i <= 4; i++)
    {
        var l = fscanner.Read();
        Assert.AreEqual(source[i].ToString(), l.Value);
    }

    for (int i = 2; i <= 5; i++)
    {
        var l = fscanner2.Read();
        Assert.AreEqual(source[i].ToString(), l.Value);
    }

    ForkableScanner fscanner3 = fscanner.Fork();

    var l5a = fscanner.Read();
    var l5b = fscanner3.Read();
    Assert.AreEqual(source[5].ToString(), l5a.Value);
    Assert.AreEqual(source[5].ToString(), l5b.Value);

    var l6b = fscanner2.Read();
    var l6a = fscanner3.Read();
    Assert.AreEqual(source[6].ToString(), l6a.Value);
    Assert.AreEqual(source[6].ToString(), l6b.Value);

    var l7a = fscanner2.Read();

    for (int i = 7; i < 9; i++)
    {
        var l = fscanner3.Read();
        Assert.AreEqual(source[i].ToString(), l.Value);
    }
}
protected override void OnDefineLexer(Lexicon lexicon, ICollection<Token> triviaTokens)
{
    var lexer = lexicon.Lexer;

    PLUS = lexer.DefineToken(RE.Symbol('+'));
    ASTERISK = lexer.DefineToken(RE.Symbol('*'));
    LEFT_PARENTHESIS = lexer.DefineToken(RE.Symbol('('));
    RIGHT_PARENTHESIS = lexer.DefineToken(RE.Symbol(')'));
    NUMBER = lexer.DefineToken(RE.Range('0', '9').Many1(), "number");
    SPACE = lexer.DefineToken(RE.Symbol(' ').Many1());

    triviaTokens.Add(SPACE);
}
protected override void OnDefineLexer(Lexicon lexicon, ICollection<Token> triviaTokens)
{
    ID = lexicon.Lexer.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()), "ID");
    NUM = lexicon.Lexer.DefineToken(RE.Range('0', '9').Many1(), "NUM");
    GREATER = lexicon.Lexer.DefineToken(RE.Symbol('>'));
    LESS = lexicon.Lexer.DefineToken(RE.Symbol('<'));
    SEMICOLON = lexicon.Lexer.DefineToken(RE.Symbol(';'));

    var WHITESPACE = lexicon.Lexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')), "white space");

    triviaTokens.Add(WHITESPACE);
}
public void ErrorRecoveryTest()
{
    Lexicon lexicon = new Lexicon();
    Lexer global = lexicon.Lexer;

    var ID = global.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());

    ScannerInfo info = lexicon.CreateScannerInfo();
    Scanner scanner = new Scanner(info);

    string source = "asdf04a 1107 !@#$!@ Z if vvv xmlns 772737";
    StringReader sr = new StringReader(source);
    scanner.SetSource(new SourceReader(sr));
    scanner.SetTriviaTokens(WHITESPACE.Index);
    scanner.RecoverErrors = true;

    CompilationErrorManager em = new CompilationErrorManager();
    em.DefineError(101, 0, CompilationStage.Scanning, "Invalid token: {0}");

    var el = em.CreateErrorList();
    scanner.ErrorList = el;
    scanner.LexicalErrorId = 101;

    Lexeme l1 = scanner.Read();
    Assert.AreEqual(ID.Index, l1.TokenIndex);

    Lexeme l2 = scanner.Read();
    Assert.AreEqual(NUM.Index, l2.TokenIndex);
    Assert.AreEqual(0, el.Count);

    Lexeme l3 = scanner.Read();
    Assert.AreEqual(ID.Index, l3.TokenIndex);
    Assert.IsTrue(el.Count > 0);
    Assert.AreEqual(101, el[0].Info.Id);
}
public void ParserCastTest()
{
    Lexicon test = new Lexicon();

    var ID = test.Lexer.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = test.Lexer.DefineToken(RE.Range('0', '9').Many1());
    var GREATER = test.Lexer.DefineToken(RE.Symbol('>'));
    var WHITESPACE = test.Lexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')));

    var p1 = from i in ID
             from g in GREATER
             from g2 in GREATER
             from n in NUM
             select "hello";

    var parser1 = p1.TryCast<object>();

    var info = test.CreateScannerInfo();
    ForkableScannerBuilder builder = new ForkableScannerBuilder(info);
    builder.SetTriviaTokens(WHITESPACE.Index);

    var errorManager = new CompilationErrorManager();
    var context = new ParserContext(errorManager, 1, 2);
    context.DefineDefaultCompilationErrorInfo(0);

    var el = errorManager.CreateErrorList();
    context.ErrorList = el;

    ParserRunner<object> runner = new ParserRunner<object>(parser1, context);

    string source1 = "abc >> 123";
    var sr1 = new SourceReader(new StringReader(source1));
    ForkableScanner scanner1 = builder.Create(sr1);

    var result1 = runner.Run(scanner1);
    Assert.AreEqual("hello", result1);
    Assert.AreEqual(0, el.Count);
}
public void RegExToDFATest()
{
    //var RE_IF = RE.Literal("if");
    //var RE_ELSE = RE.Literal("else");
    var RE_ID = RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many());
    //var RE_NUM = RE.Range('0', '9').Many1();
    //var RE_ERROR = RE.Range(Char.MinValue, (char)255);

    Lexicon lexicon = new Lexicon();
    var ID = lexicon.DefaultLexer.DefineToken(RE_ID);

    NFAConverter nfaConverter = new NFAConverter(lexicon.CreateCompactCharSetManager());
    DFAModel D_ID = DFAModel.Create(lexicon);

    //verify state 0
    var state0 = D_ID.States[0];
    Assert.AreEqual(3, state0.OutEdges.Count);
    foreach (var edge in state0.OutEdges)
    {
        Assert.AreEqual(0, edge.TargetState.Index);
    }

    //verify initialization state
    var state1 = D_ID.States[1];
    foreach (var edge in state1.OutEdges)
    {
        if (edge.Symbol == 1) //a..z
        {
            Assert.IsTrue(edge.TargetState.Index > 0);
        }
        else
        {
            Assert.AreEqual(0, edge.TargetState.Index);
        }
    }
}
public void ParserDriverConflictTest()
{
    Lexicon test = new Lexicon();

    var X = test.Lexer.DefineToken(RE.Symbol('x'));
    var PLUS = test.Lexer.DefineToken(RE.Symbol('+'));
    var ASTERISK = test.Lexer.DefineToken(RE.Symbol('*'));

    var scannerinfo = test.CreateScannerInfo();

    Production<object> E = new Production<object>(), T = new Production<object>();
    E.Rule = (from e1 in E
              from plus in PLUS
              from e2 in E
              select (object)(((int)e1) + ((int)e2)))
             |
             (from e1 in E
              from mul in ASTERISK
              from e2 in E
              select (object)(((int)e1) * ((int)e2)))
             | T;
    T.Rule = from x in X select (object)2;

    ProductionInfoManager pim = new ProductionInfoManager(E.SuffixedBy(Grammar.Eos()));

    LR0Model lr0 = new LR0Model(pim);
    lr0.BuildModel();

    string dot = lr0.ToString();

    TransitionTable tt = TransitionTable.Create(lr0, scannerinfo);
    ParserEngine driver = new ParserEngine(tt, new SyntaxErrors());

    ForkableScannerBuilder builder = new ForkableScannerBuilder(scannerinfo);
    var em = new CompilationErrorManager();
    var el = em.CreateErrorList();
    builder.ErrorList = el;
    var scanner = builder.Create(new SourceReader(new StringReader("x+x*x")));

    var z1 = scanner.Read();
    driver.Input(z1);
    var z2 = scanner.Read();
    driver.Input(z2);
    var z3 = scanner.Read();
    driver.Input(z3);
    var z4 = scanner.Read();
    driver.Input(z4);
    var z5 = scanner.Read();
    driver.Input(z5);
    var z6 = scanner.Read();
    driver.Input(z6);

    Assert.AreEqual(0, driver.CurrentStackCount);
    Assert.AreEqual(2, driver.AcceptedCount);

    var results = new[] { (int)driver.GetResult(0, null), (int)driver.GetResult(1, null) };
    Assert.IsTrue(results.Contains(8));
    Assert.IsTrue(results.Contains(6));
}
protected override void OnDefineLexer(Compilers.Scanners.Lexicon lexicon, ICollection<Token> skippedTokens)
{
    var lettersCategories = new HashSet<UnicodeCategory>()
    {
        UnicodeCategory.LetterNumber,
        UnicodeCategory.LowercaseLetter,
        UnicodeCategory.ModifierLetter,
        UnicodeCategory.OtherLetter,
        UnicodeCategory.TitlecaseLetter,
        UnicodeCategory.UppercaseLetter
    };

    RE RE_IdChar = null;
    RE RE_SpaceChar = null;
    RE RE_InputChar = null;
    RE RE_NotSlashOrAsterisk = null;

    CharSetExpressionBuilder charSetBuilder = new CharSetExpressionBuilder();

    charSetBuilder.DefineCharSet(c => lettersCategories.Contains(Char.GetUnicodeCategory(c)), re => RE_IdChar = re | RE.Symbol('_'));
    charSetBuilder.DefineCharSet(c => Char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator, re => RE_SpaceChar = re);
    charSetBuilder.DefineCharSet(c => "\u000D\u000A\u0085\u2028\u2029".IndexOf(c) < 0, re => RE_InputChar = re);
    charSetBuilder.DefineCharSet(c => "/*".IndexOf(c) < 0, re => RE_NotSlashOrAsterisk = re);

    charSetBuilder.Build();

    var lex = lexicon.Lexer;

    //keywords
    K_CLASS = lex.DefineToken(RE.Literal("class"));
    K_PUBLIC = lex.DefineToken(RE.Literal("public"));
    K_STATIC = lex.DefineToken(RE.Literal("static"));
    K_VOID = lex.DefineToken(RE.Literal("void"));
    K_MAIN = lex.DefineToken(RE.Literal("Main"));
    K_STRING = lex.DefineToken(RE.Literal("string"));
    K_RETURN = lex.DefineToken(RE.Literal("return"));
    K_INT = lex.DefineToken(RE.Literal("int"));
    K_BOOL = lex.DefineToken(RE.Literal("bool"));
    K_IF = lex.DefineToken(RE.Literal("if"));
    K_ELSE = lex.DefineToken(RE.Literal("else"));
    K_WHILE = lex.DefineToken(RE.Literal("while"));
    K_SYSTEM = lex.DefineToken(RE.Literal("System"));
    K_CONSOLE = lex.DefineToken(RE.Literal("Console"));
    K_WRITELINE = lex.DefineToken(RE.Literal("WriteLine"));
    K_LENGTH = lex.DefineToken(RE.Literal("Length"));
    K_TRUE = lex.DefineToken(RE.Literal("true"));
    K_FALSE = lex.DefineToken(RE.Literal("false"));
    K_THIS = lex.DefineToken(RE.Literal("this"));
    K_NEW = lex.DefineToken(RE.Literal("new"));

    //id & literals
    ID = lex.DefineToken(RE_IdChar >> (RE_IdChar | RE.Range('0', '9')).Many(), "identifier");
    INTEGER_LITERAL = lex.DefineToken(RE.Range('0', '9').Many1(), "integer literal");

    //symbols
    LOGICAL_AND = lex.DefineToken(RE.Literal("&&"));
    LOGICAL_OR = lex.DefineToken(RE.Literal("||"));
    LOGICAL_NOT = lex.DefineToken(RE.Symbol('!'));
    LESS = lex.DefineToken(RE.Symbol('<'));
    GREATER = lex.DefineToken(RE.Symbol('>'));
    EQUAL = lex.DefineToken(RE.Literal("=="));
    ASSIGN = lex.DefineToken(RE.Symbol('='));
    PLUS = lex.DefineToken(RE.Symbol('+'));
    MINUS = lex.DefineToken(RE.Symbol('-'));
    ASTERISK = lex.DefineToken(RE.Symbol('*'));
    SLASH = lex.DefineToken(RE.Symbol('/'));
    LEFT_PH = lex.DefineToken(RE.Symbol('('));
    RIGHT_PH = lex.DefineToken(RE.Symbol(')'));
    LEFT_BK = lex.DefineToken(RE.Symbol('['));
    RIGHT_BK = lex.DefineToken(RE.Symbol(']'));
    LEFT_BR = lex.DefineToken(RE.Symbol('{'));
    RIGHT_BR = lex.DefineToken(RE.Symbol('}'));
    COMMA = lex.DefineToken(RE.Symbol(','));
    COLON = lex.DefineToken(RE.Symbol(':'));
    SEMICOLON = lex.DefineToken(RE.Symbol(';'));
    DOT = lex.DefineToken(RE.Symbol('.'));

    //skips
    WHITESPACE = lex.DefineToken(RE_SpaceChar | RE.CharSet("\u0009\u000B\u000C"));
    LINE_BREAKER = lex.DefineToken(
        RE.CharSet("\u000D\u000A\u0085\u2028\u2029") |
        RE.Literal("\r\n")
    );

    var RE_DelimitedCommentSection = RE.Symbol('/') | (RE.Symbol('*').Many() >> RE_NotSlashOrAsterisk);

    COMMENT = lex.DefineToken(
        (RE.Literal("//") >> RE_InputChar.Many()) |
        (RE.Literal("/*") >> RE_DelimitedCommentSection.Many() >> RE.Symbol('*').Many1() >> RE.Symbol('/')),
        "comment");

    skippedTokens.Add(WHITESPACE);
    skippedTokens.Add(LINE_BREAKER);
    skippedTokens.Add(COMMENT);
}
public RegularExpression Concat(RegularExpression follow)
{
    return new ConcatenationExpression(this, follow);
}
public void WhereGrammaTest()
{
    Lexicon test = new Lexicon();

    var ID = test.Lexer.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()), "ID");
    var NUM = test.Lexer.DefineToken(RE.Range('0', '9').Many1(), "NUM");
    var GREATER = test.Lexer.DefineToken(RE.Symbol('>'));
    var WHITESPACE = test.Lexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')), "[ ]");

    var p1 = from i in ID
             from g in GREATER
             from g2 in GREATER
             where Grammar.Check(g2.PrefixTrivia.Count == 0, 4, g2.Value.Span)
             from n in NUM
             select "A";

    var p2 = from i in ID
             from g in GREATER
             from g2 in GREATER
             from n in NUM
             select "B";

    var parser1 = p1 | p2;
    parser1.AmbiguityAggregator = (a, b) => a == "A" ? a : b;

    var info = test.CreateScannerInfo();

    var errorManager = new CompilationErrorManager();
    errorManager.DefineError(1, 0, CompilationStage.Parsing, "Unexpected token '{0}'");
    errorManager.DefineError(2, 0, CompilationStage.Parsing, "Missing token '{0}'");
    errorManager.DefineError(3, 0, CompilationStage.Parsing, "Syntax error");
    errorManager.DefineError(4, 0, CompilationStage.Parsing, "White spaces between >> are not allowed");

    var el = errorManager.CreateErrorList();

    ProductionInfoManager pim = new ProductionInfoManager(parser1.SuffixedBy(Grammar.Eos()));

    LR0Model lr0 = new LR0Model(pim);
    lr0.BuildModel();

    string dot = lr0.ToString();

    TransitionTable tt = TransitionTable.Create(lr0, info);
    var errdef = new SyntaxErrors() { TokenUnexpectedId = 1, TokenMissingId = 2, OtherErrorId = 3 };
    ParserEngine driver = new ParserEngine(tt, errdef);

    string source1 = "abc >> 123";
    var sr1 = new SourceReader(new StringReader(source1));

    Scanner scanner = new Scanner(info);
    scanner.SetTriviaTokens(WHITESPACE.Index);
    scanner.SetSource(sr1);

    Lexeme r;
    do
    {
        r = scanner.Read();
        driver.Input(r);
    } while (!r.IsEndOfStream);

    Assert.AreEqual(1, driver.AcceptedCount);
    Assert.AreEqual("A", driver.GetResult(0, el));
    Assert.AreEqual(0, el.Count);

    ParserEngine driver2 = new ParserEngine(tt, errdef);

    string source2 = "abc > > 123";
    var sr2 = new SourceReader(new StringReader(source2));
    scanner.SetSource(sr2);

    do
    {
        r = scanner.Read();
        driver2.Input(r);
    } while (!r.IsEndOfStream);

    var el2 = errorManager.CreateErrorList();

    Assert.AreEqual(1, driver2.AcceptedCount);
    Assert.AreEqual("B", driver2.GetResult(0, el2));
    Assert.AreEqual(0, el2.Count);
}
public static RegularExpression op_Concatenate(RegularExpression left, RegularExpression right)
{
    return new ConcatenationExpression(left, right);
}
public void CompactCharSetTest()
{
    Lexicon lexicon = new Lexicon();
    LexerState global = lexicon.DefaultLexer;
    LexerState keywords = global.CreateSubState();
    LexerState xml = keywords.CreateSubState();

    var lettersCategories = new[]
    {
        UnicodeCategory.LetterNumber,
        UnicodeCategory.LowercaseLetter,
        UnicodeCategory.ModifierLetter,
        UnicodeCategory.OtherLetter,
        UnicodeCategory.TitlecaseLetter,
        UnicodeCategory.UppercaseLetter
    };

    var RE_IDCHAR = RE.CharsOf(c => lettersCategories.Contains(Char.GetUnicodeCategory(c)));

    var ID = global.DefineToken(RE_IDCHAR.Concat(
        (RE_IDCHAR | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

    var scannerInfo = lexicon.CreateScannerInfo();
    scannerInfo.LexerStateIndex = xml.Index;

    Scanner s = new Scanner(scannerInfo);

    string source = "xmlns 你好吗1 123 蘏臦囧綗 ABCD if";
    SourceReader sr = new SourceReader(new StringReader(source));
    s.SetSource(sr);
    s.SetTriviaTokens(WHITESPACE.Index);

    var l1 = s.Read();
    Assert.AreEqual(XMLNS.Index, l1.TokenIndex);

    var l2 = s.Read();
    Assert.AreEqual(ID.Index, l2.TokenIndex);

    var l3 = s.Read();
    Assert.AreEqual(NUM.Index, l3.TokenIndex);

    var l4 = s.Read();
    Assert.AreEqual(ID.Index, l4.TokenIndex);

    var l5 = s.Read();
    Assert.AreEqual(ID.Index, l5.TokenIndex);

    var l6 = s.Read();
    Assert.AreEqual(IF.Index, l6.TokenIndex);
}
public void SkipTokenTest()
{
    Lexicon lexicon = new Lexicon();
    LexerState global = lexicon.DefaultLexer;
    LexerState keywords = global.CreateSubState();
    LexerState xml = keywords.CreateSubState();

    var ID = global.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

    ScannerInfo info = lexicon.CreateScannerInfo();
    PeekableScanner scanner = new PeekableScanner(info);

    string source = "asdf04a 1107 else Z if vvv xmlns 772737";
    StringReader sr = new StringReader(source);
    scanner.SetSource(new SourceReader(sr));
    scanner.SetTriviaTokens(WHITESPACE.Index, ERROR.Index);

    info.LexerStateIndex = xml.Index;

    Lexeme l1 = scanner.Read();
    Assert.AreEqual(ID.Index, l1.TokenIndex);
    Assert.AreEqual("asdf04a", l1.Value);
    Assert.AreEqual(0, l1.PrefixTrivia.Count);

    Lexeme l2 = scanner.Read();
    Assert.AreEqual(NUM.Index, l2.TokenIndex);
    Assert.AreEqual("1107", l2.Value);
    Assert.AreEqual(1, l2.PrefixTrivia.Count);

    Lexeme l3 = scanner.Read();
    Assert.AreEqual(ELSE.Index, l3.TokenIndex);
    Assert.AreEqual("else", l3.Value);
    Assert.AreEqual(1, l3.PrefixTrivia.Count);

    Lexeme l4 = scanner.Read();
    Assert.AreEqual(IF.Index, l4.TokenIndex);
    Assert.AreEqual("if", l4.Value);
    Assert.AreEqual(3, l4.PrefixTrivia.Count);

    int p1 = scanner.Peek();
    Assert.AreEqual(ID.Index, p1);

    int p2 = scanner.Peek2();
    int p3 = scanner.Peek(3);
    int peof = scanner.Peek(4);
    Assert.AreEqual(info.EndOfStreamTokenIndex, peof);

    Lexeme l6 = scanner.Read();
    Lexeme l7 = scanner.Read();
    Assert.AreEqual(XMLNS.Index, l7.TokenIndex);

    Lexeme l8 = scanner.Read();
    Assert.AreEqual(NUM.Index, l8.TokenIndex);

    Lexeme leof = scanner.Read();
    Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
    Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
    Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);
}
public void MultipleLexerParsingTest()
{
    Lexicon lexicon = new Lexicon();
    Lexer global = lexicon.Lexer;
    Lexer keywords = global.CreateSubLexer();

    var PROPERTY = global.DefineToken(RE.Literal("property"));
    var ID = global.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()), "ID");
    var NUM = global.DefineToken(RE.Range('0', '9').Many1(), "NUM");
    var EQ = global.DefineToken(RE.Symbol('='));
    var SEMICOLON = global.DefineToken(RE.Symbol(';'));
    var LB = global.DefineToken(RE.Symbol('{'));
    var RB = global.DefineToken(RE.Symbol('}'));
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')), "[ ]");
    var GET = keywords.DefineToken(RE.Literal("get"));

    var assignStatement = from id in ID
                          from eq in EQ
                          from value in NUM
                          from st in SEMICOLON
                          select id.Value + "=" + value.Value;

    var getDef = from _get in GET
                 from lb in LB
                 from statements in assignStatement.Many()
                 from rb in RB
                 select new GetDef { Statements = statements };

    var propDef = from _prop in PROPERTY
                  from id in ID
                  from lb in LB
                  from getdef in getDef
                  from rb in RB
                  select new PropDef { PropName = id.Value.Content, GetDef = getdef };

    string source = "property get { get { get = 1; } }";
    SourceReader sr = new SourceReader(new StringReader(source));

    var info = lexicon.CreateScannerInfo();
    Scanner scanner = new Scanner(info);
    scanner.SetTriviaTokens(WHITESPACE.Index);
    scanner.SetSource(sr);

    CompilationErrorManager errorManager = new CompilationErrorManager();
    errorManager.DefineError(1, 0, CompilationStage.Parsing, "Unexpected token '{0}'");
    errorManager.DefineError(2, 0, CompilationStage.Parsing, "Missing token '{0}'");
    errorManager.DefineError(3, 0, CompilationStage.Parsing, "Syntax error");

    ProductionInfoManager pim = new ProductionInfoManager(propDef.SuffixedBy(Grammar.Eos()));

    LR0Model lr0 = new LR0Model(pim);
    lr0.BuildModel();

    string dot = lr0.ToString();

    TransitionTable tt = TransitionTable.Create(lr0, info);
    SyntaxErrors errDef = new SyntaxErrors() { TokenUnexpectedId = 1, TokenMissingId = 2, OtherErrorId = 3 };
    ParserEngine driver = new ParserEngine(tt, errDef);

    Lexeme r;
    do
    {
        r = scanner.Read();
        driver.Input(r);
    } while (!r.IsEndOfStream);

    var el = errorManager.CreateErrorList();
    var result = (PropDef)driver.GetResult(0, el);
    Assert.AreEqual(0, el.Count);

    Assert.AreEqual("get", result.PropName);
    Assert.AreEqual("get=1", result.GetDef.Statements.First());
}
public void ScannerTest()
{
    Lexicon lexicon = new Lexicon();
    LexerState global = lexicon.DefaultLexer;
    LexerState keywords = global.CreateSubState();
    LexerState xml = keywords.CreateSubState();

    var ID = global.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

    ScannerInfo info = lexicon.CreateScannerInfo();
    PeekableScanner scanner = new PeekableScanner(info);

    string source = "asdf04a 1107 else Z if vvv xmlns 772737";
    StringReader sr = new StringReader(source);
    scanner.SetSource(new SourceReader(sr));

    Lexeme l1 = scanner.Read();
    Assert.AreEqual(ID.Index, l1.TokenIndex);
    Assert.AreEqual("asdf04a", l1.Value);
    Assert.AreEqual(0, l1.Span.StartLocation.Column);
    Assert.AreEqual(6, l1.Span.EndLocation.Column);

    Lexeme l2 = scanner.Read();
    Assert.AreEqual(WHITESPACE.Index, l2.TokenIndex);
    Assert.AreEqual(" ", l2.Value);

    Lexeme l3 = scanner.Read();
    Assert.AreEqual(NUM.Index, l3.TokenIndex);
    Assert.AreEqual("1107", l3.Value);

    Lexeme l4 = scanner.Read();
    Assert.AreEqual(WHITESPACE.Index, l4.TokenIndex);

    Lexeme l5 = scanner.Read();
    Assert.AreEqual(ID.Index, l5.TokenIndex);

    int p1 = scanner.Peek();
    Assert.AreEqual(WHITESPACE.Index, p1);

    int p2 = scanner.Peek2();
    Assert.AreEqual(ERROR.Index, p2);

    int p3 = scanner.Peek(3);
    Assert.AreEqual(WHITESPACE.Index, p3);

    int p4 = scanner.Peek(4);
    Assert.AreEqual(ID.Index, p4);

    int p5 = scanner.Peek(5);
    Assert.AreEqual(WHITESPACE.Index, p5);

    Lexeme l6 = scanner.Read();
    Lexeme l7 = scanner.Read();
    Assert.AreEqual(ERROR.Index, l7.TokenIndex);

    int p3_2 = scanner.Peek();
    Assert.AreEqual(p3, p3_2);

    Lexeme l8 = scanner.Read();  // whitespace
    Lexeme l9 = scanner.Read();  // ID:if
    Lexeme l10 = scanner.Read(); // whitespace
    Lexeme l11 = scanner.Read(); // ID:vvv
    Lexeme l12 = scanner.Read(); // whitespace
    Lexeme l13 = scanner.Read(); // ID:xmlns
    Lexeme l14 = scanner.Read(); // whitespace
    Lexeme l15 = scanner.Read(); // NUM:772737

    Lexeme leof = scanner.Read(); // eof
    Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
    Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
    Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);

    Lexeme leof2 = scanner.Read(); // after eof, should return eof again
    Assert.AreEqual(info.EndOfStreamTokenIndex, leof2.TokenIndex);
    Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof2.Span.StartLocation.CharIndex);
}
public void ParserDriverSimpleTest()
{
    Lexicon test = new Lexicon();

    var X = test.Lexer.DefineToken(RE.Symbol('x'));
    var PLUS = test.Lexer.DefineToken(RE.Symbol('+'));

    var scannerinfo = test.CreateScannerInfo();

    Production<object> E = new Production<object>(), T = new Production<object>();
    E.Rule = (from t in T
              from plus in PLUS
              from e in E
              select (object)(((int)t) + ((int)e)))
             | T;
    T.Rule = from x in X select (object)1;

    ProductionInfoManager pim = new ProductionInfoManager(E.SuffixedBy(Grammar.Eos()));

    LR0Model lr0 = new LR0Model(pim);
    lr0.BuildModel();

    string dot = lr0.ToString();

    TransitionTable tt = TransitionTable.Create(lr0, scannerinfo);
    ParserEngine driver = new ParserEngine(tt, new SyntaxErrors() { TokenUnexpectedId = 1 });

    ForkableScannerBuilder builder = new ForkableScannerBuilder(scannerinfo);
    var em = new CompilationErrorManager();
    var el = em.CreateErrorList();
    builder.ErrorList = el;
    var scanner = builder.Create(new SourceReader(new StringReader("x+x+x")));

    var z1 = scanner.Read();
    driver.Input(z1);
    var z2 = scanner.Read();
    driver.Input(z2);
    var z3 = scanner.Read();
    driver.Input(z3);
    var z4 = scanner.Read();
    driver.Input(z4);
    var z5 = scanner.Read();
    driver.Input(z5);
    var z6 = scanner.Read();
    driver.Input(z6);

    Assert.AreEqual(0, driver.CurrentStackCount);
    Assert.AreEqual(1, driver.AcceptedCount);
    Assert.AreEqual(3, driver.GetResult(0, null));
}
public void ParserErrorRecoveryTest()
{
    Lexicon binaryTreeSyntax = new Lexicon();
    var lex = binaryTreeSyntax.Lexer;

    //lex
    Token LEFTPH = lex.DefineToken(RE.Symbol('('));
    Token RIGHTPH = lex.DefineToken(RE.Symbol(')'));
    Token COMMA = lex.DefineToken(RE.Symbol(','));
    Token LETTER = lex.DefineToken(RE.Range('a', 'z') | RE.Range('A', 'Z'), "ID");

    //grammar
    Production<Node> NodeParser = new Production<Node>();
    NodeParser.Rule =
        (from a in LETTER
         from _1 in LEFTPH
         from left in NodeParser
         from _2 in COMMA
         from right in NodeParser
         from _3 in RIGHTPH
         select new Node(a.Value.Content, left, right))
        | Grammar.Empty<Node>(null);

    var builder = new ForkableScannerBuilder(binaryTreeSyntax.CreateScannerInfo());

    const string correct = "A(B(,),C(,))";
    string source = "A((B(,),C(,)";
    SourceReader sr = new SourceReader(new StringReader(source));

    var info = binaryTreeSyntax.CreateScannerInfo();
    Scanner scanner = new Scanner(info);
    scanner.SetSource(sr);

    CompilationErrorManager errorManager = new CompilationErrorManager();
    errorManager.DefineError(1, 0, CompilationStage.Parsing, "Unexpected token '{0}'");
    errorManager.DefineError(2, 0, CompilationStage.Parsing, "Missing token '{0}'");
    errorManager.DefineError(3, 0, CompilationStage.Parsing, "Invalid token found, did you mean '{0}' ?");
    errorManager.DefineError(4, 0, CompilationStage.Parsing, "Syntax error");

    ProductionInfoManager pim = new ProductionInfoManager(NodeParser.SuffixedBy(Grammar.Eos()));

    LR0Model lr0 = new LR0Model(pim);
    lr0.BuildModel();

    string dot = lr0.ToString();

    TransitionTable tt = TransitionTable.Create(lr0, info);
    SyntaxErrors errDef = new SyntaxErrors()
    {
        TokenUnexpectedId = 1,
        TokenMissingId = 2,
        OtherErrorId = 4,
        TokenMistakeId = 3
    };
    ParserEngine driver = new ParserEngine(tt, errDef);

    Lexeme r;
    do
    {
        r = scanner.Read();
        driver.Input(r);
    } while (!r.IsEndOfStream);

    var result = driver.GetResult(0, errorManager.CreateErrorList());
}
public void ProductionInfoManagerTest()
{
    Lexicon test = new Lexicon();

    var A = test.Lexer.DefineToken(RE.Symbol('a'));
    var D = test.Lexer.DefineToken(RE.Symbol('d'));
    var C = test.Lexer.DefineToken(RE.Symbol('c'));

    Production<object> X = new Production<object>(), Y = new Production<object>(), Z = new Production<object>();

    Z.Rule = (from d in D select d as object) |
             (from x in X from y in Y from z in Z select new { x, y, z } as object);
    Y.Rule = Grammar.Empty(new object()) | (from c in C select c as object);
    X.Rule = Y | (from a in A select a as object);

    ProductionInfoManager pis = new ProductionInfoManager(Z);

    var xInfo = pis.GetInfo(X);
    var yInfo = pis.GetInfo(Y);
    var zInfo = pis.GetInfo(Z);

    Assert.IsTrue(xInfo.IsNullable, "X should be nullable");
    Assert.IsTrue(yInfo.IsNullable, "Y should be nullable");
    Assert.IsFalse(zInfo.IsNullable, "Z should not be nullable");

    Assert.AreEqual(2, xInfo.First.Count);
    Assert.AreEqual(3, xInfo.Follow.Count);
    Assert.IsTrue(xInfo.First.Contains(A.AsTerminal()));
    Assert.IsTrue(xInfo.First.Contains(C.AsTerminal()));
    Assert.IsTrue(xInfo.Follow.Contains(A.AsTerminal()));
    Assert.IsTrue(xInfo.Follow.Contains(C.AsTerminal()));
    Assert.IsTrue(xInfo.Follow.Contains(D.AsTerminal()));

    Assert.AreEqual(1, yInfo.First.Count);
    Assert.AreEqual(3, yInfo.Follow.Count);
    Assert.IsTrue(yInfo.First.Contains(C.AsTerminal()));
    Assert.IsTrue(yInfo.Follow.Contains(A.AsTerminal()));
    Assert.IsTrue(yInfo.Follow.Contains(C.AsTerminal()));
    Assert.IsTrue(yInfo.Follow.Contains(D.AsTerminal()));

    Assert.AreEqual(3, zInfo.First.Count);
    Assert.AreEqual(0, zInfo.Follow.Count);
    Assert.IsTrue(zInfo.First.Contains(A.AsTerminal()));
    Assert.IsTrue(zInfo.First.Contains(C.AsTerminal()));
    Assert.IsTrue(zInfo.First.Contains(D.AsTerminal()));
}
public void LexerStateToDFATest()
{
    Lexicon lexicon = new Lexicon();
    LexerState global = lexicon.DefaultLexer;
    LexerState keywords = global.CreateSubState();
    LexerState xml = keywords.CreateSubState();

    var ID = global.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

    DFAModel dfa = DFAModel.Create(lexicon);
    CompressedTransitionTable tc = CompressedTransitionTable.Compress(dfa);

    ScannerInfo si = lexicon.CreateScannerInfo();
    FiniteAutomationEngine engine = new FiniteAutomationEngine(si.TransitionTable, si.CharClassTable);

    engine.InputString("if");
    Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();

    engine.InputString("12345");
    Assert.AreEqual(NUM.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();

    engine.InputString("asdf12dd");
    Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();

    engine.InputString("A");
    Assert.AreEqual(ERROR.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();

    engine.InputString("AAA");
    Assert.IsTrue(engine.IsAtStoppedState);
    engine.Reset();

    engine.InputString("if ");
    Assert.IsTrue(engine.IsAtStoppedState);
    engine.Reset();

    si.LexerStateIndex = keywords.Index;

    engine.InputString("if");
    Assert.AreEqual(IF.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();

    engine.InputString("else");
    Assert.AreEqual(ELSE.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();

    engine.InputString("xmlns");
    Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();

    si.LexerStateIndex = xml.Index;

    engine.InputString("if");
    Assert.AreEqual(IF.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();

    engine.InputString("xml");
    Assert.IsFalse(engine.IsAtStoppedState);
    engine.Reset();

    engine.InputString("xmlns");
    Assert.AreEqual(XMLNS.Index, si.GetTokenIndex(engine.CurrentState));
}
public Token DefineToken(RegularExpression regex)
{
    return DefineToken(regex, null);
}