/// <summary>
/// Initializes token metadata: the defining regular expression, the owning
/// lexicon, the lexer state this token belongs to, and its tag.
/// </summary>
internal TokenInfo(RegularExpression definition, Lexicon lexicon, Lexer state, Token tag)
{
    Definition = definition;
    Lexicon = lexicon;
    State = state;
    Tag = tag;
}
public void CompactCharSetTest()
{
    // Three-level lexer hierarchy: global -> keywords -> xml.
    Lexicon lexicon = new Lexicon();
    Lexer global = lexicon.Lexer;
    Lexer keywords = global.CreateSubLexer();
    Lexer xml = keywords.CreateSubLexer();

    // Identifier characters: any Unicode letter category (covers CJK text too).
    var letterCategories = new[]
    {
        UnicodeCategory.LetterNumber,
        UnicodeCategory.LowercaseLetter,
        UnicodeCategory.ModifierLetter,
        UnicodeCategory.OtherLetter,
        UnicodeCategory.TitlecaseLetter,
        UnicodeCategory.UppercaseLetter
    };
    var RE_IDCHAR = RE.CharsOf(c => letterCategories.Contains(Char.GetUnicodeCategory(c)));

    var ID = global.DefineToken(RE_IDCHAR.Concat((RE_IDCHAR | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

    var scannerInfo = lexicon.CreateScannerInfo();
    scannerInfo.CurrentLexerIndex = xml.Index; // scan using the most derived state

    Scanner scanner = new Scanner(scannerInfo);
    string source = "xmlns 你好吗1 123 蘏臦囧綗 ABCD if";
    scanner.SetSource(new SourceReader(new StringReader(source)));
    scanner.SetTriviaTokens(WHITESPACE.Index);

    // Expected token sequence for the source above; whitespace is trivia.
    var expectedTokens = new[] { XMLNS.Index, ID.Index, NUM.Index, ID.Index, ID.Index, IF.Index };
    foreach (var expected in expectedTokens)
    {
        var lexeme = scanner.Read();
        Assert.AreEqual(expected, lexeme.TokenIndex);
    }
}
public void ParserFuncTest()
{
    Lexicon lexicon = new Lexicon();
    var ID = lexicon.DefaultLexer.DefineToken(
        RE.Range('a', 'z').Concat((RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = lexicon.DefaultLexer.DefineToken(RE.Range('0', '9').Many1());
    var GREATER = lexicon.DefaultLexer.DefineToken(RE.Symbol('>'));
    var WHITESPACE = lexicon.DefaultLexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')));

    // The two alternatives differ only in AsParser(true) on the second '>'.
    var p1 = from i in ID
             from g in GREATER
             from g2 in GREATER.AsParser(true)
             from n in NUM
             select "A";
    var p2 = from i in ID
             from g in GREATER
             from g2 in GREATER
             from n in NUM
             select "B";
    var alternatives = p1 | p2;

    var scannerInfo = lexicon.CreateScannerInfo();
    ForkableScannerBuilder builder = new ForkableScannerBuilder(scannerInfo);
    builder.SetSkipTokens(WHITESPACE.Index);

    var errorManager = new CompilationErrorManager();
    var context = new ParserContext(errorManager, 1, 2);
    context.DefineDefaultCompilationErrorInfo(0);

    ParserRunner<string> runner = new ParserRunner<string>(alternatives, context);

    // "abc >> 123": the two '>' tokens are adjacent — p1 matches.
    var scanner1 = builder.Create(new SourceReader(new StringReader("abc >> 123")));
    var result1 = runner.Run(scanner1);
    Assert.AreEqual("A", result1);
    Assert.AreEqual(0, errorManager.Errors.Count);

    // "abc > > 123": the '>' tokens are separated by a space — p2 matches.
    var scanner2 = builder.Create(new SourceReader(new StringReader("abc > > 123")));
    var result2 = runner.Run(scanner2);
    Assert.AreEqual("B", result2);
    Assert.AreEqual(0, errorManager.Errors.Count);
}
/// <summary>
/// Defines the token set (+, *, parentheses, numbers, spaces) and caches
/// the resulting scanner info in m_scannerInfo for use by the tests.
/// </summary>
private void SetUpScanner()
{
    var lexicon = new Lexicon();
    var lexer = lexicon.Lexer;

    PLUS = lexer.DefineToken(RE.Symbol('+'));
    ASTERISK = lexer.DefineToken(RE.Symbol('*'));
    LEFT_PARENTHESIS = lexer.DefineToken(RE.Symbol('('));
    RIGHT_PARENTHESIS = lexer.DefineToken(RE.Symbol(')'));
    NUMBER = lexer.DefineToken(RE.Range('0', '9').Many1(), "number");
    SPACE = lexer.DefineToken(RE.Symbol(' ').Many1());

    m_scannerInfo = lexicon.CreateScannerInfo();
}
/// <summary>
/// Creates a lexer inside <paramref name="lexicon"/>. When
/// <paramref name="baseLexer"/> is null this is a root lexer at level 0;
/// otherwise it is registered as a child one level below its base.
/// </summary>
internal Lexer(Lexicon lexicon, int index, Lexer baseLexer)
{
    Lexicon = lexicon;
    BaseLexer = baseLexer;
    Index = index;
    Children = new List<Lexer>();
    m_tokens = new List<TokenInfo>();

    if (baseLexer != null)
    {
        Level = baseLexer.Level + 1;
        baseLexer.Children.Add(this);
    }
    else
    {
        Level = 0;
    }
}
/// <summary>
/// Creates a lexer state inside <paramref name="lexicon"/>. When
/// <paramref name="baseState"/> is null this is a root state at level 0;
/// otherwise it is registered as a child one level below its base.
/// </summary>
internal LexerState(Lexicon lexicon, int index, LexerState baseState)
{
    Lexicon = lexicon;
    BaseState = baseState;
    Index = index;
    Children = new List<LexerState>();
    m_tokens = new List<Token>();

    if (baseState != null)
    {
        Level = baseState.Level + 1;
        baseState.Children.Add(this);
    }
    else
    {
        Level = 0;
    }
}
public void ParserConvertTest()
{
    Lexicon lexicon = new Lexicon();
    var ID = lexicon.Lexer.DefineToken(
        RE.Range('a', 'z').Concat((RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = lexicon.Lexer.DefineToken(RE.Range('0', '9').Many1());
    var GREATER = lexicon.Lexer.DefineToken(RE.Symbol('>'));
    var WHITESPACE = lexicon.Lexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')));

    // An int-producing parser converted to float via Convert<T>().
    var intParser = from i in ID
                    from g in GREATER
                    from g2 in GREATER
                    from n in NUM
                    select 1;
    var floatParser = intParser.Convert<float>();

    ForkableScannerBuilder builder = new ForkableScannerBuilder(lexicon.CreateScannerInfo());
    builder.SetTriviaTokens(WHITESPACE.Index);

    var errorManager = new CompilationErrorManager();
    var errorList = errorManager.CreateErrorList();
    var context = new ParserContext(errorManager, 1, 2);
    context.DefineDefaultCompilationErrorInfo(0);
    context.ErrorList = errorList;

    ParserRunner<float> runner = new ParserRunner<float>(floatParser, context);

    var scanner = builder.Create(new SourceReader(new StringReader("abc >> 123")));
    var result = runner.Run(scanner);

    Assert.AreEqual(1.0f, result);
    Assert.AreEqual(0, errorList.Count);
}
/// <summary>
/// Verifies that trivia tokens (whitespace and the catch-all ERROR token)
/// are skipped by the scanner and attached to the following lexeme as
/// prefix trivia, and that peeking/reading past the input reports the
/// end-of-stream token.
/// </summary>
public void SkipTokenTest()
{
    Lexicon lexicon = new Lexicon();
    Lexer global = lexicon.Lexer;
    Lexer keywords = global.CreateSubLexer();
    Lexer xml = keywords.CreateSubLexer();

    var ID = global.DefineToken(RE.Range('a', 'z').Concat((RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255)); // catch-all for otherwise unmatched chars
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

    ScannerInfo info = lexicon.CreateScannerInfo();
    PeekableScanner scanner = new PeekableScanner(info);

    string source = "asdf04a 1107 else Z if vvv xmlns 772737";
    StringReader sr = new StringReader(source);
    scanner.SetSource(new SourceReader(sr));
    scanner.SetTriviaTokens(WHITESPACE.Index, ERROR.Index);
    info.CurrentLexerIndex = xml.Index;

    Lexeme l1 = scanner.Read();
    Assert.AreEqual(ID.Index, l1.TokenIndex);
    Assert.AreEqual("asdf04a", l1.Value.Content);
    Assert.AreEqual(0, l1.PrefixTrivia.Count);

    Lexeme l2 = scanner.Read();
    Assert.AreEqual(NUM.Index, l2.TokenIndex);
    Assert.AreEqual("1107", l2.Value.Content);
    Assert.AreEqual(1, l2.PrefixTrivia.Count);

    Lexeme l3 = scanner.Read();
    Assert.AreEqual(ELSE.Index, l3.TokenIndex);
    Assert.AreEqual("else", l3.Value.Content);
    // BUGFIX: this assertion previously re-checked l2.PrefixTrivia instead of l3's.
    Assert.AreEqual(1, l3.PrefixTrivia.Count);

    Lexeme l4 = scanner.Read();
    Assert.AreEqual(IF.Index, l4.TokenIndex);
    Assert.AreEqual("if", l4.Value.Content);
    // Three pieces of trivia precede "if": space, the skipped "Z" (ERROR), space.
    Assert.AreEqual(3, l4.PrefixTrivia.Count);

    int p1 = scanner.Peek();
    Assert.AreEqual(ID.Index, p1);
    int p2 = scanner.Peek2(); // exercise the peek buffer; values intentionally unchecked
    int p3 = scanner.Peek(3);
    int peof = scanner.Peek(4);
    Assert.AreEqual(info.EndOfStreamTokenIndex, peof);

    Lexeme l6 = scanner.Read();
    Lexeme l7 = scanner.Read();
    Assert.AreEqual(XMLNS.Index, l7.TokenIndex);
    Lexeme l8 = scanner.Read();
    Assert.AreEqual(NUM.Index, l8.TokenIndex);

    // End of stream: a zero-length lexeme positioned at source.Length.
    Lexeme leof = scanner.Read();
    Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
    Assert.AreEqual(leof.Value.Span.StartLocation.CharIndex, leof.Value.Span.EndLocation.CharIndex);
    Assert.AreEqual(source.Length, leof.Value.Span.StartLocation.CharIndex);
}
public void ScannerTest()
{
    Lexicon lexicon = new Lexicon();
    Lexer global = lexicon.Lexer;
    Lexer keywords = global.CreateSubLexer();
    Lexer xml = keywords.CreateSubLexer();

    var ID = global.DefineToken(RE.Range('a', 'z').Concat((RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

    ScannerInfo info = lexicon.CreateScannerInfo();
    PeekableScanner scanner = new PeekableScanner(info);

    string source = "asdf04a 1107 else Z if vvv xmlns 772737";
    StringReader sr = new StringReader(source);
    scanner.SetSource(new SourceReader(sr));

    // No trivia is configured and the scanner stays in the global state,
    // so whitespace/error tokens are read explicitly and "else"/"if" lex
    // as plain identifiers.
    Lexeme first = scanner.Read();
    Assert.AreEqual(ID.Index, first.TokenIndex);
    Assert.AreEqual("asdf04a", first.Value.Content);
    Assert.AreEqual(0, first.Value.Span.StartLocation.Column);
    Assert.AreEqual(6, first.Value.Span.EndLocation.Column);

    Lexeme second = scanner.Read();
    Assert.AreEqual(WHITESPACE.Index, second.TokenIndex);
    Assert.AreEqual(" ", second.Value.Content);

    Lexeme third = scanner.Read();
    Assert.AreEqual(NUM.Index, third.TokenIndex);
    Assert.AreEqual("1107", third.Value.Content);

    Assert.AreEqual(WHITESPACE.Index, scanner.Read().TokenIndex);
    Assert.AreEqual(ID.Index, scanner.Read().TokenIndex); // "else" as identifier

    // Look ahead over " Z if" without consuming anything.
    int p1 = scanner.Peek();
    Assert.AreEqual(WHITESPACE.Index, p1);
    int p2 = scanner.Peek2();
    Assert.AreEqual(ERROR.Index, p2);
    int p3 = scanner.Peek(3);
    Assert.AreEqual(WHITESPACE.Index, p3);
    int p4 = scanner.Peek(4);
    Assert.AreEqual(ID.Index, p4);
    int p5 = scanner.Peek(5);
    Assert.AreEqual(WHITESPACE.Index, p5);

    scanner.Read(); // consume the whitespace
    Lexeme errorLexeme = scanner.Read();
    Assert.AreEqual(ERROR.Index, errorLexeme.TokenIndex); // "Z"

    // Peeking after those reads must agree with the earlier lookahead.
    int p3Again = scanner.Peek();
    Assert.AreEqual(p3, p3Again);

    // Drain the remainder: ws, "if", ws, "vvv", ws, "xmlns", ws, "772737".
    for (int i = 0; i < 8; i++)
    {
        scanner.Read();
    }

    // End of stream is a zero-length lexeme positioned at source.Length...
    Lexeme eof = scanner.Read();
    Assert.AreEqual(info.EndOfStreamTokenIndex, eof.TokenIndex);
    Assert.AreEqual(eof.Value.Span.StartLocation.CharIndex, eof.Value.Span.EndLocation.CharIndex);
    Assert.AreEqual(source.Length, eof.Value.Span.StartLocation.CharIndex);

    // ...and reading past it keeps returning the same end-of-stream lexeme.
    Lexeme eofAgain = scanner.Read();
    Assert.AreEqual(info.EndOfStreamTokenIndex, eofAgain.TokenIndex);
    Assert.AreEqual(eof.Value.Span.StartLocation.CharIndex, eofAgain.Value.Span.StartLocation.CharIndex);
}
/// <summary>
/// Builds a DFA for the identifier regex [a-z][a-z0-9]* and spot-checks
/// its states: state 0 traps every symbol (all edges loop back to 0) and
/// state 1, the initial state, only leaves on the letter symbol class.
/// </summary>
public void RegExToDFATest()
{
    var RE_ID = RE.Range('a', 'z').Concat((RE.Range('a', 'z') | RE.Range('0', '9')).Many());

    Lexicon lexicon = new Lexicon();
    var ID = lexicon.Lexer.DefineToken(RE_ID);

    // NOTE: the previously constructed NFAConverter local was unused and
    // has been removed; DFAModel.Create performs the conversion itself.
    DFAModel D_ID = DFAModel.Create(lexicon);

    // Verify state 0: every out-edge loops back to state 0.
    var state0 = D_ID.States[0];
    Assert.AreEqual(3, state0.OutEdges.Count);
    foreach (var edge in state0.OutEdges)
    {
        Assert.AreEqual(0, edge.TargetState.Index);
    }

    // Verify the initial state: only symbol 1 (a..z) leads to a live
    // state; every other symbol falls into state 0.
    var state1 = D_ID.States[1];
    foreach (var edge in state1.OutEdges)
    {
        if (edge.Symbol == 1) //a..z
        {
            Assert.IsTrue(edge.TargetState.Index > 0);
        }
        else
        {
            Assert.AreEqual(0, edge.TargetState.Index);
        }
    }
}
/// <summary>
/// Checks that the compressed DFA resolves tokens according to the active
/// lexer state: the global state only knows ID/NUM/ERROR, the keywords
/// sub-state adds "if"/"else", and the xml sub-state additionally
/// recognizes "xmlns". Also removes a stray empty statement that trailed
/// the final assertion.
/// </summary>
public void LexerStateToDFATest()
{
    Lexicon lexicon = new Lexicon();
    Lexer global = lexicon.Lexer;
    Lexer keywords = global.CreateSubLexer();
    Lexer xml = keywords.CreateSubLexer();

    var ID = global.DefineToken(RE.Range('a', 'z').Concat((RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

    DFAModel dfa = DFAModel.Create(lexicon);
    CompressedTransitionTable tc = CompressedTransitionTable.Compress(dfa);

    ScannerInfo si = lexicon.CreateScannerInfo();
    FiniteAutomationEngine engine = new FiniteAutomationEngine(si.TransitionTable, si.CharClassTable);

    // Feeds input to the engine, asserts the token the final state maps
    // to, then resets the engine for the next probe.
    Action<string, int> assertToken = (input, expectedTokenIndex) =>
    {
        engine.InputString(input);
        Assert.AreEqual(expectedTokenIndex, si.GetTokenIndex(engine.CurrentState));
        engine.Reset();
    };

    // Feeds input to the engine, asserts whether it ends up stopped, then resets.
    Action<string, bool> assertStopped = (input, expectStopped) =>
    {
        engine.InputString(input);
        Assert.AreEqual(expectStopped, engine.IsAtStoppedState);
        engine.Reset();
    };

    // Global state: keywords are not special yet.
    assertToken("if", ID.Index);
    assertToken("12345", NUM.Index);
    assertToken("asdf12dd", ID.Index);
    assertToken("A", ERROR.Index);
    assertStopped("AAA", true);
    assertStopped("if ", true);

    // Keywords state: "if"/"else" win; "xmlns" is still an identifier.
    si.CurrentLexerIndex = keywords.Index;
    assertToken("if", IF.Index);
    assertToken("else", ELSE.Index);
    assertToken("xmlns", ID.Index);

    // Xml state: inherits the keywords and adds "xmlns".
    si.CurrentLexerIndex = xml.Index;
    assertToken("if", IF.Index);
    assertStopped("xml", false); // proper prefix of "xmlns": engine not stuck
    assertToken("xmlns", XMLNS.Index);
}
public void ForkableScannerTest()
{
    Lexicon lexicon = new Lexicon();
    var A = lexicon.Lexer.DefineToken(RE.Range('a', 'z'));
    ScannerInfo info = lexicon.CreateScannerInfo();

    string source = "abcdefghijklmnopqrstuvwxyz";
    ForkableScannerBuilder builder = new ForkableScannerBuilder(info);
    ForkableScanner primary = builder.Create(new SourceReader(new StringReader(source)));

    // Each letter lexes as one token; consume "a" and "b" first.
    Assert.AreEqual("a", primary.Read().Value.Content);
    Assert.AreEqual("b", primary.Read().Value.Content);

    // A fork continues from the current position, independently of the original.
    ForkableScanner fork1 = primary.Fork();

    for (int i = 2; i <= 4; i++)
    {
        Assert.AreEqual(source[i].ToString(), primary.Read().Value.Content);
    }
    for (int i = 2; i <= 5; i++)
    {
        Assert.AreEqual(source[i].ToString(), fork1.Read().Value.Content);
    }

    // Fork again; primary and the new fork both sit at index 5.
    ForkableScanner fork2 = primary.Fork();
    var fromPrimary = primary.Read();
    var fromFork2 = fork2.Read();
    Assert.AreEqual(source[5].ToString(), fromPrimary.Value.Content);
    Assert.AreEqual(source[5].ToString(), fromFork2.Value.Content);

    // Interleaved reads from different forks track their own positions.
    var next1 = fork1.Read();
    var next2 = fork2.Read();
    Assert.AreEqual(source[6].ToString(), next2.Value.Content);
    Assert.AreEqual(source[6].ToString(), next1.Value.Content);

    fork1.Read(); // advance fork1 past index 7 (value intentionally unchecked)

    for (int i = 7; i < 9; i++)
    {
        Assert.AreEqual(source[i].ToString(), fork2.Read().Value.Content);
    }
}
public void ErrorRecoveryTest()
{
    Lexicon lexicon = new Lexicon();
    Lexer global = lexicon.Lexer;
    var ID = global.DefineToken(RE.Range('a', 'z').Concat((RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());

    ScannerInfo info = lexicon.CreateScannerInfo();
    PeekableScanner scanner = new PeekableScanner(info);

    // "!@#$!@" and "Z" match no token definition and must be recovered from.
    string source = "asdf04a 1107 !@#$!@ Z if vvv xmlns 772737";
    scanner.SetSource(new SourceReader(new StringReader(source)));
    scanner.SetTriviaTokens(WHITESPACE.Index);
    scanner.RecoverErrors = true;

    CompilationErrorManager errorManager = new CompilationErrorManager();
    errorManager.DefineError(101, 0, CompilationStage.Scanning, "Invalid token: {0}");
    scanner.ErrorManager = errorManager;
    scanner.LexicalErrorId = 101;

    // The first two lexemes are valid: nothing reported yet.
    Assert.AreEqual(ID.Index, scanner.Read().TokenIndex);
    Assert.AreEqual(NUM.Index, scanner.Read().TokenIndex);
    Assert.AreEqual(0, errorManager.Errors.Count);

    // Recovery skips the invalid characters, reports error 101, and
    // resumes with the next valid token.
    Assert.AreEqual(ID.Index, scanner.Read().TokenIndex);
    Assert.IsTrue(errorManager.Errors.Count > 0);
    Assert.AreEqual(101, errorManager.Errors[0].Info.Id);
}
/// <summary>
/// Creates a root lexer (no base lexer) for the given lexicon; delegates
/// to the full constructor with a null base.
/// </summary>
internal Lexer(Lexicon lexicon, int index) : this(lexicon, index, null) { }
/// <summary>
/// Derives a new child state from this one via the owning lexicon.
/// </summary>
public LexerState CreateSubState()
{
    return Lexicon.DefineLexerState(this);
}
/// <summary>
/// Creates a root lexer state (no base state) for the given lexicon;
/// delegates to the full constructor with a null base.
/// </summary>
internal LexerState(Lexicon lexicon, int index) : this(lexicon, index, null) { }