/// <summary>
/// Runs <c>GetByAutomata</c> from <c>_nameState</c> over "name1" starting at index 0 and
/// expects a right value matching <c>Token("name1", "name", 0, 5)</c> with the out index set to 5.
/// </summary>
public void GetByAutomata_应该能够识别名字()
{
    AutomataTokenizer
        .GetByAutomata(_nameState, "name1", 0, out var nextIndex)
        .ShouldMatchRight(new Token("name1", "name", 0, 5));

    nextIndex.ShouldBe(5);
}
/// <summary>
/// Parameterized check: starting from a begin state built over
/// <c>Ex1Automata.DecimalOrArrow2OrArrow3</c>, tokenizing <paramref name="buffer"/> must yield a
/// left (error) value whose <c>CurrentIdx</c> equals <paramref name="expectCurrentIdx"/>.
/// </summary>
/// <param name="buffer">Input text handed to the tokenizer.</param>
/// <param name="expectCurrentIdx">Expected <c>CurrentIdx</c> reported by the error.</param>
public void 自动机_在无法识别Decimal_Arrow2_Arrow3时能够报错(string buffer, int expectCurrentIdx)
{
    var startState = AutomataTokenizerState.ForBegin(Ex1Automata.DecimalOrArrow2OrArrow3);
    var outcome = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out _);

    outcome.ShouldBeLeft(error => error.CurrentIdx.ShouldBe(expectCurrentIdx));
}
/// <summary>
/// Adds a self-looping "space" end state to <c>_nameWithCommentState</c>, tokenizes a longer
/// text mixing names, spaces and comments, and checks both the token types and the lexemes
/// of the right (successful) results, in order.
/// </summary>
public void GetAllTokenByAutomata_能够获取大段文字的Token流()
{
    // A space state that can follow itself, reachable from the shared start state.
    var spaceState = AutomataTokenizerState.ForEnd(' '.MatchCurrentPosition(), "space");
    spaceState.NextStates.Add(spaceState);
    _nameWithCommentState.NextStates.Add(spaceState);

    const string text = "abc def ggg /*fdsafdsf fff */ llll a/**/a ";

    // Materialize once so the two projections below enumerate the same results.
    var tokens = AutomataTokenizer
        .GetAllTokenByAutomata(_nameWithCommentState, text)
        .Rights()
        .ToArray();

    var tokenTypes = tokens.Select(token => token.TokenType).ToArray();
    tokenTypes.ShouldMatchObject(new[]
    {
        "name", "space", "name", "space", "name", "space", "comment",
        "space", "name", "space", "name", "comment", "name", "space",
    });

    var lexemes = tokens.Select(token => token.Lexeme).ToArray();
    lexemes.ShouldMatchObject(new[]
    {
        "abc", " ", "def", " ", "ggg", " ", "/*fdsafdsf fff */",
        " ", "llll", " ", "a", "/**/", "a", " ",
    });
}
/// <summary>
/// Runs <c>GetByAutomata</c> from <c>_nameState</c> over the one-character input "a" and expects
/// a right value matching <c>Token("a", "name", 0, 1)</c> with the out index set to 1.
/// </summary>
public void GetByAutomata_能够识别单个字符作为的名字()
{
    var outcome = AutomataTokenizer.GetByAutomata(_nameState, "a", 0, out var nextIndex);

    outcome.ShouldMatchRight(new Token("a", "name", 0, 1));
    nextIndex.ShouldBe(1);
}
/// <summary>
/// Runs <c>GetByAutomata</c> from <c>_nameState</c> over "name1.1" and expects only the leading
/// "name1" to be returned as <c>Token("name1", "name", 0, 5)</c>, with the out index set to 5.
/// </summary>
public void GetByAutomata_可以识别目前识别的子串()
{
    var outcome = AutomataTokenizer.GetByAutomata(_nameState, "name1.1", 0, out var nextIndex);

    outcome.ShouldMatchRight(new Token("name1", "name", 0, 5));
    nextIndex.ShouldBe(5);
}
/// <summary>
/// Parameterized check: starting from a begin state built over
/// <c>Ex1Automata.DecimalOrArrow2OrArrow3</c>, tokenizing <paramref name="buffer"/> must yield a
/// right value whose <c>TokenType</c> is <paramref name="expectedTokenType"/>, and the out index
/// must equal <paramref name="expectEndAt"/>.
/// </summary>
/// <param name="buffer">Input text handed to the tokenizer.</param>
/// <param name="expectEndAt">Expected value of the out index after the call.</param>
/// <param name="expectedTokenType">Expected <c>TokenType</c> of the recognized token.</param>
public void 自动机_能够识别Decimal_Arrow2_Arrow3(string buffer, int expectEndAt, string expectedTokenType)
{
    var startState = AutomataTokenizerState.ForBegin(Ex1Automata.DecimalOrArrow2OrArrow3);

    AutomataTokenizer
        .GetByAutomata(startState, buffer, 0, out var nextIndex)
        .ShouldBeRight(recognized => recognized.TokenType.ShouldBe(expectedTokenType));

    nextIndex.ShouldBe(expectEndAt);
}
/// <summary>
/// Parameterized check: starting from a begin state whose only candidate is
/// <c>Ex1Automata.Symbol</c>, tokenizing <paramref name="buffer"/> must yield a left (error)
/// value whose <c>CurrentIdx</c> equals <paramref name="expectCurrentIdx"/>.
/// </summary>
/// <param name="buffer">Input text handed to the tokenizer.</param>
/// <param name="expectCurrentIdx">Expected <c>CurrentIdx</c> reported by the error.</param>
public void 自动机_无法识别Symbol时能够报错(string buffer, int expectCurrentIdx)
{
    var symbolOnly = new List<AutomataTokenizerState> { Ex1Automata.Symbol };
    var startState = AutomataTokenizerState.ForBegin(symbolOnly);

    var outcome = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out _);

    outcome.ShouldBeLeft(error => error.CurrentIdx.ShouldBe(expectCurrentIdx));
}
/// <summary>
/// Tokenizes "/*aaaaa*/abc" from <c>_nameWithCommentState</c> and expects the right
/// (successful) results to be a "comment" token followed by a "name" token.
/// </summary>
public void GetAllTokenByAutomata_能够获取Token流()
{
    var tokens = AutomataTokenizer
        .GetAllTokenByAutomata(_nameWithCommentState, "/*aaaaa*/abc")
        .Rights()
        .ToArray();

    var expected = new[]
    {
        new Token("/*aaaaa*/", "comment", 0, 9),
        new Token("abc", "name", 9, 12),
    };

    tokens.ShouldMatchObject(expected);
}
/// <summary>
/// Parameterized check: starting from a begin state whose only candidate is
/// <c>Ex1Automata.Symbol</c>, tokenizing <paramref name="buffer"/> must yield a right value whose
/// <c>TokenType</c> is <c>Ex1Automata.SymbolToken</c>, and the out index must equal
/// <paramref name="expectEndAt"/>.
/// </summary>
/// <param name="buffer">Input text handed to the tokenizer.</param>
/// <param name="expectEndAt">Expected value of the out index after the call.</param>
public void 自动机_能够识别Symbol(string buffer, int expectEndAt)
{
    var symbolOnly = new List<AutomataTokenizerState> { Ex1Automata.Symbol };
    var startState = AutomataTokenizerState.ForBegin(symbolOnly);

    AutomataTokenizer
        .GetByAutomata(startState, buffer, 0, out var nextIndex)
        .ShouldBeRight(recognized => recognized.TokenType.ShouldBe(Ex1Automata.SymbolToken));

    nextIndex.ShouldBe(expectEndAt);
}
/// <summary>
/// Tokenizes <paramref name="buffer"/> starting from <c>Ex1Automata.ForBegin</c> and
/// post-processes the successful token stream.
/// </summary>
/// <param name="buffer">Text to tokenize.</param>
/// <returns>
/// On the right side: the tokens with <c>Ex1Automata.SpaceToken</c> entries excluded and
/// <c>TransformTokenTypeMatched</c> applied for <c>Ex1Automata.SymbolToken</c> using
/// <c>GetSymbolConvertSet()</c> and <c>TokenTypes.Identifier</c>. On the left side: the error
/// list produced by <c>Seprate()</c>, passed through unchanged.
/// </returns>
public static Either<List<WrongTokenException>, List<Token>> TokenizeBuffer(string buffer)
{
    // NOTE(review): Seprate() presumably folds the per-token results into "all errors" or
    // "all tokens" - confirm against its definition.
    var separated = AutomataTokenizer
        .GetAllTokenByAutomata(Ex1Automata.ForBegin, buffer)
        .Seprate();

    return separated.Match<Either<List<WrongTokenException>, List<Token>>>(
        Right: tokens => tokens
            .ExcludeTokenType(Ex1Automata.SpaceToken)
            .TransformTokenTypeMatched(Ex1Automata.SymbolToken, GetSymbolConvertSet(), TokenTypes.Identifier)
            .ToList(),
        Left: errors => errors);
}
/// <summary>
/// Runs <c>GetByAutomata</c> from <c>_nameState</c> over "123" and expects a left (error) value
/// carrying the buffer, a token begin of 0, a current index of 0 and <c>_nameState</c> as the
/// current state; the out index is expected to be 1.
/// </summary>
public void GetByAutomata_不能识别的时候应返回异常()
{
    var outcome = AutomataTokenizer.GetByAutomata(_nameState, "123", 0, out var resumeAt);

    outcome.ShouldBeLeft(error =>
    {
        error.Buffer.ShouldBe("123");
        error.TokenBegin.ShouldBe(0);
        error.CurrentIdx.ShouldBe(0);
        error.CurrentState.ShouldBe(_nameState);
    });

    resumeAt.ShouldBe(1);
}
/// <summary>
/// Tokenizes "/*asddg*/aaa" in two consecutive <c>GetByAutomata</c> calls from
/// <c>_nameWithCommentState</c>, feeding the first call's out index into the second, and
/// expects a "comment" token followed by a "name" token.
/// </summary>
public void GetByAutomata_在包含多个终止状态时能够正确识别_comment在前()
{
    const string text = "/*asddg*/aaa";

    // First token: the leading comment.
    var comment = AutomataTokenizer.GetByAutomata(_nameWithCommentState, text, 0, out var afterComment);
    comment.ShouldMatchRight(new Token("/*asddg*/", "comment", 0, 9));
    afterComment.ShouldBe(9);

    // Second token: resume where the comment ended.
    var name = AutomataTokenizer.GetByAutomata(_nameWithCommentState, text, afterComment, out var afterName);
    name.ShouldMatchRight(new Token("aaa", "name", 9, 12));
    afterName.ShouldBe(12);
}
/// <summary>
/// Tokenizes "aaa/*asddg*/bbb" in three consecutive <c>GetByAutomata</c> calls from
/// <c>_nameWithCommentState</c>, each resuming at the previous call's out index, and expects
/// "name", "comment", "name" tokens in that order.
/// </summary>
public void GetByAutomata_在包含多个终止状态时能够正确识别_夹心()
{
    const string text = "aaa/*asddg*/bbb";

    // Leading name.
    var leadingName = AutomataTokenizer.GetByAutomata(_nameWithCommentState, text, 0, out var afterLeadingName);
    leadingName.ShouldMatchRight(new Token("aaa", "name", 0, 3));
    afterLeadingName.ShouldBe(3);

    // Comment sandwiched between the two names.
    var comment = AutomataTokenizer.GetByAutomata(_nameWithCommentState, text, afterLeadingName, out var afterComment);
    comment.ShouldMatchRight(new Token("/*asddg*/", "comment", 3, 12));
    afterComment.ShouldBe(12);

    // Trailing name.
    var trailingName = AutomataTokenizer.GetByAutomata(_nameWithCommentState, text, afterComment, out var afterTrailingName);
    trailingName.ShouldMatchRight(new Token("bbb", "name", 12, 15));
    afterTrailingName.ShouldBe(15);
}
/// <summary>
/// Tokenizes ".name/*asdf" from <c>_nameWithCommentState</c> and expects three results: an error
/// at index 0, the "name" token, and an error for the comment that begins at 5 and is still
/// open at index 11. <c>Seprate()</c> over the results is expected to be a left value holding
/// two errors.
/// </summary>
public void GetAllTokenByAutomata_能够获取所有的错误信息()
{
    var results = AutomataTokenizer
        .GetAllTokenByAutomata(_nameWithCommentState, ".name/*asdf")
        .ToArray();

    results.Seprate().ShouldBeLeft(errors => errors.Count.ShouldBe(2));

    results[0].ShouldBeLeft(error =>
    {
        error.TokenBegin.ShouldBe(0);
        error.CurrentIdx.ShouldBe(0);
    });

    results[1].ShouldMatchRight(new Token("name", "name", 1, 5));

    results[2].ShouldBeLeft(error =>
    {
        error.TokenBegin.ShouldBe(5);
        error.CurrentIdx.ShouldBe(11);
    });
}
/// <summary>
/// Parameterized check: tokenizing <paramref name="buffer"/> from <c>_certainState</c> must yield
/// a left (error) value with <c>TokenBegin</c> 0 and <c>CurrentIdx</c> equal to
/// <paramref name="expectEndAt"/>, and the out index must equal
/// <paramref name="expectNextBegin"/>.
/// </summary>
/// <param name="buffer">Input text handed to the tokenizer.</param>
/// <param name="expectEndAt">Expected <c>CurrentIdx</c> reported by the error.</param>
/// <param name="expectNextBegin">Expected value of the out index after the call.</param>
/// <param name="expectStateAt">
/// Character the error's <c>CurrentState.Asserter</c> must accept; the NUL character means the
/// asserter itself is expected to be null.
/// </param>
public void GetByAutomata_在无法识别时能够返回含有终点位置的异常(string buffer, int expectEndAt, int expectNextBegin, char expectStateAt)
{
    // Traversing with a finite state machine should never backtrack, so the call should
    // directly return the token recognized so far.
    var outcome = AutomataTokenizer.GetByAutomata(_certainState, buffer, 0, out var resumeAt);

    outcome.ShouldBeLeft(error =>
    {
        error.CurrentIdx.ShouldBe(expectEndAt);
        error.TokenBegin.ShouldBe(0);

        if (expectStateAt != '\0')
        {
            error.CurrentState.Asserter(expectStateAt).ShouldBeTrue();
        }
        else
        {
            error.CurrentState.Asserter.ShouldBeNull();
        }
    });

    resumeAt.ShouldBe(expectNextBegin);
}