/// <summary>
/// Tokenizes a longer text that mixes names, single spaces and block comments, then checks
/// the resulting token types and lexemes against the expected sequences.
/// </summary>
public void GetAllTokenByAutomata_能够获取到大段文字的Token流()
{
    const string buffer = "abc def ggg /*fdsafdsf fff */ llll a/**/a ";

    // The space state lists itself as a successor and is registered on the shared begin state.
    var spaceState = AutomataTokenizerState.ForEnd(' '.MatchCurrentPosition(), "space");
    spaceState.NextStates.Add(spaceState);
    _nameWithCommentState.NextStates.Add(spaceState);

    var rights = AutomataTokenizer.GetAllTokenByAutomata(_nameWithCommentState, buffer).Rights();

    // Materialize once so both assertions below read the same token sequence.
    var tokens = rights as Token[] ?? rights.ToArray();

    var expectedTypes = new[]
    {
        "name", "space", "name", "space", "name", "space", "comment", "space",
        "name", "space", "name", "comment", "name", "space",
    };
    var expectedLexemes = new[]
    {
        "abc", " ", "def", " ", "ggg", " ", "/*fdsafdsf fff */", " ",
        "llll", " ", "a", "/**/", "a", " ",
    };

    tokens.Select(token => token.TokenType).ToArray().ShouldMatchObject(expectedTypes);
    tokens.Select(token => token.Lexeme).ToArray().ShouldMatchObject(expectedLexemes);
}
/// <summary>
/// Runs the DecimalOrArrow2OrArrow3 automaton over <paramref name="buffer"/> from index 0 and
/// checks that the result is a Left value whose <c>CurrentIdx</c> equals
/// <paramref name="expectCurrentIdx"/>.
/// </summary>
/// <param name="buffer">Input text handed to the tokenizer.</param>
/// <param name="expectCurrentIdx">Expected <c>CurrentIdx</c> carried by the Left value.</param>
public void 自动机_在无法识别Decimal_Arrow2_Arrow3时能够报错(string buffer, int expectCurrentIdx)
{
    var startState = AutomataTokenizerState.ForBegin(Ex1Automata.DecimalOrArrow2OrArrow3);

    // The end index is irrelevant for the failure case, so it is discarded.
    var outcome = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out _);

    outcome.ShouldBeLeft(error =>
    {
        error.CurrentIdx.ShouldBe(expectCurrentIdx);
    });
}
/// <summary>
/// Runs the DecimalOrArrow2OrArrow3 automaton over <paramref name="buffer"/> from index 0 and
/// checks that the result is a Right token of the expected type and that the reported end
/// index matches.
/// </summary>
/// <param name="buffer">Input text handed to the tokenizer.</param>
/// <param name="expectEndAt">Expected value of the end index returned through the out parameter.</param>
/// <param name="expectedTokenType">Expected <c>TokenType</c> of the recognized token.</param>
public void 自动机_能够识别Decimal_Arrow2_Arrow3(string buffer, int expectEndAt, string expectedTokenType)
{
    var startState = AutomataTokenizerState.ForBegin(Ex1Automata.DecimalOrArrow2OrArrow3);

    var outcome = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out var stoppedAt);

    // Token type is verified first, then the end position.
    outcome.ShouldBeRight(recognized => recognized.TokenType.ShouldBe(expectedTokenType));
    stoppedAt.ShouldBe(expectEndAt);
}
/// <summary>
/// Runs an automaton whose only successor is <c>Ex1Automata.Symbol</c> over
/// <paramref name="buffer"/> from index 0 and checks that the result is a Left value whose
/// <c>CurrentIdx</c> equals <paramref name="expectCurrentIdx"/>.
/// </summary>
/// <param name="buffer">Input text handed to the tokenizer.</param>
/// <param name="expectCurrentIdx">Expected <c>CurrentIdx</c> carried by the Left value.</param>
public void 自动机_无法识别Symbol时能够报错(string buffer, int expectCurrentIdx)
{
    var successors = new List<AutomataTokenizerState> { Ex1Automata.Symbol };
    var startState = AutomataTokenizerState.ForBegin(successors);

    // The end index is irrelevant for the failure case, so it is discarded.
    var outcome = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out _);

    outcome.ShouldBeLeft(error =>
    {
        error.CurrentIdx.ShouldBe(expectCurrentIdx);
    });
}
/// <summary>
/// Creates the exception with the given message and stores the supplied tokenizer context
/// on the instance.
/// </summary>
/// <param name="message">Message forwarded to the base exception constructor.</param>
/// <param name="buffer">Value stored in <c>Buffer</c>.</param>
/// <param name="tokenBegin">Value stored in <c>TokenBegin</c>.</param>
/// <param name="currentIdx">Value stored in <c>CurrentIdx</c>.</param>
/// <param name="currentState">Value stored in <c>CurrentState</c>.</param>
public WrongTokenException(
    string message,
    string buffer,
    int tokenBegin,
    int currentIdx,
    AutomataTokenizerState currentState)
    : base(message)
{
    // Assigned in parameter order; the assignments are independent of each other.
    Buffer = buffer;
    TokenBegin = tokenBegin;
    CurrentIdx = currentIdx;
    CurrentState = currentState;
}
/// <summary>
/// Builds the three automata shared by the tests in this class: one that recognizes names,
/// one that recognizes names and <c>/* ... */</c> comments, and one that recognizes the
/// fixed sequence <c>123</c>.
/// </summary>
public AutomataTokenizer_Test()
{
    // --- "name": a letter followed by letters or digits ---------------------------------
    var nameTail = AutomataTokenizerState.ForEnd(char.IsLetterOrDigit, "name");
    nameTail.NextStates.Add(nameTail);

    var plainNameHead = AutomataTokenizerState.ForEnd(
        char.IsLetter,
        "name",
        new List<AutomataTokenizerState> { nameTail });
    _nameState = AutomataTokenizerState.ForBegin(
        new List<AutomataTokenizerState> { plainNameHead });

    // --- "comment": '/' '*' ... '*' '/' -------------------------------------------------
    var commentSlashClose = AutomataTokenizerState.ForEnd('/'.MatchCurrentPosition(), "comment");
    var commentEnd = AutomataTokenizerState.ForMiddle(
        '*'.MatchCurrentPosition(),
        new List<AutomataTokenizerState> { commentSlashClose });

    // The body accepts any character. It cannot reference itself inside its own initializer,
    // so the self-loop is appended after construction.
    var commentBody = AutomataTokenizerState.ForMiddle(
        anyChar => true,
        new List<AutomataTokenizerState> { commentEnd });
    commentBody.NextStates.Add(commentBody);

    // A separate name-head instance is used for this automaton, sharing only the tail state.
    var commentedNameHead = AutomataTokenizerState.ForEnd(
        char.IsLetter,
        "name",
        new List<AutomataTokenizerState> { nameTail });

    // The state machine walks its successors in order; if the body came first,
    // it would never backtrack to try the comment end.
    var afterOpeningStar = new List<AutomataTokenizerState>
    {
        commentEnd,
        commentBody,
    };
    var openingStar = AutomataTokenizerState.ForMiddle('*'.MatchCurrentPosition(), afterOpeningStar);
    var openingSlash = AutomataTokenizerState.ForMiddle(
        '/'.MatchCurrentPosition(),
        new List<AutomataTokenizerState> { openingStar });

    _nameWithCommentState = AutomataTokenizerState.ForBegin(
        new List<AutomataTokenizerState> { commentedNameHead, openingSlash });

    // --- "certain": exactly the characters '1', '2', '3' --------------------------------
    var three = AutomataTokenizerState.ForEnd('3'.MatchCurrentPosition(), "certain");
    var two = AutomataTokenizerState.ForMiddle(
        '2'.MatchCurrentPosition(),
        new List<AutomataTokenizerState> { three });
    var one = AutomataTokenizerState.ForMiddle(
        '1'.MatchCurrentPosition(),
        new List<AutomataTokenizerState> { two });

    _certainState = AutomataTokenizerState.ForBegin(new List<AutomataTokenizerState> { one });
}
/// <summary>
/// Runs an automaton whose only successor is <c>Ex1Automata.Symbol</c> over
/// <paramref name="buffer"/> from index 0 and checks that the result is a Right token typed
/// <c>Ex1Automata.SymbolToken</c> and that the reported end index matches.
/// </summary>
/// <param name="buffer">Input text handed to the tokenizer.</param>
/// <param name="expectEndAt">Expected value of the end index returned through the out parameter.</param>
public void 自动机_能够识别Symbol(string buffer, int expectEndAt)
{
    var successors = new List<AutomataTokenizerState> { Ex1Automata.Symbol };
    var startState = AutomataTokenizerState.ForBegin(successors);

    var outcome = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out var stoppedAt);

    // Token type is verified first, then the end position.
    outcome.ShouldBeRight(recognized => recognized.TokenType.ShouldBe(Ex1Automata.SymbolToken));
    stoppedAt.ShouldBe(expectEndAt);
}