// Tokenizes a longer piece of text that mixes names, spaces and block comments,
// then checks the token types and the lexemes of the resulting stream, in order.
public void GetAllTokenByAutomata_能够获取到大段文字的Token流()
{
    // Extend the shared name/comment automaton with a "space" state that also
    // lists itself as a next state.
    var spaceState = AutomataTokenizerState.ForEnd(' '.MatchCurrentPosition(), "space");
    spaceState.NextStates.Add(spaceState);
    _nameWithCommentState.NextStates.Add(spaceState);

    const string input = "abc def ggg /*fdsafdsf fff */ llll a/**/a ";

    var expectedTypes = new[]
    {
        "name", "space",
        "name", "space",
        "name", "space",
        "comment", "space",
        "name", "space",
        "name", "comment", "name", "space",
    };
    var expectedLexemes = new[]
    {
        "abc", " ",
        "def", " ",
        "ggg", " ",
        "/*fdsafdsf fff */", " ",
        "llll", " ",
        "a", "/**/", "a", " ",
    };

    var rights = AutomataTokenizer.GetAllTokenByAutomata(_nameWithCommentState, input).Rights();

    // Materialize once so the two projections below share the same token sequence.
    var tokens = rights as Token[] ?? rights.ToArray();

    var actualTypes = tokens.Select(token => token.TokenType).ToArray();
    actualTypes.ShouldMatchObject(expectedTypes);

    var actualLexemes = tokens.Select(token => token.Lexeme).ToArray();
    actualLexemes.ShouldMatchObject(expectedLexemes);
}
// Builds the automata stored in _nameState, _nameWithCommentState and _certainState:
//   * _nameState            - a letter followed by any number of letters/digits ("name");
//   * _nameWithCommentState - the same "name" branch plus a "/* ... */" branch ("comment");
//   * _certainState         - the exact character sequence '1' '2' '3' ("certain").
public AutomataTokenizer_Test()
{
    // Tail of a name: letters or digits, with itself as a next state.
    var nameTail = AutomataTokenizerState.ForEnd(char.IsLetterOrDigit, "name");
    nameTail.NextStates.Add(nameTail);

    // Name-only automaton: the first character must be a letter.
    var nameHead = AutomataTokenizerState.ForEnd(
        char.IsLetter,
        "name",
        new List<AutomataTokenizerState> { nameTail });
    _nameState = AutomataTokenizerState.ForBegin(
        new List<AutomataTokenizerState> { nameHead });

    // Comment terminator: '*' followed by '/', the latter tagged "comment".
    var commentCloseSlash = AutomataTokenizerState.ForEnd('/'.MatchCurrentPosition(), "comment");
    var commentCloseStar = AutomataTokenizerState.ForMiddle(
        '*'.MatchCurrentPosition(),
        new List<AutomataTokenizerState> { commentCloseSlash });

    // Comment body: matches any character. Its self-reference is added after
    // construction because the state does not exist yet while its own
    // next-state list is being built.
    var commentAnyChar = AutomataTokenizerState.ForMiddle(
        ch => true,
        new List<AutomataTokenizerState> { commentCloseStar });
    commentAnyChar.NextStates.Add(commentAnyChar);

    // A separate "name" head instance for the combined automaton.
    var nameHeadForCombined = AutomataTokenizerState.ForEnd(
        char.IsLetter,
        "name",
        new List<AutomataTokenizerState> { nameTail });

    // The state machine walks its next states in order; if the body state were
    // listed before the terminator, it would never backtrack.
    var commentOpenStar = AutomataTokenizerState.ForMiddle(
        '*'.MatchCurrentPosition(),
        new List<AutomataTokenizerState> { commentCloseStar, commentAnyChar });
    var commentOpenSlash = AutomataTokenizerState.ForMiddle(
        '/'.MatchCurrentPosition(),
        new List<AutomataTokenizerState> { commentOpenStar });

    _nameWithCommentState = AutomataTokenizerState.ForBegin(
        new List<AutomataTokenizerState> { nameHeadForCombined, commentOpenSlash });

    // Fixed-sequence automaton: '1' -> '2' -> '3', with the last state tagged "certain".
    var digitThree = AutomataTokenizerState.ForEnd('3'.MatchCurrentPosition(), "certain");
    var digitTwo = AutomataTokenizerState.ForMiddle(
        '2'.MatchCurrentPosition(),
        new List<AutomataTokenizerState> { digitThree });
    var digitOne = AutomataTokenizerState.ForMiddle(
        '1'.MatchCurrentPosition(),
        new List<AutomataTokenizerState> { digitTwo });

    _certainState = AutomataTokenizerState.ForBegin(
        new List<AutomataTokenizerState> { digitOne });
}