コード例 #1
0
        /// <summary>
        /// Tokenizes a longer buffer that mixes names, single spaces and block
        /// comments, then checks the produced token types and lexemes in order.
        /// </summary>
        public void GetAllTokenByAutomata_能够获取到大段文字的Token流()
        {
            // End state tagged "space" that matches a blank; it lists itself as a
            // next state, and is registered as an extra branch of the shared
            // name-with-comment start state.
            var spaceState = AutomataTokenizerState.ForEnd(' '.MatchCurrentPosition(), "space");
            spaceState.NextStates.Add(spaceState);
            _nameWithCommentState.NextStates.Add(spaceState);

            const string input = "abc def ggg /*fdsafdsf  fff  */ llll a/**/a ";

            var expectedTypes = new[]
            {
                "name", "space", "name", "space", "name", "space",
                "comment", "space", "name", "space", "name",
                "comment", "name", "space",
            };
            var expectedLexemes = new[]
            {
                "abc", " ", "def", " ", "ggg", " ", "/*fdsafdsf  fff  */",
                " ", "llll", " ", "a", "/**/", "a", " ",
            };

            var rights = AutomataTokenizer.GetAllTokenByAutomata(_nameWithCommentState, input).Rights();

            // Materialize once so the two projections below do not re-run the tokenizer.
            var tokens = rights as Token[] ?? rights.ToArray();

            var actualTypes = tokens.Select(token => token.TokenType).ToArray();
            actualTypes.ShouldMatchObject(expectedTypes);

            var actualLexemes = tokens.Select(token => token.Lexeme).ToArray();
            actualLexemes.ShouldMatchObject(expectedLexemes);
        }
コード例 #2
0
        /// <summary>
        /// Builds the three start states used by the tests in this class:
        /// <c>_nameState</c> (names only), <c>_nameWithCommentState</c> (names plus
        /// <c>/* ... */</c> comments) and <c>_certainState</c> (the literal sequence 1, 2, 3).
        /// </summary>
        public AutomataTokenizer_Test()
        {
            // Tail of a name: letters or digits, looping onto itself.
            var nameTail = AutomataTokenizerState.ForEnd(char.IsLetterOrDigit, "name");
            nameTail.NextStates.Add(nameTail);

            // Names-only automaton: a leading letter followed by the tail state.
            var nameHead = AutomataTokenizerState.ForEnd(
                char.IsLetter,
                "name",
                new List<AutomataTokenizerState> { nameTail });

            _nameState = AutomataTokenizerState.ForBegin(
                new List<AutomataTokenizerState> { nameHead });

            // Comment terminator: '*' followed by '/', ending in a state tagged "comment".
            var closingSlash = AutomataTokenizerState.ForEnd('/'.MatchCurrentPosition(), "comment");
            var commentEnd = AutomataTokenizerState.ForMiddle(
                '*'.MatchCurrentPosition(),
                new List<AutomataTokenizerState> { closingSlash });

            // Comment body: accepts any character; may continue with the terminator
            // or with itself (self-reference added after construction).
            var commentBody = AutomataTokenizerState.ForMiddle(
                ch => true,
                new List<AutomataTokenizerState> { commentEnd });
            commentBody.NextStates.Add(commentBody);

            // Names-plus-comments automaton. It gets its own leading-letter state
            // instance, just as the original code created a separate one.
            var nameHeadForComments = AutomataTokenizerState.ForEnd(
                char.IsLetter,
                "name",
                new List<AutomataTokenizerState> { nameTail });

            // Original author's note: the state machine walks next states in order;
            // if the body state were listed first, it would never backtrack to the
            // terminator. Hence commentEnd precedes commentBody.
            var openingStar = AutomataTokenizerState.ForMiddle(
                '*'.MatchCurrentPosition(),
                new List<AutomataTokenizerState> { commentEnd, commentBody });

            var openingSlash = AutomataTokenizerState.ForMiddle(
                '/'.MatchCurrentPosition(),
                new List<AutomataTokenizerState> { openingStar });

            _nameWithCommentState = AutomataTokenizerState.ForBegin(
                new List<AutomataTokenizerState> { nameHeadForComments, openingSlash });

            // Fixed-sequence automaton: '1', then '2', then '3' ending in a state tagged "certain".
            var three = AutomataTokenizerState.ForEnd('3'.MatchCurrentPosition(), "certain");
            var two = AutomataTokenizerState.ForMiddle(
                '2'.MatchCurrentPosition(),
                new List<AutomataTokenizerState> { three });
            var one = AutomataTokenizerState.ForMiddle(
                '1'.MatchCurrentPosition(),
                new List<AutomataTokenizerState> { two });

            _certainState = AutomataTokenizerState.ForBegin(
                new List<AutomataTokenizerState> { one });
        }