示例#1
0
        // Tokenizes a longer text made of names, spaces and block comments and checks
        // both the token types and the lexemes of the resulting token stream.
        public void GetAllTokenByAutomata_能够获取到大段文字的Token流()
        {
            const string input = "abc def ggg /*fdsafdsf  fff  */ llll a/**/a ";

            // The "space" state lists itself as a possible next state, and is then
            // registered as an extra alternative on the shared name/comment automaton.
            var spaceState = AutomataTokenizerState.ForEnd(' '.MatchCurrentPosition(), "space");
            spaceState.NextStates.Add(spaceState);
            _nameWithCommentState.NextStates.Add(spaceState);

            string[] expectedTypes =
            {
                "name", "space", "name", "space", "name", "space",
                "comment", "space", "name", "space", "name",
                "comment", "name", "space",
            };
            string[] expectedLexemes =
            {
                "abc", " ", "def", " ", "ggg", " ", "/*fdsafdsf  fff  */",
                " ", "llll", " ", "a", "/**/", "a", " ",
            };

            // Keep only the successful results and materialize them once, because the
            // sequence is projected twice below.
            var successes = AutomataTokenizer.GetAllTokenByAutomata(_nameWithCommentState, input).Rights();
            var tokens    = successes as Token[] ?? successes.ToArray();

            var actualTypes = tokens.Select(token => token.TokenType).ToArray();
            actualTypes.ShouldMatchObject(expectedTypes);

            var actualLexemes = tokens.Select(token => token.Lexeme).ToArray();
            actualLexemes.ShouldMatchObject(expectedLexemes);
        }
示例#2
0
        // Runs the Decimal/Arrow2/Arrow3 automaton from index 0 and expects a failure (left)
        // result whose CurrentIdx equals expectCurrentIdx.
        public void 自动机_在无法识别Decimal_Arrow2_Arrow3时能够报错(string buffer, int expectCurrentIdx)
        {
            var startState = AutomataTokenizerState.ForBegin(Ex1Automata.DecimalOrArrow2OrArrow3);

            // The end index is irrelevant for the failure case, so it is discarded.
            var result = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out _);

            result.ShouldBeLeft(error => error.CurrentIdx.ShouldBe(expectCurrentIdx));
        }
示例#3
0
        // Runs the Decimal/Arrow2/Arrow3 automaton from index 0 and expects a success (right)
        // result with the given token type, plus the given end index.
        public void 自动机_能够识别Decimal_Arrow2_Arrow3(string buffer, int expectEndAt, string expectedTokenType)
        {
            var startState = AutomataTokenizerState.ForBegin(Ex1Automata.DecimalOrArrow2OrArrow3);
            var outcome    = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out var stoppedAt);

            // Token type first, then the index reported through the out parameter.
            outcome.ShouldBeRight(recognized => recognized.TokenType.ShouldBe(expectedTokenType));
            stoppedAt.ShouldBe(expectEndAt);
        }
示例#4
0
        // Runs an automaton whose only alternative is Ex1Automata.Symbol and expects a failure
        // (left) result whose CurrentIdx equals expectCurrentIdx.
        public void 自动机_无法识别Symbol时能够报错(string buffer, int expectCurrentIdx)
        {
            var alternatives = new List<AutomataTokenizerState> { Ex1Automata.Symbol };
            var startState   = AutomataTokenizerState.ForBegin(alternatives);

            // The end index is irrelevant for the failure case, so it is discarded.
            var result = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out _);

            result.ShouldBeLeft(error => error.CurrentIdx.ShouldBe(expectCurrentIdx));
        }
 /// <summary>
 /// Creates the exception and records the tokenizer context supplied by the caller.
 /// </summary>
 /// <param name="message">Error text, forwarded to the base class constructor.</param>
 /// <param name="buffer">Value stored in <c>Buffer</c>.</param>
 /// <param name="tokenBegin">Value stored in <c>TokenBegin</c>.</param>
 /// <param name="currentIdx">Value stored in <c>CurrentIdx</c>.</param>
 /// <param name="currentState">Value stored in <c>CurrentState</c>.</param>
 public WrongTokenException(string message, string buffer, int tokenBegin, int currentIdx,
                            AutomataTokenizerState currentState)
     : base(message)
 {
     // Assigned in parameter order; the assignments are independent of each other.
     Buffer       = buffer;
     TokenBegin   = tokenBegin;
     CurrentIdx   = currentIdx;
     CurrentState = currentState;
 }
示例#6
0
        // Builds the automata stored in the fields used by the tests:
        //   _nameState            - a letter followed by letters/digits, token type "name"
        //   _nameWithCommentState - the same name rule plus "/* ... */", token type "comment"
        //   _certainState         - the characters '1', '2', '3' in sequence, token type "certain"
        public AutomataTokenizer_Test()
        {
            // Tail of a name: a letter or digit that may be followed by itself again.
            var nameTail = AutomataTokenizerState.ForEnd(char.IsLetterOrDigit, "name");
            nameTail.NextStates.Add(nameTail);

            // A name starts with a letter and continues with the shared tail state.
            var nameHead = AutomataTokenizerState.ForEnd(char.IsLetter, "name",
                                                         new List<AutomataTokenizerState> { nameTail });
            _nameState = AutomataTokenizerState.ForBegin(new List<AutomataTokenizerState> { nameHead });

            // Closing "*/" of a comment: '*' followed by the final '/'.
            var commentClosingSlash = AutomataTokenizerState.ForEnd('/'.MatchCurrentPosition(), "comment");
            var commentEnd = AutomataTokenizerState.ForMiddle('*'.MatchCurrentPosition(),
                                                              new List<AutomataTokenizerState> { commentClosingSlash });

            // The comment body accepts any character. A local cannot be referenced inside its own
            // initializer, so the self-loop is appended after the state has been created.
            var commentBody = AutomataTokenizerState.ForMiddle(ch => true,
                                                               new List<AutomataTokenizerState> { commentEnd });
            commentBody.NextStates.Add(commentBody);

            // Separate head instance for the combined automaton; it shares the same tail state.
            var nameHeadWithComments = AutomataTokenizerState.ForEnd(char.IsLetter, "name",
                                                                     new List<AutomataTokenizerState> { nameTail });

            var commentOpeningStar = AutomataTokenizerState.ForMiddle('*'.MatchCurrentPosition(),
                                                                      new List<AutomataTokenizerState>
            {
                // The state machine walks the next states in order; if the body state were placed
                // first, there would be no backtracking.
                commentEnd,
                commentBody,
            });
            var commentOpeningSlash = AutomataTokenizerState.ForMiddle('/'.MatchCurrentPosition(),
                                                                       new List<AutomataTokenizerState> { commentOpeningStar });

            _nameWithCommentState = AutomataTokenizerState.ForBegin(new List<AutomataTokenizerState>
            {
                nameHeadWithComments,
                commentOpeningSlash,
            });

            // Fixed chain '1' -> '2' -> '3', created from the last state backwards.
            var certainThree = AutomataTokenizerState.ForEnd('3'.MatchCurrentPosition(), "certain");
            var certainTwo   = AutomataTokenizerState.ForMiddle('2'.MatchCurrentPosition(),
                                                                new List<AutomataTokenizerState> { certainThree });
            var certainOne   = AutomataTokenizerState.ForMiddle('1'.MatchCurrentPosition(),
                                                                new List<AutomataTokenizerState> { certainTwo });

            _certainState = AutomataTokenizerState.ForBegin(new List<AutomataTokenizerState> { certainOne });
        }
示例#7
0
        // Runs an automaton whose only alternative is Ex1Automata.Symbol and expects a success
        // (right) result of type Ex1Automata.SymbolToken, plus the given end index.
        public void 自动机_能够识别Symbol(string buffer, int expectEndAt)
        {
            var alternatives = new List<AutomataTokenizerState> { Ex1Automata.Symbol };
            var startState   = AutomataTokenizerState.ForBegin(alternatives);

            var outcome = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out var stoppedAt);

            // Token type first, then the index reported through the out parameter.
            outcome.ShouldBeRight(recognized => recognized.TokenType.ShouldBe(Ex1Automata.SymbolToken));
            stoppedAt.ShouldBe(expectEndAt);
        }