Example #1
0
        /// <summary>
        /// Runs the tokenizer on "name1" from index 0 starting at <c>_nameState</c> and
        /// expects a successful result matching <c>Token("name1", "name", 0, 5)</c>,
        /// with the reported end index equal to 5.
        /// </summary>
        public void GetByAutomata_应该能够识别名字()
        {
            AutomataTokenizer.GetByAutomata(_nameState, "name1", 0, out var stopIdx)
                             .ShouldMatchRight(new Token("name1", "name", 0, 5));

            stopIdx.ShouldBe(5);
        }
Example #2
0
        /// <summary>
        /// Feeds <paramref name="buffer"/> to an automaton built from
        /// <c>Ex1Automata.DecimalOrArrow2OrArrow3</c> and expects a failure (left) result
        /// whose <c>CurrentIdx</c> equals <paramref name="expectCurrentIdx"/>.
        /// </summary>
        /// <param name="buffer">Input text handed to the tokenizer.</param>
        /// <param name="expectCurrentIdx">Expected <c>CurrentIdx</c> of the reported exception.</param>
        public void 自动机_在无法识别Decimal_Arrow2_Arrow3时能够报错(string buffer, int expectCurrentIdx)
        {
            var startState = AutomataTokenizerState.ForBegin(Ex1Automata.DecimalOrArrow2OrArrow3);

            var outcome = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out _);

            outcome.ShouldBeLeft(error => error.CurrentIdx.ShouldBe(expectCurrentIdx));
        }
Example #3
0
        /// <summary>
        /// Extends <c>_nameWithCommentState</c> with a self-looping "space" end state, tokenizes a
        /// longer buffer mixing names, spaces and comments, and checks both the sequence of
        /// token types and the sequence of lexemes of the successful results.
        /// </summary>
        public void GetAllTokenByAutomata_能够获取到大段文字的Token流()
        {
            // End state matching a single ' '; it is added to its own NextStates below.
            var spaceState = AutomataTokenizerState.ForEnd(' '.MatchCurrentPosition(), "space");
            spaceState.NextStates.Add(spaceState);
            _nameWithCommentState.NextStates.Add(spaceState);

            const string buffer = "abc def ggg /*fdsafdsf  fff  */ llll a/**/a ";

            var rights = AutomataTokenizer.GetAllTokenByAutomata(_nameWithCommentState, buffer).Rights();

            // Materialize once so both projections below enumerate the same array.
            var tokens = rights as Token[] ?? rights.ToArray();

            var tokenTypes = tokens.Select(token => token.TokenType).ToArray();
            tokenTypes.ShouldMatchObject(new[]
            {
                "name", "space", "name", "space", "name", "space",
                "comment", "space", "name", "space", "name",
                "comment", "name", "space",
            });

            var lexemes = tokens.Select(token => token.Lexeme).ToArray();
            lexemes.ShouldMatchObject(new[]
            {
                "abc", " ", "def", " ", "ggg", " ", "/*fdsafdsf  fff  */",
                " ", "llll", " ", "a", "/**/", "a", " ",
            });
        }
Example #4
0
        /// <summary>
        /// Runs the tokenizer on the single-character buffer "a" starting at <c>_nameState</c> and
        /// expects a successful result matching <c>Token("a", "name", 0, 1)</c> with end index 1.
        /// </summary>
        public void GetByAutomata_能够识别单个字符作为的名字()
        {
            var outcome = AutomataTokenizer.GetByAutomata(_nameState, "a", 0, out var stopIdx);

            outcome.ShouldMatchRight(new Token("a", "name", 0, 1));
            stopIdx.ShouldBe(1);
        }
Example #5
0
        /// <summary>
        /// Runs the tokenizer on "name1.1" starting at <c>_nameState</c> and expects a successful
        /// result matching <c>Token("name1", "name", 0, 5)</c> with end index 5, i.e. only the
        /// leading "name1" part of the buffer is returned as the token.
        /// </summary>
        public void GetByAutomata_可以识别目前识别的子串()
        {
            var outcome = AutomataTokenizer.GetByAutomata(_nameState, "name1.1", 0, out var stopIdx);

            outcome.ShouldMatchRight(new Token("name1", "name", 0, 5));
            stopIdx.ShouldBe(5);
        }
Example #6
0
        /// <summary>
        /// Feeds <paramref name="buffer"/> to an automaton built from
        /// <c>Ex1Automata.DecimalOrArrow2OrArrow3</c> and expects a successful (right) result whose
        /// token type is <paramref name="expectedTokenType"/> and whose reported end index is
        /// <paramref name="expectEndAt"/>.
        /// </summary>
        /// <param name="buffer">Input text handed to the tokenizer.</param>
        /// <param name="expectEndAt">Expected value of the end index written to the out parameter.</param>
        /// <param name="expectedTokenType">Expected <c>TokenType</c> of the recognized token.</param>
        public void 自动机_能够识别Decimal_Arrow2_Arrow3(string buffer, int expectEndAt, string expectedTokenType)
        {
            var startState = AutomataTokenizerState.ForBegin(Ex1Automata.DecimalOrArrow2OrArrow3);

            AutomataTokenizer.GetByAutomata(startState, buffer, 0, out var stopIdx)
                             .ShouldBeRight(recognized => recognized.TokenType.ShouldBe(expectedTokenType));

            stopIdx.ShouldBe(expectEndAt);
        }
Example #7
0
        /// <summary>
        /// Feeds <paramref name="buffer"/> to an automaton whose only start branch is
        /// <c>Ex1Automata.Symbol</c> and expects a failure (left) result whose
        /// <c>CurrentIdx</c> equals <paramref name="expectCurrentIdx"/>.
        /// </summary>
        /// <param name="buffer">Input text handed to the tokenizer.</param>
        /// <param name="expectCurrentIdx">Expected <c>CurrentIdx</c> of the reported exception.</param>
        public void 自动机_无法识别Symbol时能够报错(string buffer, int expectCurrentIdx)
        {
            var branches   = new List<AutomataTokenizerState> { Ex1Automata.Symbol };
            var startState = AutomataTokenizerState.ForBegin(branches);

            var outcome = AutomataTokenizer.GetByAutomata(startState, buffer, 0, out _);

            outcome.ShouldBeLeft(error => error.CurrentIdx.ShouldBe(expectCurrentIdx));
        }
Example #8
0
        /// <summary>
        /// Tokenizes "/*aaaaa*/abc" starting at <c>_nameWithCommentState</c> and expects the
        /// successful results to be a "comment" token followed by a "name" token.
        /// </summary>
        public void GetAllTokenByAutomata_能够获取Token流()
        {
            var tokens = AutomataTokenizer.GetAllTokenByAutomata(_nameWithCommentState, "/*aaaaa*/abc")
                                          .Rights()
                                          .ToArray();

            var expected = new[]
            {
                new Token("/*aaaaa*/", "comment", 0, 9),
                new Token("abc", "name", 9, 12),
            };

            tokens.ShouldMatchObject(expected);
        }
Example #9
0
        /// <summary>
        /// Feeds <paramref name="buffer"/> to an automaton whose only start branch is
        /// <c>Ex1Automata.Symbol</c> and expects a successful (right) result whose token type is
        /// <c>Ex1Automata.SymbolToken</c> and whose reported end index is
        /// <paramref name="expectEndAt"/>.
        /// </summary>
        /// <param name="buffer">Input text handed to the tokenizer.</param>
        /// <param name="expectEndAt">Expected value of the end index written to the out parameter.</param>
        public void 自动机_能够识别Symbol(string buffer, int expectEndAt)
        {
            var branches   = new List<AutomataTokenizerState> { Ex1Automata.Symbol };
            var startState = AutomataTokenizerState.ForBegin(branches);

            AutomataTokenizer.GetByAutomata(startState, buffer, 0, out var stopIdx)
                             .ShouldBeRight(recognized => recognized.TokenType.ShouldBe(Ex1Automata.SymbolToken));

            stopIdx.ShouldBe(expectEndAt);
        }
Example #10
0
 /// <summary>
 /// Tokenizes <paramref name="buffer"/> with the automaton rooted at <c>Ex1Automata.ForBegin</c>
 /// and post-processes the outcome.
 /// </summary>
 /// <param name="buffer">Source text to tokenize.</param>
 /// <returns>
 /// On the right branch: the tokens with <c>Ex1Automata.SpaceToken</c> entries excluded, passed
 /// through <c>TransformTokenTypeMatched</c> for <c>Ex1Automata.SymbolToken</c> (using
 /// <c>GetSymbolConvertSet()</c> and <c>TokenTypes.Identifier</c>), as a list.
 /// On the left branch: the error list, passed through unchanged.
 /// </returns>
 public static Either<List<WrongTokenException>, List<Token>> TokenizeBuffer(string buffer)
 {
     var separated = AutomataTokenizer.GetAllTokenByAutomata(Ex1Automata.ForBegin, buffer).Seprate();

     return separated.Match<Either<List<WrongTokenException>, List<Token>>>(
         Right: tokens => tokens
             .ExcludeTokenType(Ex1Automata.SpaceToken)
             .TransformTokenTypeMatched(Ex1Automata.SymbolToken, GetSymbolConvertSet(), TokenTypes.Identifier)
             .ToList(),
         Left: errors => errors);
 }
Example #11
0
        /// <summary>
        /// Runs the tokenizer on "123" starting at <c>_nameState</c> and expects a failure (left)
        /// result carrying the buffer, a token begin of 0, a current index of 0 and
        /// <c>_nameState</c> as the current state; the next begin index reported through the
        /// out parameter must be 1.
        /// </summary>
        public void GetByAutomata_不能识别的时候应返回异常()
        {
            var outcome = AutomataTokenizer.GetByAutomata(_nameState, "123", 0, out var resumeIdx);

            outcome.ShouldBeLeft(error =>
            {
                error.Buffer.ShouldBe("123");
                error.TokenBegin.ShouldBe(0);
                error.CurrentIdx.ShouldBe(0);
                error.CurrentState.ShouldBe(_nameState);
            });
            resumeIdx.ShouldBe(1);
        }
Example #12
0
        /// <summary>
        /// Tokenizes "/*asddg*/aaa" in two consecutive calls starting at
        /// <c>_nameWithCommentState</c>: the first call must yield the "comment" token and end
        /// index 9, the second call (resuming from that index) the "name" token and end index 12.
        /// </summary>
        public void GetByAutomata_在包含多个终止状态时能够正确识别_comment在前()
        {
            const string buffer = "/*asddg*/aaa";

            AutomataTokenizer.GetByAutomata(_nameWithCommentState, buffer, 0, out var commentEnd)
                             .ShouldMatchRight(new Token("/*asddg*/", "comment", 0, 9));
            commentEnd.ShouldBe(9);

            // Resume from where the previous token ended.
            AutomataTokenizer.GetByAutomata(_nameWithCommentState, buffer, commentEnd, out var nameEnd)
                             .ShouldMatchRight(new Token("aaa", "name", 9, 12));
            nameEnd.ShouldBe(12);
        }
Example #13
0
        /// <summary>
        /// Tokenizes "aaa/*asddg*/bbb" in three consecutive calls starting at
        /// <c>_nameWithCommentState</c>, each resuming at the end index reported by the previous
        /// call, and expects name, comment, name with end indices 3, 12 and 15.
        /// </summary>
        public void GetByAutomata_在包含多个终止状态时能够正确识别_夹心()
        {
            const string buffer = "aaa/*asddg*/bbb";

            AutomataTokenizer.GetByAutomata(_nameWithCommentState, buffer, 0, out var firstEnd)
                             .ShouldMatchRight(new Token("aaa", "name", 0, 3));
            firstEnd.ShouldBe(3);

            AutomataTokenizer.GetByAutomata(_nameWithCommentState, buffer, firstEnd, out var secondEnd)
                             .ShouldMatchRight(new Token("/*asddg*/", "comment", 3, 12));
            secondEnd.ShouldBe(12);

            AutomataTokenizer.GetByAutomata(_nameWithCommentState, buffer, secondEnd, out var thirdEnd)
                             .ShouldMatchRight(new Token("bbb", "name", 12, 15));
            thirdEnd.ShouldBe(15);
        }
Example #14
0
        /// <summary>
        /// Tokenizes ".name/*asdf" starting at <c>_nameWithCommentState</c> and expects three
        /// results: a failure at index 0, the "name" token, and a failure that began at index 5
        /// with current index 11. The separated result must be a left holding two errors.
        /// </summary>
        public void GetAllTokenByAutomata_能够获取所有的错误信息()
        {
            const string input = ".name/*asdf";

            var results = AutomataTokenizer.GetAllTokenByAutomata(_nameWithCommentState, input).ToArray();

            var separated = results.Seprate();
            separated.ShouldBeLeft(errors => errors.Count.ShouldBe(2));

            results[0].ShouldBeLeft(error =>
            {
                error.TokenBegin.ShouldBe(0);
                error.CurrentIdx.ShouldBe(0);
            });

            results[1].ShouldMatchRight(new Token("name", "name", 1, 5));

            results[2].ShouldBeLeft(error =>
            {
                error.TokenBegin.ShouldBe(5);
                error.CurrentIdx.ShouldBe(11);
            });
        }
Example #15
0
        /// <summary>
        /// Runs the tokenizer on <paramref name="buffer"/> starting at <c>_certainState</c> and
        /// expects a failure (left) result whose position and state match the expectations.
        /// </summary>
        /// <param name="buffer">Input text handed to the tokenizer.</param>
        /// <param name="expectEndAt">Expected <c>CurrentIdx</c> of the reported exception.</param>
        /// <param name="expectNextBegin">Expected next begin index written to the out parameter.</param>
        /// <param name="expectStateAt">
        /// A character the failing state's <c>Asserter</c> must accept, or <c>'\0'</c> when the
        /// failing state's <c>Asserter</c> is expected to be null.
        /// </param>
        public void GetByAutomata_在无法识别时能够返回含有终点位置的异常(string buffer, int expectEndAt, int expectNextBegin, char expectStateAt)
        {
            // Traversing with a finite state machine should never backtrack, so this should
            // directly return the token recognized so far.
            var outcome = AutomataTokenizer.GetByAutomata(_certainState, buffer, 0, out var resumeIdx);

            outcome.ShouldBeLeft(error =>
            {
                error.CurrentIdx.ShouldBe(expectEndAt);
                error.TokenBegin.ShouldBe(0);

                if (expectStateAt != 0)
                {
                    error.CurrentState.Asserter(expectStateAt).ShouldBeTrue();
                }
                else
                {
                    error.CurrentState.Asserter.ShouldBeNull();
                }
            });

            resumeIdx.ShouldBe(expectNextBegin);
        }