Esempio n. 1
0
        // Asking for more characters than the source holds must yield an empty look-ahead.
        public void LookAheadReturnsNothingWhenOutOfRange()
        {
            //Arrange: a three character source
            var state = new SourceScanState("123".AsMemory());

            //Act: request one character more than is available
            var lookAhead = state.LookAhead(4);

            //Assert
            lookAhead.IsEmpty.Should().BeTrue();
        }
        // Plain text without markup is expected to come back as one token with the same content.
        public void PlainTextSmokeTest()
        {
            //Arrange
            var state = new SourceScanState("hello".AsMemory());

            //Act
            var scanned = Scanner.ScanNextToken(ref state);

            //Assert
            scanned.NewValueStr().Should().Be("hello");
        }
Esempio n. 3
0
        // Looking ahead two characters from the start must expose the first two characters.
        public void LookAheadReturnsStuff()
        {
            //Arrange
            var state    = new SourceScanState("123".AsMemory());
            var expected = new[] { '1', '2' };

            //Act
            var actual = state.LookAhead(2).ToArray();

            //Assert
            actual.Should().BeEquivalentTo(expected);
        }
Esempio n. 4
0
        public string Textorize(string htmlInput)
        {
            if (string.IsNullOrWhiteSpace(htmlInput))
            {
                return("");
            }

            var source       = new SourceScanState(htmlInput.AsMemory());
            var htmlTagStack = new Stack <TokenInfo>(32);
            var state        = new TextorizeState(new StringBuilder(htmlInput.Length),
Esempio n. 5
0
        // Each Advance call must hand back the next character of the source, in order.
        public void AdvanceReturnsCharacters()
        {
            //Arrange
            const string input = "123";
            var          state = new SourceScanState(input.AsMemory());

            //Act & Assert: one Advance per source character
            foreach (var expected in input)
            {
                state.Advance().Should().Be(expected);
            }
        }
        // An input consisting of a single opening tag must scan as one HtmlOpenTag token
        // whose text equals the input.
        public void ReturnsHtmlOpenTagTokenForHtmlOpenTagInText(string input)
        {
            //Arrange
            var state = new SourceScanState(input.AsMemory());

            //Act
            var scanned = Scanner.ScanNextToken(ref state);

            //Assert
            var because = $"for : {scanned.NewValueStr()}";
            scanned.TokenType.Should().Be(TokenType.HtmlOpenTag, because);
            scanned.NewValueStr().Should().Be(input);
        }
        // The first token scanned from the input must have the expected type and span the whole input.
        public void TokenTypesTest(string input, TokenType expected)
        {
            //Arrange
            var state = new SourceScanState(input.AsMemory());

            //Act
            var scanned = Scanner.ScanNextToken(ref state);

            //Assert
            var because = $"for : {scanned.NewValueStr()}";
            scanned.TokenType.Should().Be(expected, because);
            scanned.NewValueStr().Should().Be(input);
        }
        // Once the only text token has been consumed, the next scan must report Eof.
        public void ReturnsEofTokenAsLastToken()
        {
            //Arrange
            var state = new SourceScanState("Hello".AsMemory());

            //Act: the first scan consumes the whole text
            var textToken = Scanner.ScanNextToken(ref state);
            textToken.NewValueStr().Should().Be("Hello");

            var lastToken = Scanner.ScanNextToken(ref state);

            //Assert
            lastToken.TokenType.Should().Be(TokenType.Eof);
        }
Esempio n. 9
0
        // With every character consumed there is nothing left to look ahead at.
        public void LookAheadWhenAtEndReturnsEmpty()
        {
            //Arrange: consume the entire source
            const string input = "123";
            var          state = new SourceScanState(input.AsMemory());

            for (var consumed = 0; consumed < input.Length; consumed++)
            {
                state.Advance();
            }

            //Act
            var lookAhead = state.LookAhead(2);

            //Assert
            lookAhead.IsEmpty.Should().BeTrue();
        }
Esempio n. 10
0
        // Advancing past the last character must return the '\0' sentinel.
        public void AdvanceReturnsNullCharWhenAtEnd()
        {
            //Arrange: consume the entire source
            const string input = "123";
            var          state = new SourceScanState(input.AsMemory());

            for (var consumed = 0; consumed < input.Length; consumed++)
            {
                state.Advance();
            }

            //Act
            var pastTheEnd = state.Advance();

            //Assert
            pastTheEnd.Should().Be('\0');
        }
Esempio n. 11
0
        // Peeking after the single character has been consumed must return the '\0' sentinel.
        public void PeekNextAtEndReturnsNullChar()
        {
            //Arrange: single character source, consumed by one Advance
            var state = new SourceScanState("1".AsMemory());
            state.Advance();

            //Act
            var peeked = state.PeekNext();

            //Assert
            peeked.Should().Be('\0');
        }
Esempio n. 12
0
        // After one Advance per source character the state must report that it is at the end.
        public void IsAtEndReturnsTrueWhenCurrentIndexAtEnd()
        {
            //Arrange
            const string text  = "123";
            var          state = new SourceScanState(text.AsMemory());

            //Act: consume every character
            for (var consumed = 0; consumed < text.Length; consumed++)
            {
                state.Advance();
            }

            //Assert
            state.IsAtEnd().Should().BeTrue();
        }
Esempio n. 13
0
        // An invalid tag start ("<1") must not swallow the valid tag that follows it.
        public void ReturnsHtmlTokenAfterInvalidTag()
        {
            //Arrange
            var state          = new SourceScanState("aaa<1<p>".AsMemory());
            var expectedTokens = new[] { "aaa", "<1", "<p>" };

            //Act & Assert: tokens must come out in exactly this order
            foreach (var expected in expectedTokens)
            {
                Scanner.ScanNextToken(ref state).NewValueStr().Should().Be(expected);
            }
        }
Esempio n. 14
0
        // A trailing '<' with nothing after it must be reported as plain text, after the text before it.
        public void EndsInHtmlOpenTagCharacter()
        {
            //Arrange
            var state = new SourceScanState("aaa<".AsMemory());

            //Act & Assert: leading text
            var leading = Scanner.ScanNextToken(ref state);

            leading.TokenType.Should().Be(TokenType.Text, $"for: {leading.NewValueStr()}");
            leading.NewValueStr().Should().Be("aaa");

            //Act & Assert: dangling '<'
            var dangling = Scanner.ScanNextToken(ref state);

            dangling.TokenType.Should().Be(TokenType.Text, $"for: {dangling.NewValueStr()}");
            dangling.NewValueStr().Should().Be("<");
        }
Esempio n. 15
0
        // Scanning any HTML input to exhaustion must terminate with an Eof token.
        public void ReturnsEofTokenAsLastTokenForHtml(string input)
        {
            //Arrange
            var   state = new SourceScanState(input.AsMemory());
            Token last  = default;

            //Act: keep pulling tokens until the scanner reports the end of input
            while (last.TokenType != TokenType.Eof)
            {
                last = Scanner.ScanNextToken(ref state);
            }

            //Assert
            last.TokenType.Should().Be(TokenType.Eof);
        }
Esempio n. 16
0
        // ReSharper restore InconsistentNaming

        /// <summary>
        /// Scans one token from <paramref name="state"/>, starting at its current position.
        /// </summary>
        /// <remarks>
        /// The checks run in this order:
        /// <para>1. End of input: returns <c>EOF_TOKEN</c> without touching the state.</para>
        /// <para>2. <c>HTML_ENTITY_START</c>: tries to read an HTML entity; returns an
        /// <see cref="TokenType.HtmlEntity"/> token, a <see cref="TokenType.Text"/> token,
        /// or falls through to the remaining checks.</para>
        /// <para>3. <c>HTML_TAG_START</c>: returns an invalid open-tag token when already inside a tag,
        /// a text token when the input ends here, otherwise marks the state as being inside a tag.</para>
        /// <para>4. Inside a tag: delegates to <c>ScanTag</c>.</para>
        /// <para>5. <c>HTML_TAG_END</c> outside of a tag: returned as text.</para>
        /// <para>6. Anything else: consumes characters up to the next tag or entity start and
        /// returns them as text.</para>
        /// </remarks>
        /// <param name="state">
        /// Scan state, passed by reference. Its <c>StartIndex</c> is set to the current position and
        /// the position is advanced as characters are consumed; <c>IsInTag</c> may be set.
        /// </param>
        /// <returns>The scanned token, or <c>EOF_TOKEN</c> when the state is already at the end.</returns>
        public static Token ScanNextToken(ref SourceScanState state)
        {
            if (state.IsAtEnd())
            {
                return(EOF_TOKEN);
            }

            //The token created below starts where the previous one stopped
            state.StartIndex = state.CurrentPos;

            var current = state.Advance();

            // A '&' in plain text
            //Possible HTML entity
            // hello &gt; world
            //       ^
            if (current == HTML_ENTITY_START)
            {
                //a valid entity ends on ';'
                // the only spaces that are allowed are before the terminating ';'
                // next should be either a letter or a '#'
                var next = state.PeekNext();
                if (char.IsLetter(next) || next == '#')
                {
                    state.Advance();

                    next = state.PeekNext();
                    if (char.IsLetterOrDigit(next))
                    {
                        //&[a-z][a-z] &#2 or &#x
                        //        ^     ^      ^
                        current = state.Advance();

                        //Consume the rest of the contiguous letters/digits of the entity name
                        while (current != '\0' && char.IsLetterOrDigit(state.PeekNext()))
                        {
                            current = state.Advance();

                            //NOTE(review): the loop only advances onto a character that PeekNext
                            //reported as a letter or digit, so this check looks unreachable.
                            //Confirm against Advance/PeekNext before removing it.
                            if (current == ';')
                            {
                                //end of entity
                                return(CreateToken(TokenType.HtmlEntity, HtmlElementType.None, state));
                            }
                        }

                        //end of contiguous chars, but no ';' found
                        //next non-whitespace character should be a ';'
                        // ex: &gt    ;
                        //            ^
                        current = AdvanceWhiteSpaces(ref state, current);

                        if (state.PeekNext() == ';' /*current == ';'*/)
                        {
                            //Include the terminating ';' in the entity token
                            state.Advance();
                            //end of entity
                            return(CreateToken(TokenType.HtmlEntity, HtmlElementType.None, state));
                        }

                        if (!char.IsLetterOrDigit(current))
                        {
                            //Invalid entity, treat as text
                            return(CreateToken(TokenType.Text, HtmlElementType.None, state));
                        }

                        // else it is not a valid entity: fall through to the checks below
                        // with whatever has been consumed so far
                    }
                }
            }

            if (current == HTML_TAG_START)
            {
                //Look at the character following the '<'
                current = state.Advance();
                if (state.IsInTag)
                {
                    //We found a < inside a tag, ex: "<u<p>>
                    //                                  ^
                    //We found a < inside a tag, ex: "<<p>>
                    //                                 ^
                    //Finish the current Tag and start a new one
                    return(CreateToken(TokenType.HtmlOpenTag, HtmlElementType.Invalid, state));
                }

                if (state.IsAtEnd())
                {
                    //we ended on a '<' character
                    return(CreateToken(TokenType.Text, HtmlElementType.Invalid, state));
                }

                state.IsInTag = true;
            }

            if (state.IsInTag)
            {
                return(ScanTag(ref state, current));
            }

            // Not in a tag (that case returned just above) and we found a '>'
            if (current == HTML_TAG_END)
            {
                //hello > world
                //      ^
                return(CreateToken(TokenType.Text, HtmlElementType.Invalid, state));
            }

            //Plain text: consume everything up to, but not including, the next '<' or '&'
            while (!state.IsAtEnd() &&
                   state.PeekNextUnsafe() != HTML_TAG_START &&
                   state.PeekNextUnsafe() != HTML_ENTITY_START)
            {
                state.Advance();
            }

            return(CreateToken(TokenType.Text, HtmlElementType.None, state));
        }