Exemplo n.º 1
0
        // Checks that IsAtEnd() is true after calling Advance() once per character of a
        // three-character source.
        public void IsAtEndReturnsTrueWhenCurrentIndexAtEnd()
        {
            // Arrange: scanner over a three-character source
            var state = new SourceScanState("123".AsMemory());

            // Act: one Advance() call for each of the three characters
            for (var step = 0; step < 3; step++)
            {
                state.Advance();
            }

            // Assert
            var reachedEnd = state.IsAtEnd();
            reachedEnd.Should().BeTrue();
        }
Exemplo n.º 2
0
        // ReSharper restore InconsistentNaming

        /// <summary>
        /// Scans the next token from <paramref name="state"/>, starting at its current position.
        /// </summary>
        /// <remarks>
        /// Checks, in order: end of input, a possible HTML entity, a tag start, being inside a tag,
        /// a stray tag-end character, and finally a run of plain text.
        /// </remarks>
        /// <param name="state">
        /// Scanner state. It is advanced past the characters consumed here, and its
        /// <c>StartIndex</c> and <c>IsInTag</c> members are updated as a side effect.
        /// </param>
        /// <returns>
        /// <c>EOF_TOKEN</c> when the input is exhausted; otherwise the token produced by
        /// <c>CreateToken</c> or <c>ScanTag</c>.
        /// </returns>
        public static Token ScanNextToken(ref SourceScanState state)
        {
            if (state.IsAtEnd())
            {
                return EOF_TOKEN;
            }

            // Mark where this token begins before consuming anything.
            state.StartIndex = state.CurrentPos;

            var current = state.Advance();

            // Possible HTML entity in plain text
            // hello &gt; world
            //       ^
            if (current == HTML_ENTITY_START)
            {
                // A valid entity ends on ';'
                // The only spaces that are allowed are before the terminating ';'
                // Next should be either a letter or a '#'
                var next = state.PeekNext();
                if (char.IsLetter(next) || next == '#')
                {
                    state.Advance();

                    next = state.PeekNext();
                    if (char.IsLetterOrDigit(next))
                    {
                        //&[a-z][a-z] &#2 or &#x
                        //        ^     ^      ^
                        current = state.Advance();

                        // Consume the rest of the contiguous letters/digits.
                        while (current != '\0' && char.IsLetterOrDigit(state.PeekNext()))
                        {
                            current = state.Advance();

                            // NOTE(review): this check looks unreachable if Advance() returns the
                            // character PeekNext() just reported (a letter or digit) - confirm
                            // against SourceScanState before removing.
                            if (current == ';')
                            {
                                // End of entity
                                return CreateToken(TokenType.HtmlEntity, HtmlElementType.None, state);
                            }
                        }

                        // End of contiguous chars, but no ';' found yet.
                        // The next non-whitespace character should be a ';'
                        // ex: &gt    ;
                        //            ^
                        current = AdvanceWhiteSpaces(ref state, current);

                        if (state.PeekNext() == ';')
                        {
                            state.Advance();
                            // End of entity
                            return CreateToken(TokenType.HtmlEntity, HtmlElementType.None, state);
                        }

                        if (!char.IsLetterOrDigit(current))
                        {
                            // Invalid entity, treat as text
                            return CreateToken(TokenType.Text, HtmlElementType.None, state);
                        }

                        // Otherwise it is not a valid entity; fall through to the checks below.
                    }
                }
            }

            if (current == HTML_TAG_START)
            {
                current = state.Advance();
                if (state.IsInTag)
                {
                    //We found a < inside a tag, ex: "<u<p>>
                    //                                  ^
                    //We found a < inside a tag, ex: "<<p>>
                    //                                 ^
                    // Finish the current tag and start a new one
                    return CreateToken(TokenType.HtmlOpenTag, HtmlElementType.Invalid, state);
                }

                if (state.IsAtEnd())
                {
                    // We ended on a '<' character
                    return CreateToken(TokenType.Text, HtmlElementType.Invalid, state);
                }

                state.IsInTag = true;
            }

            if (state.IsInTag)
            {
                return ScanTag(ref state, current);
            }

            // Not in a tag (that case returned just above) and we found a '>'.
            // IsInTag is necessarily false here, so there is no tag state to reset.
            if (current == HTML_TAG_END)
            {
                //hello > world
                //      ^
                return CreateToken(TokenType.Text, HtmlElementType.Invalid, state);
            }

            // Plain text: consume up to (not including) the next tag start or entity start.
            while (!state.IsAtEnd() &&
                   state.PeekNextUnsafe() != HTML_TAG_START &&
                   state.PeekNextUnsafe() != HTML_ENTITY_START)
            {
                state.Advance();
            }

            return CreateToken(TokenType.Text, HtmlElementType.None, state);
        }