Beispiel #1
0
        /// <summary>
        /// Parse the beginning of a markup construct: a comment, a DOCTYPE,
        /// a process instruction, an end tag or an opening tag.
        /// </summary>
        /// <remarks>
        /// The method starts by reading the character that follows the tag opener
        /// (presumably the '&lt;' has already been consumed by the caller).
        /// Comments and DOCTYPE are delegated to <c>ParseComment</c> and <c>ParseDoctype</c>.
        /// An end tag is read completely (up to and including '&gt;') and the parser
        /// goes back to the content state. For an opening tag or a process instruction
        /// only the name is read; the token is kept in <c>_CurrentToken</c> and the
        /// character following the name is pushed back.
        /// </remarks>
        /// <returns>The token parsed.</returns>
        /// <exception cref="ParseError">
        /// Raised when the construct is malformed: bad comment opener, invalid tag name,
        /// end of stream inside an end tag, attribute in an end tag or end tag not closed.
        /// </exception>
        protected ParsedToken ParseStartTag()
        {
            _StartTagPosition = _CurrentPosition;
            _TagBuffer        = SourceReader.OpenBuffer();
            CharInfo c = ReadChar();

            // '!' introduces a comment ('--') or a DOCTYPE (a letter)
            if (c == '!')
            {
                c = ReadChar();
                if (Char.IsLetter(c.AsChar))
                {
                    // Give the letter back: ParseDoctype reads the keyword itself
                    SaveChar(c);
                    return(ParseDoctype());
                }
                if (c == '-')
                {
                    if (ReadChar() != '-')
                    {
                        throw new ParseError("Comments need to start with '<!--'.", ReadPosition);
                    }
                    return(ParseComment());
                }
                throw new ParseError("Comment or DOCTYPE expected.", ReadPosition);
            }

            // Determine the kind of tag and position 'c' on the first char of the name
            if (c == '?')
            {
                // Process instruction: whitespace is tolerated before the name
                _State = ParseState.ProcessInstruction;
                c      = ReadChar(false);
                while (c != CharInfo.EOF && Char.IsWhiteSpace(c.AsChar))
                {
                    c = ReadChar(false);
                }
            }
            else if (c == '/')
            {
                _State = ParseState.EndTag;
                c      = ReadChar(false);
                if (c == CharInfo.EOF || !Char.IsLetterOrDigit(c.AsChar))
                {
                    // Push the offending char back so it can be read again after the error,
                    // but never push the EOF sentinel back (same guard as at the end of this method).
                    if (c != CharInfo.EOF)
                    {
                        SaveChar(c);
                    }
                    throw new ParseError("Invalid tag name. Need to start with an alphanumeric", ReadPosition);
                }
            }
            else
            {
                _State = ParseState.Tag;
            }

            // The name must start with an alphanumeric.
            // (Already verified above for an end tag; here the char is not pushed back.)
            if (c == CharInfo.EOF || !Char.IsLetterOrDigit(c.AsChar))
            {
                throw new ParseError("Invalid tag name. Need to start with an alphanumeric", ReadPosition);
            }

            // Read the name: alphanumerics plus '.', ':' and '-'
            _CurrentRead = null;
            AddToCurrentRead(c);
            while ((c = ReadChar(false)) != CharInfo.EOF && (Char.IsLetterOrDigit(c.AsChar) || c == '.' || c == ':' || c == '-'))
            {
                AddToCurrentRead(c);
            }

            if (_State == ParseState.EndTag)
            {
                _CurrentToken          = ParsedTag.EndTag(GetCurrentRead(true));
                _CurrentToken.Position = _StartTagPosition;

                // Pass whitespace between the name and '>'
                while (c != CharInfo.EOF && Char.IsWhiteSpace(c.AsChar))
                {
                    c = ReadChar(false);
                }

                // Validate what follows the name. The error is created here, before the
                // stream is resynchronized, so that it carries the position of the offending char.
                ParseError error = null;
                if (c == CharInfo.EOF)
                {
                    error = new ParseError("Unexpected end of stream.", ReadPosition);
                }
                else if (IsAttributeNameChar(c.AsChar))
                {
                    error = new ParseError("End tag can't contains attribute.", ReadPosition);
                }
                else if (c != '>')
                {
                    error = new ParseError("Unexpected char. End tag not closed.", ReadPosition);
                }
                if (error != null)
                {
                    // Resynchronize the stream: skip up to the end of this tag ('>')
                    // or stop just before the beginning of the next one ('<').
                    while (c != CharInfo.EOF && c != '<' && c != '>')
                    {
                        c = ReadChar(false);
                    }
                    if (c == '<')
                    {
                        SaveChar(c);
                    }
                    throw error;
                }

                // End tag complete: back to content, the token is handed to the caller
                _State = ParseState.Content;
                ResetTagBuffer();
                var result = _CurrentToken;
                _CurrentToken = null;
                return(result);
            }

            // Opening tag or process instruction: give back the char following the name
            if (c != CharInfo.EOF)
            {
                SaveChar(c);
            }
            _CurrentToken          = _State == ParseState.Tag ? ParsedTag.OpenTag(GetCurrentRead(true)) : ParsedTag.OpenProcessInstruction(GetCurrentRead(true));
            _CurrentToken.Position = _StartTagPosition;
            return(_CurrentToken);
        }
Beispiel #2
0
        /// <summary>
        /// Parse a content as raw text.
        /// </summary>
        /// <remarks>
        /// This method is used for parsing the content of tags such as script or style.
        /// The parsing continues until the end tag matching <paramref name="tag"/> is found
        /// or the end of the stream is reached.
        /// If <paramref name="tag"/> is null or empty then any end tag is accepted.
        /// When the end tag is found it is stored in <c>_CurrentToken</c>.
        /// Any other '&lt;...' sequence is kept as text.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// Raised when the parser is not in the content state (a tag is still opened).
        /// </exception>
        /// <param name="tag">Tag name for the end tag expected.</param>
        /// <returns>The text read, entity-decoded, positioned at the start of the content.</returns>
        public ParsedText ParseContentText(String tag)
        {
            // Verify
            if (this._State != ParseState.Content)
            {
                throw new InvalidOperationException("Can't read a content in a opened tag.");
            }
            // Read loop
            var      start = SourceReader.Position;
            CharInfo c;

            while ((c = ReadChar(false)) != CharInfo.EOF)
            {
                // Plain char
                if (c != '<')
                {
                    AddToCurrentRead(c);
                    continue;
                }
                // Possible end tag
                var           endTagPos = c.Position;
                StringBuilder saveTag   = new StringBuilder(15);
                saveTag.Append(c.AsChar);
                String endTagName;
                if (TryReadRawEndTag(tag, saveTag, out endTagName, out c))
                {
                    // Save the end tag for the next parse, the '>' is consumed
                    _CurrentToken          = ParsedTag.EndTag(endTagName);
                    _CurrentToken.Position = endTagPos;
                    break;
                }
                // Not the expected end tag: what has been consumed is converted to 'text'
                var etp = endTagPos;
                foreach (var st in saveTag.ToString())
                {
                    AddToCurrentRead(new CharInfo(st, etp++));
                }
                // The char that stopped the scan is not part of 'saveTag'.
                // Never add the EOF sentinel to the text, and push a real char back
                // so the loop reads it again: it can be a '<' starting the real end tag.
                if (c == CharInfo.EOF)
                {
                    break;
                }
                SaveChar(c);
            }
            // Returns parse result
            LastParsed = new ParsedText()
            {
                Position = start,
                Text     = HEntity.HtmlDecode(GetCurrentRead(true), RemoveUnknownOrInvalidEntities)
            };
            return((ParsedText)LastParsed);
        }

        /// <summary>
        /// Try to read an end tag ("/ name &gt;", whitespace tolerated) just after a '&lt;'
        /// while reading a raw text content.
        /// </summary>
        /// <param name="tag">Expected tag name. Null or empty accepts any name.</param>
        /// <param name="consumed">Receives each char read that is not given back in <paramref name="next"/>.</param>
        /// <param name="tagName">Name of the end tag when found, else null.</param>
        /// <param name="next">
        /// Last char read. On failure it is the char that stopped the scan (maybe EOF),
        /// it is not in <paramref name="consumed"/>. On success it is the closing '&gt;'.
        /// </param>
        /// <returns>True if the expected end tag has been read completely.</returns>
        private bool TryReadRawEndTag(String tag, StringBuilder consumed, out String tagName, out CharInfo next)
        {
            tagName = null;
            // Pass whitespace between '<' and '/'
            while ((next = ReadChar(false)) != CharInfo.EOF && Char.IsWhiteSpace(next.AsChar))
            {
                consumed.Append(next.AsChar);
            }
            if (next != '/')
            {
                return(false);
            }
            // Pass '/' and the whitespace following
            consumed.Append(next.AsChar);
            while ((next = ReadChar(false)) != CharInfo.EOF && Char.IsWhiteSpace(next.AsChar))
            {
                consumed.Append(next.AsChar);
            }
            if (next == CharInfo.EOF)
            {
                return(false);
            }
            // Pass tag name (the first char is taken without validation)
            StringBuilder name = new StringBuilder(10);
            consumed.Append(next.AsChar);
            name.Append(next.AsChar);
            while ((next = ReadChar(false)) != CharInfo.EOF && IsAttributeNameChar(next.AsChar))
            {
                consumed.Append(next.AsChar);
                name.Append(next.AsChar);
            }
            if (next == CharInfo.EOF)
            {
                return(false);
            }
            // We find the good end tag ?
            if (!String.IsNullOrEmpty(tag) && !String.Equals(name.ToString(), tag, StringComparison.OrdinalIgnoreCase))
            {
                return(false);
            }
            // Search the closing '>' : give back the char following the name
            // then pass the whitespace.
            SaveChar(next);
            while ((next = ReadChar(false)) != CharInfo.EOF && Char.IsWhiteSpace(next.AsChar))
            {
                consumed.Append(next.AsChar);
            }
            if (next == '>')
            {
                tagName = name.ToString();
                return(true);
            }
            return(false);
        }