Exemple #1
0
            /// <summary>
            /// Calls <c>ReadAndSkipWhitespace</c> once, then keeps calling <c>TryParseAttribute</c>
            /// until the current character is '/' or '&gt;', appending each parsed attribute to the
            /// end of a freshly created list.
            /// </summary>
            /// <param name="attributes">
            /// Always assigned a new list. It holds every attribute parsed before the method returned,
            /// so it may be partially filled when the result is <c>false</c>.
            /// </param>
            /// <returns>
            /// <c>true</c> once '/' or '&gt;' is the current character; <c>false</c> as soon as
            /// <c>TryParseAttribute</c> fails.
            /// </returns>
            private bool TryParseAttributes(out HtmlObjectLinkedList<HtmlAttribute> attributes)
            {
                attributes = new HtmlObjectLinkedList<HtmlAttribute>();
                ReadAndSkipWhitespace();

                while (true)
                {
                    // '/' or '>' terminates the attribute list.
                    if (currentChar == '/' || currentChar == '>')
                    {
                        return true;
                    }

                    HtmlAttribute parsed;
                    if (!TryParseAttribute(out parsed))
                    {
                        // Could not parse an attribute
                        return false;
                    }

                    // Append after the current last node so attributes keep their parse order.
                    attributes.AddAfter(attributes._last, parsed);
                }
            }
Exemple #2
0
            /// <summary>
            /// Parses the construct that starts at the current '&lt;' character.
            /// After <c>ReadAndSkipWhitespace</c>, a '/' is handed to <c>TryParseClosingTag</c> and a '!'
            /// to <c>TryParseComment</c>; anything else must start with a letter and is parsed as an
            /// element tag (name, optional attributes, optional "/&gt;" void marker). The new element is
            /// registered through <c>SetParsing</c> and parsing continues with <c>TryParseInnerText</c>
            /// unless the element is a void element without a parent.
            /// </summary>
            /// <returns>
            /// <c>true</c> when parsing succeeded; otherwise <c>false</c>. This method assigns
            /// <c>parseError</c> for the failures it detects itself; when a helper call fails it returns
            /// <c>false</c> without touching <c>parseError</c> (presumably the helper has set it — confirm).
            /// </returns>
            public bool TryParseOpeningTag()
            {
                if (currentChar != '<')
                {
                    // No opening tag, e.g. "abc", "<abc>", "<abc>  " or "<abc>InnerText"
                    parseError = HtmlParseError.NoOpeningTag;
                    return false;
                }

                ReadAndSkipWhitespace();

                // --- "</...": closing tag -------------------------------------------------
                if (currentChar == '/')
                {
                    if (!TryParseClosingTag())
                    {
                        return false;
                    }

                    // If the element we just finished parsing has a parent, that parent needs a closing tag too.
                    // If there is more text, there may be more to parse.
                    bool moreToParse = currentElement != null || currentIndex + 1 < text.Length;
                    if (!moreToParse)
                    {
                        // Finished parsing
                        return true;
                    }
                    return TryParseInnerText();
                }

                // --- "<!...": comment or doctype ------------------------------------------
                if (currentChar == '!')
                {
                    if (!TryParseComment())
                    {
                        // Couldn't parse the comment
                        return false;
                    }

                    if (currentIndex + 1 >= text.Length)
                    {
                        // Doctype or comment on its own, e.g. "<!--comment-->" or "<div><!--comment-->"
                        parseError = HtmlParseError.LoneDoctype;
                        return false;
                    }

                    // Got more to parse
                    return TryParseInnerText();
                }

                // --- element tag ----------------------------------------------------------
                if (!IsLetter(currentChar))
                {
                    // No valid tag, e.g. "<>", "<1"
                    parseError = HtmlParseError.InvalidTag;
                    return false;
                }

                // Scan the tag name: it ends at the first '/', '>' or whitespace character.
                int  nameStart        = currentIndex;
                int  nameEnd          = -1;
                bool nameEndedAtClose = false;

                while (ReadNext())
                {
                    bool atClose = currentChar == '/' || currentChar == '>';
                    if (atClose || char.IsWhiteSpace(currentChar))
                    {
                        nameEndedAtClose = atClose;
                        nameEnd          = currentIndex - 1;
                        break;
                    }
                }

                if (nameEnd == -1)
                {
                    // No end of tag, e.g. "<abc", "<abc "
                    parseError = HtmlParseError.OpeningTagNotClosed;
                    return false;
                }

                int    nameLength = nameEnd - nameStart + 1;
                string tag        = text.ToAsciiLower(nameStart, nameLength);

                // Attributes are only looked for when the name was terminated by whitespace.
                HtmlObjectLinkedList<HtmlAttribute> attributes = null;
                if (!nameEndedAtClose)
                {
                    if (!TryParseAttributes(out attributes))
                    {
                        // Could not parse attributes
                        return false;
                    }
                }

                // A '/' at this point marks a void element and must be followed by '>'.
                bool isVoid = currentChar == '/';
                if (isVoid)
                {
                    ReadAndSkipWhitespace();
                    if (currentChar != '>')
                    {
                        // No end of void tag, e.g. "<abc/", "<abc/a>"
                        parseError = HtmlParseError.NodeNotClosed;
                        return false;
                    }
                }

                // Move past '>' — except for a void element with no current element, whose
                // trailing text is checked further down.
                if (!isVoid || currentElement != null)
                {
                    ReadAndSkipWhitespace();
                }

                HtmlElement element;
                if (rootElement == null && tag == "html" && !isVoid)
                {
                    // First element is a non-void <html>: represent it as a document carrying the doctype.
                    HtmlDocument document = new HtmlDocument();
                    document.Doctype = doctype;
                    element = document;
                }
                else if (rootElement == null && parsingDocument)
                {
                    // First tag of a document has to be an open html tag
                    parseError = HtmlParseError.FirstElementInDocumentNotHtml;
                    return false;
                }
                else
                {
                    element = new HtmlElement(tag, isVoid);
                }

                if (attributes != null)
                {
                    element._attributes = attributes;
                }
                SetParsing(element);

                bool isLoneVoidElement = element.IsVoid && element.Parent == null;
                if (!isLoneVoidElement)
                {
                    // If the element has a parent, the parent still needs its closing tag.
                    // If it has no parent but is non-void, the element itself still needs its closing tag.
                    // A failure here is e.g. "<abc>Inner<def></def>".
                    return TryParseInnerText();
                }

                // Void element without a parent: only whitespace may follow it.
                bool nothingButWhitespaceLeft =
                    !ReadNext() || (char.IsWhiteSpace(currentChar) && !ReadAndSkipWhitespace());
                if (nothingButWhitespaceLeft)
                {
                    // Valid void element without a parent, e.g. "<abc/>", "<abc/>  "
                    return true;
                }

                // Invalid text after a void element without a parent, e.g. "<abc/>a"
                parseError = HtmlParseError.InvalidTextAfterNode;
                return false;
            }