/// <summary>
/// Skips whitespace, then parses attributes one after another until the
/// current character is '/' or '>'.
/// </summary>
/// <param name="attributes">
/// Always assigned a freshly created list. Each successfully parsed attribute
/// is added to it with <c>AddAfter</c>, using the list's <c>_last</c> node as the anchor.
/// </param>
/// <returns>
/// <c>true</c> once '/' or '>' is reached; <c>false</c> as soon as
/// <c>TryParseAttribute</c> reports a failure.
/// </returns>
private bool TryParseAttributes(out HtmlObjectLinkedList<HtmlAttribute> attributes)
{
    HtmlObjectLinkedList<HtmlAttribute> collected = new HtmlObjectLinkedList<HtmlAttribute>();
    attributes = collected;

    ReadAndSkipWhitespace();

    for (;;)
    {
        // '/' or '>' terminates the attribute list.
        if (currentChar == '/' || currentChar == '>')
        {
            return true;
        }

        HtmlAttribute parsed;
        if (!TryParseAttribute(out parsed))
        {
            // Could not parse an attribute; give up, leaving what was collected so far in the out list.
            return false;
        }

        collected.AddAfter(collected._last, parsed);
    }
}
/// <summary>
/// Parses the markup that begins at the current '&lt;' character.
/// Depending on the character that follows, this delegates to
/// <c>TryParseClosingTag</c> ('/'), <c>TryParseComment</c> ('!'), or parses an
/// opening tag itself: tag name, optional attributes and an optional
/// void marker ("/&gt;"). For an opening tag a new element is created and
/// handed to <c>SetParsing</c>, after which parsing continues with
/// <c>TryParseInnerText</c> where a closing tag is still expected.
/// </summary>
/// <returns>
/// <c>true</c> if parsing succeeded; otherwise <c>false</c>. For failures detected
/// directly in this method, <c>parseError</c> is set before returning; failures
/// reported by the helper methods are propagated unchanged.
/// </returns>
public bool TryParseOpeningTag()
{
    // Anything other than '<' at this position cannot start a tag.
    if (currentChar != '<')
    {
        parseError = HtmlParseError.NoOpeningTag;
        return false;
    }

    ReadAndSkipWhitespace();

    // "</": this is a closing tag rather than an opening one.
    if (currentChar == '/')
    {
        if (!TryParseClosingTag())
        {
            return false;
        }

        // If the element just finished has a parent, that parent still needs its own
        // closing tag; and if text remains, there may be more to parse.
        bool moreToParse = currentElement != null || currentIndex + 1 < text.Length;
        return !moreToParse || TryParseInnerText();
    }

    // "<!": doctype or comment.
    if (currentChar == '!')
    {
        if (!TryParseComment())
        {
            // Couldn't parse the comment
            return false;
        }

        if (currentIndex + 1 >= text.Length)
        {
            // Doctype or comment on its own with nothing following it,
            // e.g. "<!--comment-->" or "<div><!--comment-->"
            parseError = HtmlParseError.LoneDoctype;
            return false;
        }

        return TryParseInnerText();
    }

    // The tag name has to start with a letter, which rules out e.g. "<>" and "<1".
    if (!IsLetter(currentChar))
    {
        parseError = HtmlParseError.InvalidTag;
        return false;
    }

    // Scan forward to the first '/', '>' or whitespace character; the tag name
    // ends on the character just before it.
    int nameStart = currentIndex;
    int nameEnd = -1;
    bool closedRightAfterName = false;
    while (ReadNext())
    {
        char c = currentChar;
        closedRightAfterName = c == '/' || c == '>';
        if (closedRightAfterName || char.IsWhiteSpace(c))
        {
            nameEnd = currentIndex - 1;
            break;
        }
    }

    if (nameEnd == -1)
    {
        // Ran out of input before the tag name ended, e.g. "<abc"
        parseError = HtmlParseError.OpeningTagNotClosed;
        return false;
    }

    string tag = text.ToAsciiLower(nameStart, nameEnd - nameStart + 1);

    // Attributes can only be present when the name was followed by whitespace.
    HtmlObjectLinkedList<HtmlAttribute> parsedAttributes = null;
    if (!closedRightAfterName)
    {
        if (!TryParseAttributes(out parsedAttributes))
        {
            // Could not parse attributes
            return false;
        }
    }

    // A '/' at this point marks a void element and must be followed by '>'.
    bool isVoid = currentChar == '/';
    if (isVoid)
    {
        ReadAndSkipWhitespace();
        if (currentChar != '>')
        {
            // No end of void tag, e.g. "<abc/", "<abc/a>"
            parseError = HtmlParseError.NodeNotClosed;
            return false;
        }

        if (currentElement != null)
        {
            // Read on if this void element is a child
            ReadAndSkipWhitespace();
        }
    }
    else
    {
        ReadAndSkipWhitespace();
    }

    // Build the element. The first element parsed may become the document itself.
    HtmlElement element;
    if (rootElement == null && tag == "html" && !isVoid)
    {
        HtmlDocument document = new HtmlDocument();
        document.Doctype = doctype;
        element = document;
    }
    else if (rootElement == null && parsingDocument)
    {
        // First tag of a document has to be an open html tag
        parseError = HtmlParseError.FirstElementInDocumentNotHtml;
        return false;
    }
    else
    {
        element = new HtmlElement(tag, isVoid);
    }

    if (parsedAttributes != null)
    {
        element._attributes = parsedAttributes;
    }

    SetParsing(element);

    if (element.IsVoid && element.Parent == null)
    {
        // A void element without a parent may only be followed by whitespace up to
        // the end of the input, e.g. "<abc/>" or "<abc/> ".
        if (!ReadNext())
        {
            return true;
        }

        if (char.IsWhiteSpace(currentChar) && !ReadAndSkipWhitespace())
        {
            return true;
        }

        // Invalid text after a void element without a parent, e.g. "<abc/>a"
        parseError = HtmlParseError.InvalidTextAfterNode;
        return false;
    }

    // If the element has a parent, the parent still needs a closing tag.
    // If it has no parent but is non-void, the element itself needs a closing tag.
    bool closingTagExpected = element.Parent != null || !element.IsVoid;
    if (closingTagExpected && !TryParseInnerText())
    {
        // Couldn't parse inner text, e.g. "<abc>Inner<def></def>"
        return false;
    }

    return true;
}