/// <summary>
/// Visits a document type node by handing it to <c>DefaultVisit</c>.
/// </summary>
/// <param name="documentType">The document type node to visit; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="documentType"/> is null.</exception>
protected override void VisitDocumentType(DomDocumentType documentType)
{
    if (documentType == null)
    {
        throw new ArgumentNullException("documentType");
    }

    DefaultVisit(documentType);
}
/// <summary>
/// Visits a document type node by forwarding its name, public id and system id
/// to <c>writer.WriteDocumentType</c>.
/// </summary>
/// <param name="documentType">The document type node to write; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="documentType"/> is null.</exception>
protected override void VisitDocumentType(DomDocumentType documentType)
{
    if (documentType == null)
    {
        throw new ArgumentNullException("documentType");
    }

    // Read the three identifiers in the same order they are passed on.
    var name = documentType.Name;
    var publicId = documentType.PublicId;
    var systemId = documentType.SystemId;

    writer.WriteDocumentType(name, publicId, systemId);
}
/// <summary>
/// Writes the outer text of a document type node to <c>_writer</c>, unless
/// <c>TemplateContext.MasterInfo</c> is set.
/// </summary>
/// <param name="documentType">The document type node to write.</param>
protected override void WriteDocumentType(DomDocumentType documentType)
{
    // Drop the document type if we're running a master.
    if (TemplateContext.MasterInfo != null)
    {
        return;
    }

    TextUtility.OuterText(_writer, documentType);
}
/// <summary>
/// Emits a generated statement that recreates the given document type via
/// <c>this.__document.CreateDocumentType(...)</c>, stores it in a fresh variable
/// and appends that variable through <c>DoAppend</c>.
/// </summary>
/// <param name="documentType">The document type node to emit code for.</param>
protected override void VisitDocumentType(DomDocumentType documentType)
{
    string varName = SafeNewVariable(documentType.Name);

    // The identifiers end up inside double-quoted string literals of the generated
    // source, so they must be escaped; otherwise a quote, backslash or line break in
    // a public/system id produces generated code that does not compile (or that
    // contains text the document author controls).
    CurrentOutput.WriteLine(
        "{0} = this.__document.CreateDocumentType(\"{1}\", \"{2}\", \"{3}\");",
        varName,
        EscapeStringLiteral(documentType.Name),
        EscapeStringLiteral(documentType.PublicId),
        EscapeStringLiteral(documentType.SystemId));

    DoAppend(varName);
}

/// <summary>
/// Escapes a value for use inside a regular (non-verbatim) double-quoted string literal.
/// A null value yields the empty string, which matches how a null format argument renders.
/// </summary>
// NOTE(review): assumes the generated code is C# (or another C-family language with
// the same backslash escapes) -- confirm against the code generator's target.
private static string EscapeStringLiteral(string value)
{
    if (value == null)
    {
        return string.Empty;
    }

    // Backslash must be handled first so the escapes added below are not doubled.
    return value
        .Replace("\\", "\\\\")
        .Replace("\"", "\\\"")
        .Replace("\r", "\\r")
        .Replace("\n", "\\n");
}
/// <summary>
/// Converts an <see cref="XmlNode"/> child into a <see cref="DomNode"/>, appends it to
/// the given parent and then copies the child's content via <c>Copy</c>.
/// When the source document declares a document type and the page is an
/// <c>XmlPage</c>, the page's document type is set first.
/// </summary>
/// <param name="page">the owner page of the DOM nodes to be created</param>
/// <param name="parent">the parent DomNode</param>
/// <param name="child">the child node to convert and append</param>
/// <param name="handleXHTMLAsHTML">if true elements from the XHTML namespace are handled as HTML elements instead of DOM elements</param>
public static void AppendChild(SgmlPage page, DomNode parent, XmlNode child, bool handleXHTMLAsHTML)
{
    // XmlNode.OwnerDocument is null when the node is itself the document, so fall
    // back to the node in that case instead of dereferencing null.
    XmlDocument ownerDocument = child.OwnerDocument ?? child as XmlDocument;
    XmlDocumentType documentType = ownerDocument != null ? ownerDocument.DocumentType : null;

    XmlPage xmlPage = page as XmlPage;
    if (documentType != null && xmlPage != null)
    {
        DomDocumentType domDoctype = new DomDocumentType(
            page,
            documentType.Name,
            documentType.PublicId,
            documentType.SystemId);
        xmlPage.setDocumentType(domDoctype);
    }

    DomNode childXml = CreateFrom(page, child, handleXHTMLAsHTML);
    parent.appendChild(childXml);
    Copy(page, child, childXml, handleXHTMLAsHTML);
}
/// <summary>
/// Appends the markup of a document type node to <c>_output</c>, in one of the forms
/// <c>&lt;!DOCTYPE name&gt;</c>, <c>&lt;!DOCTYPE name PUBLIC "pub"&gt;</c>,
/// <c>&lt;!DOCTYPE name PUBLIC "pub" "sys"&gt;</c> or <c>&lt;!DOCTYPE name SYSTEM "sys"&gt;</c>.
/// </summary>
/// <param name="node">The document type node to serialize.</param>
protected override void VisitDocumentType(DomDocumentType node)
{
    _output.Append("<!DOCTYPE ").Append(node.Name);

    bool hasPublicId = !StringUtil.IsBlank(node.PublicId);
    if (hasPublicId)
    {
        _output.Append(" PUBLIC \"")
               .Append(node.PublicId)
               .Append("\"");
    }

    if (!StringUtil.IsBlank(node.SystemId))
    {
        // A system identifier that is not preceded by a public identifier must be
        // introduced by the SYSTEM keyword; without it the declaration is malformed.
        if (!hasPublicId)
        {
            _output.Append(" SYSTEM");
        }

        _output.Append(" \"")
               .Append(node.SystemId)
               .Append("\"");
    }

    _output.Append('>');
}
/// <summary>
/// Writes the markup of a document type node to the given writer, in one of the forms
/// <c>&lt;!DOCTYPE name&gt;</c>, <c>&lt;!DOCTYPE name PUBLIC "pub"&gt;</c>,
/// <c>&lt;!DOCTYPE name PUBLIC "pub" "sys"&gt;</c> or <c>&lt;!DOCTYPE name SYSTEM "sys"&gt;</c>.
/// </summary>
/// <param name="w">The writer that receives the markup.</param>
/// <param name="documentType">The document type node to serialize.</param>
public static void OuterText(TextWriter w, DomDocumentType documentType)
{
    w.Write("<!DOCTYPE ");
    w.Write(documentType.Name);

    bool hasPublicId = !string.IsNullOrWhiteSpace(documentType.PublicId);
    if (hasPublicId)
    {
        w.Write(" PUBLIC \"");
        w.Write(documentType.PublicId);
        w.Write("\"");
    }

    if (!string.IsNullOrWhiteSpace(documentType.SystemId))
    {
        // A system identifier that is not preceded by a public identifier must be
        // introduced by the SYSTEM keyword; without it the declaration is malformed.
        if (!hasPublicId)
        {
            w.Write(" SYSTEM");
        }

        w.Write(" \"");
        w.Write(documentType.SystemId);
        w.Write("\"");
    }

    w.Write('>');
}
/// <summary>
/// Lazily parses <c>BaseHtml</c> from position 0 through <c>EndPos</c> and yields the
/// top-level objects found: text literals, childless root-level nodes as soon as their
/// tag has been read, and root-level elements with children once they are closed.
/// Nesting is tracked with an explicit stack of <c>IterationData</c> instead of recursion.
/// </summary>
/// <param name="allowLiterals">Copied to <c>AllowLiterals</c> of each root-level iteration state.</param>
/// <returns>A deferred sequence of the top-level DOM objects.</returns>
protected IEnumerable<IDomObject> Parse(bool allowLiterals)
{
    int pos=0;
    Stack<IterationData> stack = new Stack<IterationData>();

    // Each pass of this loop starts a fresh root-level state at the position where the previous one stopped.
    while (pos <= EndPos)
    {
        IterationData current = new IterationData();
        current.AllowLiterals = allowLiterals;
        current.Reset(pos);
        stack.Push(current);

        while (stack.Count != 0)
        {
            current = stack.Pop();
            //Debug.Assert(current.Object == null);
            while (!current.Finished && current.Pos <= EndPos)
            {
                // NOTE(review): 'c' is never read in this method.
                char c = BaseHtml[current.Pos];
                switch (current.Step)
                {
                    // Step 0: scan forward to the next '<'.
                    case 0:
                        current.Pos = CharIndexOf(BaseHtml, '<', current.Pos);
                        if (current.Pos < 0)
                        {
                            // done - no new tags found
                            current.Pos = EndPos + 1;
                        }
                        else
                        {
                            // deal with when we're in a literal block (script/textarea)
                            if (current.ReadTextOnly)
                            {
                                int endPos = current.Pos;
                                while (endPos >= 0)
                                {
                                    // keep going until we find the closing tag for this element
                                    int caretPos = CharIndexOf(BaseHtml, '>', endPos + 1);
                                    if (caretPos > 0)
                                    {
                                        // NOTE(review): 'tag' is lower-cased but NodeName is compared as-is;
                                        // this presumably relies on NodeName being stored lower-case -- confirm.
                                        string tag = BaseHtml.SubstringBetween(endPos + 1, caretPos).Trim().ToLower();
                                        if (tag == "/" +current.Parent.Element.NodeName)
                                        {
                                            // this is the end tag -- exit the block
                                            current.Pos=endPos;
                                            break;
                                        }
                                    }
                                    endPos = CharIndexOf(BaseHtml, '<', endPos + 1);
                                }
                            }
                            // even if we fell through from ReadTextOnly (e.g. was never closed), we should proceed to finish
                            current.Step=1;
                        }
                        break;
                    // Step 1: positioned at a tag; flush pending text first, then interpret the tag.
                    case 1:
                        if (current.Pos > current.HtmlStart)
                        {
                            IDomObject literal = GetLiteral(current);
                            if (literal != null)
                            {
                                yield return literal;
                            }
                            continue;
                        }

                        int tagStartPos = current.Pos;
                        string newTag;
                        newTag = GetTagOpener(current);
                        string newTagLower = newTag.ToLower();

                        // when Element exists, it's because a previous iteration created it: it's our parent
                        string parentTag = String.Empty;
                        if (current.Parent != null)
                        {
                            parentTag = current.Parent.Element.NodeName.ToLower();
                        }

                        if (newTag == String.Empty)
                        {
                            // It's a tag closer. Make sure it's the right one.
                            current.Pos = tagStartPos + 1;
                            string closeTag = GetCloseTag(current);

                            // Ignore empty tags, or closing tags found when no parent is open
                            bool isProperClose = closeTag.ToLower() == parentTag;
                            if (closeTag == String.Empty)
                            {
                                // ignore empty tags
                                continue;
                            }
                            else
                            {
                                // locate match for this closer up the hierarchy
                                IterationData actualParent =null;
                                if (!isProperClose)
                                {
                                    actualParent = current.Parent;
                                    while (actualParent != null && actualParent.Element.NodeName.ToLower() != closeTag.ToLower())
                                    {
                                        actualParent = actualParent.Parent;
                                    }
                                }
                                // if no matching close tag was found up the tree, ignore it
                                // otherwise always close this and repeat at the same position until the match is found
                                if (!isProperClose && actualParent == null)
                                {
                                    current.Invalid = true;
                                    continue;
                                }
                            }
                            // element is closed
                            // (current.Parent is non-null here: with no parent, parentTag is empty and the branch above has already continued)
                            if (current.Parent.Parent == null)
                            {
                                yield return current.Parent.Element;
                            }
                            current.Finished = true;
                            if (isProperClose)
                            {
                                current.Parent.Reset(current.Pos);
                            }
                            else
                            {
                                // Mismatched closer: the parent resumes at the closer itself so it is examined again one level up.
                                current.Parent.Reset(tagStartPos);
                            }
                            // already been returned before we added the children
                            continue;
                        }

                        // Before we keep going see if this is an implicit close
                        if (parentTag != String.Empty)
                        {
                            if (TagHasImplicitClose(parentTag,newTag) && parentTag == newTag)
                            {
                                // same tag for a repeater like li occurred - treat like a close tag
                                if (current.Parent.Parent == null)
                                {
                                    yield return current.Parent.Element;
                                }
                                current.Parent.Reset(tagStartPos);
                                current.Finished = true;
                                continue;
                            }
                        }

                        // seems to be a new tag. Parse it
                        IDomSpecialElement specialElement = null;
                        if (newTagLower[0] == '!')
                        {
                            if (newTagLower.StartsWith("!doctype"))
                            {
                                specialElement = new DomDocumentType();
                                current.Object = specialElement;
                            }
                            else if (newTagLower.StartsWith("![cdata["))
                            {
                                specialElement = new DomCData();
                                current.Object = specialElement;
                                // presumably skips "<![CDATA[" (9 characters), assuming tagStartPos is at '<' -- confirm
                                current.Pos = tagStartPos + 9;
                            }
                            else
                            {
                                specialElement = new DomComment();
                                current.Object = specialElement;
                                if (newTagLower.StartsWith("!--"))
                                {
                                    ((DomComment)specialElement).IsQuoted = true;
                                    // presumably skips "<!--" (4 characters), assuming tagStartPos is at '<' -- confirm
                                    current.Pos = tagStartPos + 4;
                                }
                                else
                                {
                                    current.Pos = tagStartPos+1;
                                }
                            }
                        }
                        else
                        {
                            current.Object = new DomElement(newTag);
                            if (!current.Element.InnerHtmlAllowed && current.Element.InnerTextAllowed)
                            {
                                // Text-only element: go back to step 0 so its content is scanned as a literal block.
                                current.ReadTextOnly = true;
                                current.Step = 0;
                            }
                        }

                        // Handle non-element/text types -- they have data inside the tag construct
                        if (current.Object is IDomSpecialElement)
                        {
                            string endTag = (current.Object is IDomComment && ((IDomComment)current.Object).IsQuoted) ? "-->" : ">";
                            int tagEndPos = BaseHtml.Seek(endTag, current.Pos);
                            if (tagEndPos <0)
                            {
                                // if a tag is unclosed entirely, then just find a new line.
                                tagEndPos = BaseHtml.Seek(System.Environment.NewLine, current.Pos);
                            }
                            if (tagEndPos < 0)
                            {
                                // Never closed, no newline - junk, treat it like such
                                tagEndPos = EndPos;
                            }
                            specialElement.NonAttributeData = BaseHtml.SubstringBetween(current.Pos, tagEndPos);
                            current.Pos = tagEndPos;
                        }
                        else
                        {
                            // Parse attribute data
                            while (current.Pos <= EndPos)
                            {
                                if (!GetTagAttribute(current)) break;
                            }
                        }

                        bool hasChildren = MoveOutsideTag(current);

                        // tricky part: if there are children, push ourselves back on the stack and start with a new object
                        // from this position. The children will add themselves as they are created, avoiding recursion.
                        // When the close tag is found, the parent will be yielded if it's a root element.
                        // I think there's a slightly better way to do this, capturing all the yield logic at the end of the
                        // stack but it works for now.
                        if (current.Parent != null)
                        {
                            current.Parent.Element.AppendChild(current.Object);
                        }
                        else if (!hasChildren)
                        {
                            yield return current.Object;
                        }

                        if (!hasChildren)
                        {
                            current.Reset();
                            continue;
                        }

                        stack.Push(current);

                        IterationData subItem = new IterationData();
                        subItem.Parent = current;
                        subItem.AllowLiterals = true;
                        subItem.Reset(current.Pos);
                        subItem.ReadTextOnly = current.ReadTextOnly;
                        current = subItem;
                        break;
                }
            }

            // Catchall for unclosed tags -- if there's an "unfinished" carrier here, it's because top-level tag was unclosed.
            // This will wrap up any straggling text and close any open tags after it.
            if (!current.Finished)
            {
                if (current.Pos > current.HtmlStart)
                {
                    IDomObject literal = GetLiteral(current);
                    if (literal != null)
                    {
                        yield return literal;
                    }
                }

                if (current.Parent != null)
                {
                    if (current.Parent.Parent == null)
                    {
                        yield return current.Parent.Element;
                    }
                    current.Parent.Reset(current.Pos);
                    current.Finished = true;
                }
            }
        }
        pos = current.Pos;
    }
}
/// <summary>
/// Lazily parses <c>BaseHtml</c> from position 0 through <c>EndPos</c> and yields the
/// top-level objects found: text literals, childless root-level nodes as soon as their
/// tag has been read, and root-level elements with children once they are closed.
/// Nesting is tracked with an explicit stack of <c>IterationData</c> instead of recursion.
/// </summary>
/// <param name="allowLiterals">Copied to <c>AllowLiterals</c> of each root-level iteration state.</param>
/// <returns>A deferred sequence of the top-level DOM objects.</returns>
protected IEnumerable <IDomObject> Parse(bool allowLiterals)
{
    int pos = 0;
    Stack <IterationData> stack = new Stack <IterationData>();

    // Each pass of this loop starts a fresh root-level state at the position where the previous one stopped.
    while (pos <= EndPos)
    {
        IterationData current = new IterationData();
        current.AllowLiterals = allowLiterals;
        current.Reset(pos);
        stack.Push(current);

        while (stack.Count != 0)
        {
            current = stack.Pop();
            //Debug.Assert(current.Object == null);
            while (!current.Finished && current.Pos <= EndPos)
            {
                // NOTE(review): 'c' is never read in this method.
                char c = BaseHtml[current.Pos];
                switch (current.Step)
                {
                    // Step 0: scan forward to the next '<'.
                    case 0:
                        current.Pos = CharIndexOf(BaseHtml, '<', current.Pos);
                        if (current.Pos < 0)
                        {
                            // done - no new tags found
                            current.Pos = EndPos + 1;
                        }
                        else
                        {
                            // deal with when we're in a literal block (script/textarea)
                            if (current.ReadTextOnly)
                            {
                                int endPos = current.Pos;
                                while (endPos >= 0)
                                {
                                    // keep going until we find the closing tag for this element
                                    int caretPos = CharIndexOf(BaseHtml, '>', endPos + 1);
                                    if (caretPos > 0)
                                    {
                                        // NOTE(review): 'tag' is lower-cased but NodeName is compared as-is;
                                        // this presumably relies on NodeName being stored lower-case -- confirm.
                                        string tag = BaseHtml.SubstringBetween(endPos + 1, caretPos).Trim().ToLower();
                                        if (tag == "/" + current.Parent.Element.NodeName)
                                        {
                                            // this is the end tag -- exit the block
                                            current.Pos = endPos;
                                            break;
                                        }
                                    }
                                    endPos = CharIndexOf(BaseHtml, '<', endPos + 1);
                                }
                            }
                            // even if we fell through from ReadTextOnly (e.g. was never closed), we should proceed to finish
                            current.Step = 1;
                        }
                        break;
                    // Step 1: positioned at a tag; flush pending text first, then interpret the tag.
                    case 1:
                        if (current.Pos > current.HtmlStart)
                        {
                            IDomObject literal = GetLiteral(current);
                            if (literal != null)
                            {
                                yield return(literal);
                            }
                            continue;
                        }

                        int tagStartPos = current.Pos;
                        string newTag;
                        newTag = GetTagOpener(current);
                        string newTagLower = newTag.ToLower();

                        // when Element exists, it's because a previous iteration created it: it's our parent
                        string parentTag = String.Empty;
                        if (current.Parent != null)
                        {
                            parentTag = current.Parent.Element.NodeName.ToLower();
                        }

                        if (newTag == String.Empty)
                        {
                            // It's a tag closer. Make sure it's the right one.
                            current.Pos = tagStartPos + 1;
                            string closeTag = GetCloseTag(current);

                            // Ignore empty tags, or closing tags found when no parent is open
                            bool isProperClose = closeTag.ToLower() == parentTag;
                            if (closeTag == String.Empty)
                            {
                                // ignore empty tags
                                continue;
                            }
                            else
                            {
                                // locate match for this closer up the hierarchy
                                IterationData actualParent = null;
                                if (!isProperClose)
                                {
                                    actualParent = current.Parent;
                                    while (actualParent != null && actualParent.Element.NodeName.ToLower() != closeTag.ToLower())
                                    {
                                        actualParent = actualParent.Parent;
                                    }
                                }
                                // if no matching close tag was found up the tree, ignore it
                                // otherwise always close this and repeat at the same position until the match is found
                                if (!isProperClose && actualParent == null)
                                {
                                    current.Invalid = true;
                                    continue;
                                }
                            }
                            // element is closed
                            // (current.Parent is non-null here: with no parent, parentTag is empty and the branch above has already continued)
                            if (current.Parent.Parent == null)
                            {
                                yield return(current.Parent.Element);
                            }
                            current.Finished = true;
                            if (isProperClose)
                            {
                                current.Parent.Reset(current.Pos);
                            }
                            else
                            {
                                // Mismatched closer: the parent resumes at the closer itself so it is examined again one level up.
                                current.Parent.Reset(tagStartPos);
                            }
                            // already been returned before we added the children
                            continue;
                        }

                        // Before we keep going see if this is an implicit close
                        if (parentTag != String.Empty)
                        {
                            if (TagHasImplicitClose(parentTag, newTag) && parentTag == newTag)
                            {
                                // same tag for a repeater like li occurred - treat like a close tag
                                if (current.Parent.Parent == null)
                                {
                                    yield return(current.Parent.Element);
                                }
                                current.Parent.Reset(tagStartPos);
                                current.Finished = true;
                                continue;
                            }
                        }

                        // seems to be a new tag. Parse it
                        IDomSpecialElement specialElement = null;
                        if (newTagLower[0] == '!')
                        {
                            if (newTagLower.StartsWith("!doctype"))
                            {
                                specialElement = new DomDocumentType();
                                current.Object = specialElement;
                            }
                            else if (newTagLower.StartsWith("![cdata["))
                            {
                                specialElement = new DomCData();
                                current.Object = specialElement;
                                // presumably skips "<![CDATA[" (9 characters), assuming tagStartPos is at '<' -- confirm
                                current.Pos = tagStartPos + 9;
                            }
                            else
                            {
                                specialElement = new DomComment();
                                current.Object = specialElement;
                                if (newTagLower.StartsWith("!--"))
                                {
                                    ((DomComment)specialElement).IsQuoted = true;
                                    // presumably skips "<!--" (4 characters), assuming tagStartPos is at '<' -- confirm
                                    current.Pos = tagStartPos + 4;
                                }
                                else
                                {
                                    current.Pos = tagStartPos + 1;
                                }
                            }
                        }
                        else
                        {
                            current.Object = new DomElement(newTag);
                            if (!current.Element.InnerHtmlAllowed && current.Element.InnerTextAllowed)
                            {
                                // Text-only element: go back to step 0 so its content is scanned as a literal block.
                                current.ReadTextOnly = true;
                                current.Step = 0;
                            }
                        }

                        // Handle non-element/text types -- they have data inside the tag construct
                        if (current.Object is IDomSpecialElement)
                        {
                            string endTag = (current.Object is IDomComment && ((IDomComment)current.Object).IsQuoted) ? "-->" : ">";
                            int tagEndPos = BaseHtml.Seek(endTag, current.Pos);
                            if (tagEndPos < 0)
                            {
                                // if a tag is unclosed entirely, then just find a new line.
                                tagEndPos = BaseHtml.Seek(System.Environment.NewLine, current.Pos);
                            }
                            if (tagEndPos < 0)
                            {
                                // Never closed, no newline - junk, treat it like such
                                tagEndPos = EndPos;
                            }
                            specialElement.NonAttributeData = BaseHtml.SubstringBetween(current.Pos, tagEndPos);
                            current.Pos = tagEndPos;
                        }
                        else
                        {
                            // Parse attribute data
                            while (current.Pos <= EndPos)
                            {
                                if (!GetTagAttribute(current))
                                {
                                    break;
                                }
                            }
                        }

                        bool hasChildren = MoveOutsideTag(current);

                        // tricky part: if there are children, push ourselves back on the stack and start with a new object
                        // from this position. The children will add themselves as they are created, avoiding recursion.
                        // When the close tag is found, the parent will be yielded if it's a root element.
                        // I think there's a slightly better way to do this, capturing all the yield logic at the end of the
                        // stack but it works for now.
                        if (current.Parent != null)
                        {
                            current.Parent.Element.AppendChild(current.Object);
                        }
                        else if (!hasChildren)
                        {
                            yield return(current.Object);
                        }

                        if (!hasChildren)
                        {
                            current.Reset();
                            continue;
                        }

                        stack.Push(current);

                        IterationData subItem = new IterationData();
                        subItem.Parent = current;
                        subItem.AllowLiterals = true;
                        subItem.Reset(current.Pos);
                        subItem.ReadTextOnly = current.ReadTextOnly;
                        current = subItem;
                        break;
                }
            }

            // Catchall for unclosed tags -- if there's an "unfinished" carrier here, it's because top-level tag was unclosed.
            // This will wrap up any straggling text and close any open tags after it.
            if (!current.Finished)
            {
                if (current.Pos > current.HtmlStart)
                {
                    IDomObject literal = GetLiteral(current);
                    if (literal != null)
                    {
                        yield return(literal);
                    }
                }

                if (current.Parent != null)
                {
                    if (current.Parent.Parent == null)
                    {
                        yield return(current.Parent.Element);
                    }
                    current.Parent.Reset(current.Pos);
                    current.Finished = true;
                }
            }
        }
        pos = current.Pos;
    }
}
/// <summary>
/// Creates a new document type node with the given name and assigns its
/// public and system identifiers.
/// </summary>
/// <param name="name">The name passed to the <c>DomDocumentType</c> constructor.</param>
/// <param name="publicId">The value assigned to <c>PublicId</c>.</param>
/// <param name="systemId">The value assigned to <c>SystemId</c>.</param>
/// <returns>The newly created document type node.</returns>
public virtual DomDocumentType CreateDocumentType(string name, string publicId, string systemId)
{
    // TODO Look up based on name
    return new DomDocumentType(name)
    {
        PublicId = publicId,
        SystemId = systemId
    };
}
/// <summary>
/// Normalizes the top level of a document: inserts an HTML5 doctype when none is present,
/// inserts an empty <c>head</c> at the start of <c>html</c> when the document has no head,
/// and, when a <c>body</c> exists, walks the document's direct children to drop leading
/// whitespace-only text nodes and move stray nodes into <c>body</c>.
/// In the future I will update the parser to do this directly, since this requires binding to a Document to work.
/// </summary>
/// <param name="document">The document whose top-level child nodes are reorganized in place.</param>
public static void ReorganizeStrandedTextNodes(IDomDocument document)
{
    if (document.DocTypeNode == null)
    {
        var docType = new DomDocumentType(DocType.HTML5);
        document.ChildNodes.Insert(0, docType);
    }

    // ignore everything before <html> except text; if found, start adding to <body>
    // if there's anything before <doctype> then it gets trashed

    IDomElement html = (IDomElement)document.GetElementsByTagName("html").FirstOrDefault();
    if (html != null && document.GetElementsByTagName("head").FirstOrDefault() == null)
    {
        html.ChildNodes.Insert(0, document.CreateElement("head"));
    }

    IDomElement body = (IDomElement)document.GetElementsByTagName("body").FirstOrDefault();
    if (body != null)
    {
        // Set once a non-whitespace top-level text node has been seen.
        bool textYet = false;
        // NOTE(review): 'anythingYet' is never set to true in this method, so the
        // RemoveAt branch of the DOCUMENT_TYPE_NODE case below cannot run as written.
        bool anythingYet = false;
        // Position in body.ChildNodes where the next relocated node is inserted.
        int bodyIndex = 0;
        // Position in document.ChildNodes currently being examined.
        int index = 0;

        // there should only be DocType & HTML.
        // NOTE(review): 'index' is not advanced after body.ChildNodes.Insert(...); the loop
        // presumably relies on Insert detaching the node from document.ChildNodes -- confirm.
        while (index < document.ChildNodes.Count)
        {
            IDomObject obj = document.ChildNodes[index];
            switch (obj.NodeType)
            {
                case NodeType.DOCUMENT_TYPE_NODE:
                    if (!anythingYet)
                    {
                        index++;
                    }
                    else
                    {
                        document.ChildNodes.RemoveAt(index);
                    }
                    break;
                case NodeType.ELEMENT_NODE:
                    if (obj.NodeName == "HTML")
                    {
                        // Nodes found after <html> are inserted after body's existing children.
                        bodyIndex = body.ChildNodes.Length;
                        index++;
                    }
                    else
                    {
                        // Other top-level elements are moved only after real text has been seen; otherwise they stay in place.
                        if (textYet)
                        {
                            body.ChildNodes.Insert(bodyIndex++, obj);
                        }
                        else
                        {
                            index++;
                        }
                        continue;
                    }
                    break;
                case NodeType.TEXT_NODE:
                    if (!textYet)
                    {
                        // if a node is only whitespace and there has not yet been a non-whitespace text node,
                        // then ignore it.
                        var scanner = StringScanner.Scanner.Create(obj.NodeValue);
                        scanner.SkipWhitespace();
                        if (scanner.Finished)
                        {
                            document.ChildNodes.RemoveAt(index);
                            continue;
                        }
                        else
                        {
                            textYet = true;
                        }
                    }
                    body.ChildNodes.Insert(bodyIndex++, obj);
                    break;
                default:
                    // Any other node type is moved into body.
                    body.ChildNodes.Insert(bodyIndex++, obj);
                    break;
            }
        }
    }
}
/// <summary>
/// Visits a document type node by writing its outer text to <c>_sb</c>
/// through <c>TextUtility.OuterText</c>.
/// </summary>
/// <param name="documentType">The document type node to write.</param>
protected override void VisitDocumentType(DomDocumentType documentType)
{
    TextUtility.OuterText(_sb, documentType);
}
/// <summary>
/// Parse the HTML, and return it, based on options set.
/// Walks <c>Html</c> from position 0 through <c>EndPos</c> with a two-state tokenizer
/// (<c>Default</c>: find the next tag; <c>TagStart</c>: interpret it) and tracks nesting
/// with an explicit stack of <c>IterationData</c> instead of recursion.
/// </summary>
///
/// <returns>
/// An enumerator of the top-level elements.
/// </returns>
protected IEnumerable <IDomObject> ParseImplementation()
{
    int pos = 0;
    Stack <IterationData> stack = new Stack <IterationData>();

    // Each pass of this loop starts a fresh root-level state at the position where the previous one stopped.
    while (pos <= EndPos)
    {
        IterationData current = new IterationData();
        if (WrapRootTextNodes)
        {
            current.WrapLiterals = true;
        }
        current.Reset(pos);
        stack.Push(current);

        while (stack.Count != 0)
        {
            current = stack.Pop();

            while (current.TokenizerState != TokenizerState.Finished && current.Pos <= EndPos)
            {
                // NOTE(review): 'c' is never read in this method.
                char c = Html[current.Pos];
                switch (current.TokenizerState)
                {
                    case TokenizerState.Default:
                        if (current.FindNextTag(Html))
                        {
                            // even if we fell through from ReadTextOnly (e.g. was never closed), we should proceed to finish
                            current.TokenizerState = TokenizerState.TagStart;
                        }
                        break;
                    case TokenizerState.TagStart:
                        IDomObject literal;
                        if (current.TryGetLiteral(this, out literal))
                        {
                            yield return(literal);
                        }

                        int tagStartPos = current.Pos;
                        string newTag = current.GetTagOpener(Html);

                        if (newTag == String.Empty)
                        {
                            // It's a tag closer. Make sure it's the right one.
                            current.Pos = tagStartPos + 1;
                            ushort closeTagId = HtmlData.Tokenize(current.GetCloseTag(Html));

                            // Ignore empty tags, or closing tags found when no parent is open
                            bool isProperClose = closeTagId == current.ParentTagID();
                            if (closeTagId == 0)
                            {
                                // ignore empty tags
                                continue;
                            }
                            else
                            {
                                // locate match for this closer up the hierarchy
                                IterationData actualParent = null;
                                if (!isProperClose)
                                {
                                    actualParent = current.Parent;
                                    while (actualParent != null && actualParent.Element.NodeNameID != closeTagId)
                                    {
                                        actualParent = actualParent.Parent;
                                    }
                                }
                                // if no matching close tag was found up the tree, ignore it
                                // otherwise always close this and repeat at the same position until the match is found
                                if (!isProperClose && actualParent == null)
                                {
                                    current.InsertionMode = InsertionMode.Invalid;
                                    continue;
                                }
                            }
                            // element is closed
                            if (current.Parent.Parent == null)
                            {
                                yield return(current.Parent.Element);
                            }
                            current.TokenizerState = TokenizerState.Finished;
                            if (isProperClose)
                            {
                                current.Parent.Reset(current.Pos);
                            }
                            else
                            {
                                // Mismatched closer: the parent resumes at the closer itself so it is examined again one level up.
                                current.Parent.Reset(tagStartPos);
                            }
                            // already been returned before we added the children
                            continue;
                        }
                        else if (newTag[0] == '!')
                        {
                            // Special constructs: doctype, CDATA, or comment.
                            IDomSpecialElement specialElement = null;
                            string newTagUpper = newTag.ToUpper();
                            if (newTagUpper.StartsWith("!DOCTYPE"))
                            {
                                specialElement = new DomDocumentType();
                                current.Element = specialElement;
                            }
                            else if (newTagUpper.StartsWith("![CDATA["))
                            {
                                specialElement = new DomCData();
                                current.Element = specialElement;
                                // presumably skips "<![CDATA[" (9 characters), assuming tagStartPos is at '<' -- confirm
                                current.Pos = tagStartPos + 9;
                            }
                            else
                            {
                                specialElement = new DomComment();
                                current.Element = specialElement;
                                if (newTag.StartsWith("!--"))
                                {
                                    ((DomComment)specialElement).IsQuoted = true;
                                    // presumably skips "<!--" (4 characters), assuming tagStartPos is at '<' -- confirm
                                    current.Pos = tagStartPos + 4;
                                }
                                else
                                {
                                    current.Pos = tagStartPos + 1;
                                }
                            }

                            string endTag = (current.Element is IDomComment && ((IDomComment)current.Element).IsQuoted) ? "-->" : ">";

                            int tagEndPos = Html.Seek(endTag, current.Pos);
                            if (tagEndPos < 0)
                            {
                                // if a tag is unclosed entirely, then just find a new line.
                                tagEndPos = Html.Seek(System.Environment.NewLine, current.Pos);
                            }
                            if (tagEndPos < 0)
                            {
                                // Never closed, no newline - junk, treat it like such
                                tagEndPos = EndPos;
                            }

                            specialElement.NonAttributeData = Html.SubstringBetween(current.Pos, tagEndPos);
                            current.Pos = tagEndPos;
                        }
                        else
                        {
                            // seems to be a new element tag, parse it.
                            ushort newTagId = HtmlData.Tokenize(newTag);

                            // Before we keep going see if this is an implicit close
                            ushort parentTagId = current.ParentTagID();
                            int lastPos = current.Pos;

                            // No parent tag while parsing a document: anything other than <html> first gets a generated html element as its parent.
                            if (parentTagId == 0 && IsDocument)
                            {
                                if (newTagId != HtmlData.tagHTML)
                                {
                                    current.Element = DomElement.Create(HtmlData.tagHTML);
                                    current = current.AddNewChild();
                                    parentTagId = HtmlData.tagHTML;
                                }
                            }

                            if (parentTagId != 0)
                            {
                                // The delegate's result drives the loop: close the open parent, add a new parent, or do nothing.
                                ushort action = SpecialTagActionDelegate(parentTagId, newTagId);

                                while (action != HtmlData.tagActionNothing)
                                {
                                    if (action == HtmlData.tagActionClose)
                                    {
                                        // track the next parent up the chain
                                        // NOTE(review): the conditional is equivalent to 'current.Parent'; and current.Parent
                                        // is dereferenced unguarded on the next statement.
                                        var newNode = (current.Parent != null) ? current.Parent : null;

                                        // same tag for a repeater like li occurred - treat like a close tag
                                        if (current.Parent.Parent == null)
                                        {
                                            yield return(current.Parent.Element);
                                        }
                                        current.TokenizerState = TokenizerState.Finished;
                                        //current.Parent.Reset(tagStartPos);

                                        if (newNode != null && newNode.Parent != null && newNode.Parent.Element != null)
                                        {
                                            // Ask again one level up; keep closing while the delegate requests an action.
                                            action = SpecialTagActionDelegate(newNode.Parent.Element.NodeNameID, newTagId);
                                            if (action != HtmlData.tagActionNothing)
                                            {
                                                current = newNode;
                                            }
                                        }
                                        else
                                        {
                                            action = HtmlData.tagActionNothing;
                                        }
                                    }
                                    else
                                    {
                                        // Any other action value is passed to AddNewParent when optional elements are generated.
                                        if (GenerateOptionalElements)
                                        {
                                            stack.Push(current);
                                            current = current.AddNewParent(action, lastPos);
                                        }
                                        action = HtmlData.tagActionNothing;
                                    }
                                }
                                if (current.TokenizerState == TokenizerState.Finished)
                                {
                                    // An implicit close happened: the parent resumes at this tag's start so the tag is parsed again at that level.
                                    current.Parent.Reset(tagStartPos);
                                    continue;
                                }
                            }

                            current.Element = DomElement.Create(newTagId);

                            if (!current.Element.InnerHtmlAllowed && current.Element.InnerTextAllowed)
                            {
                                current.InsertionMode = InsertionMode.Text;
                                current.TokenizerState = TokenizerState.Default;
                            }

                            // Parse attribute data
                            while (current.Pos <= EndPos)
                            {
                                if (!current.GetTagAttribute(Html))
                                {
                                    break;
                                }
                            }
                        }

                        IDomObject el;

                        // When FinishTagOpener returns true, this state is parked on the stack and parsing continues in a new child state.
                        if (current.FinishTagOpener(Html, out el))
                        {
                            stack.Push(current);
                            current = current.AddNewChild();
                        }

                        if (el != null)
                        {
                            yield return(el);
                        }

                        break;
                }
            }

            // Catchall for unclosed tags -- if there's an "unfinished" carrier here, it's because top-level tag was unclosed.
            // This will wrap up any straggling text and close any open tags after it.
            if (current.TokenizerState != TokenizerState.Finished)
            {
                foreach (var el in current.CloseElement(this))
                {
                    yield return(el);
                }
            }
        }
        pos = current.Pos;
    }
}
/// <summary>
/// Visits a document type node by creating an equivalent node through
/// <c>Document.CreateDocumentType</c> and appending it to <c>_current</c>.
/// </summary>
/// <param name="node">The document type node whose name and identifiers are copied.</param>
protected override void VisitDocumentType(DomDocumentType node)
{
    var copy = Document.CreateDocumentType(
        node.Name,
        node.PublicId,
        node.SystemId);

    _current.Append(copy);
}
/// <summary>
/// Explicit <c>IDomNodeVisitor</c> entry point for document type nodes;
/// forwards to <c>VisitDocumentType</c>.
/// </summary>
/// <param name="documentType">The document type node being visited.</param>
void IDomNodeVisitor.Visit(DomDocumentType documentType)
{
    VisitDocumentType(documentType);
}
/// <summary>
/// Parse the HTML, and return it, based on options set.
/// Walks <c>Html</c> from position 0 through <c>EndPos</c> with a two-state tokenizer
/// (<c>Default</c>: find the next tag; <c>TagStart</c>: interpret it) and tracks nesting
/// with an explicit stack of <c>IterationData</c> instead of recursion.
/// </summary>
///
/// <returns>
/// An enumerator of the top-level elements.
/// </returns>
protected IEnumerable<IDomObject> ParseImplementation()
{
    int pos=0;
    Stack<IterationData> stack = new Stack<IterationData>();

    // Each pass of this loop starts a fresh root-level state at the position where the previous one stopped.
    while (pos <= EndPos)
    {
        IterationData current = new IterationData();
        if (WrapRootTextNodes)
        {
            current.WrapLiterals = true;
        }
        current.Reset(pos);
        stack.Push(current);

        while (stack.Count != 0)
        {
            current = stack.Pop();

            while (current.TokenizerState != TokenizerState.Finished && current.Pos <= EndPos)
            {
                // NOTE(review): 'c' is never read in this method.
                char c = Html[current.Pos];
                switch (current.TokenizerState)
                {
                    case TokenizerState.Default:
                        if (current.FindNextTag(Html))
                        {
                            // even if we fell through from ReadTextOnly (e.g. was never closed), we should proceed to finish
                            current.TokenizerState = TokenizerState.TagStart;
                        }
                        break;
                    case TokenizerState.TagStart:
                        IDomObject literal;
                        if (current.TryGetLiteral(this, out literal))
                        {
                            yield return literal;
                        }

                        int tagStartPos = current.Pos;
                        string newTag=current.GetTagOpener(Html);

                        if (newTag == String.Empty)
                        {
                            // It's a tag closer. Make sure it's the right one.
                            current.Pos = tagStartPos + 1;
                            ushort closeTagId = HtmlData.Tokenize(current.GetCloseTag(Html));

                            // Ignore empty tags, or closing tags found when no parent is open
                            bool isProperClose = closeTagId == current.ParentTagID();
                            if (closeTagId == 0)
                            {
                                // ignore empty tags
                                continue;
                            }
                            else
                            {
                                // locate match for this closer up the hierarchy
                                IterationData actualParent =null;
                                if (!isProperClose)
                                {
                                    actualParent = current.Parent;
                                    while (actualParent != null && actualParent.Element.NodeNameID != closeTagId)
                                    {
                                        actualParent = actualParent.Parent;
                                    }
                                }
                                // if no matching close tag was found up the tree, ignore it
                                // otherwise always close this and repeat at the same position until the match is found
                                if (!isProperClose && actualParent == null)
                                {
                                    current.InsertionMode = InsertionMode.Invalid;
                                    continue;
                                }
                            }
                            // element is closed
                            if (current.Parent.Parent == null)
                            {
                                yield return current.Parent.Element;
                            }
                            current.TokenizerState = TokenizerState.Finished ;
                            if (isProperClose)
                            {
                                current.Parent.Reset(current.Pos);
                            }
                            else
                            {
                                // Mismatched closer: the parent resumes at the closer itself so it is examined again one level up.
                                current.Parent.Reset(tagStartPos);
                            }
                            // already been returned before we added the children
                            continue;
                        }
                        else if (newTag[0] == '!')
                        {
                            // Special constructs: doctype, CDATA, or comment.
                            IDomSpecialElement specialElement = null;
                            string newTagUpper = newTag.ToUpper();
                            if (newTagUpper.StartsWith("!DOCTYPE"))
                            {
                                specialElement = new DomDocumentType();
                                current.Element = specialElement;
                            }
                            else if (newTagUpper.StartsWith("![CDATA["))
                            {
                                specialElement = new DomCData();
                                current.Element = specialElement;
                                // presumably skips "<![CDATA[" (9 characters), assuming tagStartPos is at '<' -- confirm
                                current.Pos = tagStartPos + 9;
                            }
                            else
                            {
                                specialElement = new DomComment();
                                current.Element = specialElement;
                                if (newTag.StartsWith("!--"))
                                {
                                    ((DomComment)specialElement).IsQuoted = true;
                                    // presumably skips "<!--" (4 characters), assuming tagStartPos is at '<' -- confirm
                                    current.Pos = tagStartPos + 4;
                                }
                                else
                                {
                                    current.Pos = tagStartPos+1;
                                }
                            }

                            string endTag = (current.Element is IDomComment && ((IDomComment)current.Element).IsQuoted) ? "-->" : ">";

                            int tagEndPos = Html.Seek(endTag, current.Pos);
                            if (tagEndPos < 0)
                            {
                                // if a tag is unclosed entirely, then just find a new line.
                                tagEndPos = Html.Seek(System.Environment.NewLine, current.Pos);
                            }
                            if (tagEndPos < 0)
                            {
                                // Never closed, no newline - junk, treat it like such
                                tagEndPos = EndPos;
                            }

                            specialElement.NonAttributeData = Html.SubstringBetween(current.Pos, tagEndPos);
                            current.Pos = tagEndPos;
                        }
                        else
                        {
                            // seems to be a new element tag, parse it.
                            ushort newTagId = HtmlData.Tokenize(newTag);

                            // Before we keep going see if this is an implicit close
                            ushort parentTagId = current.ParentTagID();
                            int lastPos = current.Pos;

                            // No parent tag while parsing a document: anything other than <html> first gets a generated html element as its parent.
                            if (parentTagId ==0 && IsDocument)
                            {
                                if (newTagId != HtmlData.tagHTML)
                                {
                                    current.Element =DomElement.Create(HtmlData.tagHTML);
                                    current = current.AddNewChild();
                                    parentTagId = HtmlData.tagHTML;
                                }
                            }

                            if (parentTagId != 0)
                            {
                                // The delegate's result drives the loop: close the open parent, add a new parent, or do nothing.
                                ushort action = SpecialTagActionDelegate(parentTagId, newTagId);

                                while (action != HtmlData.tagActionNothing)
                                {
                                    if (action == HtmlData.tagActionClose)
                                    {
                                        // track the next parent up the chain
                                        // NOTE(review): the conditional is equivalent to 'current.Parent'; and current.Parent
                                        // is dereferenced unguarded on the next statement.
                                        var newNode = (current.Parent != null) ? current.Parent : null;

                                        // same tag for a repeater like li occurred - treat like a close tag
                                        if (current.Parent.Parent == null)
                                        {
                                            yield return current.Parent.Element;
                                        }
                                        current.TokenizerState = TokenizerState.Finished;
                                        //current.Parent.Reset(tagStartPos);

                                        if (newNode != null && newNode.Parent != null && newNode.Parent.Element != null)
                                        {
                                            // Ask again one level up; keep closing while the delegate requests an action.
                                            action = SpecialTagActionDelegate(newNode.Parent.Element.NodeNameID, newTagId);
                                            if (action != HtmlData.tagActionNothing)
                                            {
                                                current = newNode;
                                            }
                                        }
                                        else
                                        {
                                            action = HtmlData.tagActionNothing;
                                        }
                                    }
                                    else
                                    {
                                        // Any other action value is passed to AddNewParent when optional elements are generated.
                                        if (GenerateOptionalElements)
                                        {
                                            stack.Push(current);
                                            current = current.AddNewParent(action, lastPos);
                                        }
                                        action = HtmlData.tagActionNothing;
                                    }
                                }
                                if (current.TokenizerState == TokenizerState.Finished)
                                {
                                    // An implicit close happened: the parent resumes at this tag's start so the tag is parsed again at that level.
                                    current.Parent.Reset(tagStartPos);
                                    continue;
                                }
                            }

                            current.Element = DomElement.Create(newTagId);

                            if (!current.Element.InnerHtmlAllowed && current.Element.InnerTextAllowed)
                            {
                                current.InsertionMode = InsertionMode.Text;
                                current.TokenizerState = TokenizerState.Default;
                            }

                            // Parse attribute data
                            while (current.Pos <= EndPos)
                            {
                                if (!current.GetTagAttribute(Html)) break;
                            }
                        }

                        IDomObject el;

                        // When FinishTagOpener returns true, this state is parked on the stack and parsing continues in a new child state.
                        if (current.FinishTagOpener(Html, out el))
                        {
                            stack.Push(current);
                            current = current.AddNewChild();
                        }

                        if (el != null)
                        {
                            yield return el;
                        }

                        break;
                }
            }

            // Catchall for unclosed tags -- if there's an "unfinished" carrier here, it's because top-level tag was unclosed.
            // This will wrap up any straggling text and close any open tags after it.
            if (current.TokenizerState != TokenizerState.Finished)
            {
                foreach (var el in current.CloseElement(this))
                {
                    yield return el;
                }
            }
        }
        pos = current.Pos;
    }
}