/// <summary>
/// Finds the element that a closing tag refers to by walking up the ancestor chain.
/// </summary>
/// <param name="tagName">Tag name taken from the closing tag.</param>
/// <param name="element">
/// Element to start from. It is checked first, then each <c>ParentNode</c> in turn. May be null.
/// </param>
/// <returns>
/// The nearest element, starting at <paramref name="element"/>, whose <c>TagName</c> equals
/// <paramref name="tagName"/>; null when no element on the chain matches.
/// </returns>
private static DOMElement GetOpeningTag(string tagName, DOMElement element)
{
    // Walk the chain with a loop instead of recursion: the chain is as deep as
    // the document's nesting, and a stack overflow cannot be caught by callers.
    for (DOMElement current = element; current != null; current = current.ParentNode)
    {
        if (current.TagName == tagName)
        {
            return current;
        }
    }

    return null;
}
/// <summary>
/// Builds DOM tree using tokens generated from tokenizer.
/// </summary>
/// <remarks>
/// Mismatched markup is tolerated: a closing tag closes the nearest open ancestor
/// with the same tag name (implicitly closing everything nested inside it), and a
/// closing tag with no matching open ancestor is ignored.
/// </remarks>
/// <param name="tokens">Tokens to process, in document order.</param>
/// <returns>List of top-level elements, i.e. nodes created while no element was open.</returns>
public static List<DOMElement> BuildTree(List<string> tokens)
{
    List<DOMElement> elements = new List<DOMElement>();

    // Innermost element that is still open; null while at the top level.
    // Its ParentNode chain is exactly the stack of currently open elements,
    // so no separate list of opened tag names is needed.
    DOMElement parent = null;

    foreach (string token in tokens)
    {
        string tagName = GetTagName(token);
        TagType tagType = GetTagType(token, tagName);
        NodeType nodeType = GetNodeType(token, tagName);

        if (tagType == TagType.Closing)
        {
            // Find the nearest open element this tag closes. The lookup starts at
            // the current parent itself and returns null when nothing is open or
            // nothing matches, in which case the stray closing tag is ignored.
            //
            // Examples:
            //   <a> <b> </b>        -> parent = <a>
            //   <a> <b> <c> </b>    -> parent = <a>  (<c> is closed implicitly)
            DOMElement opener = GetOpeningTag(tagName, parent);
            if (opener != null)
            {
                // Step out of the closed element.
                parent = opener.ParentNode;
            }

            continue;
        }

        DOMElement element = new DOMElement()
        {
            NodeType = nodeType,
        };

        // Attach the node to the open element, or make it a top-level node.
        if (parent != null)
        {
            element.ParentNode = parent;
            parent.Children.Add(element);
        }
        else
        {
            elements.Add(element);
        }

        if (nodeType == NodeType.Element)
        {
            element.TagName = tagName;

            // Extracts all attributes from token.
            element.Attributes = GetAttributes(token, tagName);

            // Only opening tags can contain following nodes; element tokens of
            // any other tag type (presumably self-closing) are added as leaves.
            if (tagType == TagType.Opening)
            {
                parent = element;
            }
        }
        else if (nodeType == NodeType.Text)
        {
            element.NodeValue = token;
        }
        else if (nodeType == NodeType.Comment)
        {
            element.NodeValue = GetCommentText(token);
        }
    }

    return elements;
}