예제 #1
0
        /// <summary>
        /// Searches the ancestor chain, beginning with <paramref name="element"/> itself,
        /// for the nearest element whose tag name equals <paramref name="tagName"/>.
        /// </summary>
        /// <param name="tagName">Tag name of the closing tag that is being matched.</param>
        /// <param name="element">Element at which the upward search starts; may be null.</param>
        /// <returns>
        /// The nearest matching element, or null when <paramref name="element"/> is null
        /// or no element on the chain carries that tag name.
        /// </returns>
        private static DOMElement GetOpeningTag(string tagName, DOMElement element)
        {
            // Follow ParentNode links until a match is found or the chain ends.
            for (DOMElement current = element; current != null; current = current.ParentNode)
            {
                if (current.TagName == tagName)
                {
                    return current;
                }
            }

            return null;
        }
예제 #2
0
        /// <summary>
        /// Builds a DOM tree from the tokens produced by the tokenizer.
        /// </summary>
        /// <remarks>
        /// Tokens are processed in order while tracking the innermost element that is still open.
        /// Every non-closing token becomes a new <see cref="DOMElement"/> that is appended to the
        /// open element's children, or to the returned list when no element is open.
        /// A closing token closes the nearest open element with the same tag name together with
        /// everything still open inside it; a closing token without a matching open element is ignored.
        /// </remarks>
        /// <param name="tokens">Tokens in document order. Must not be null.</param>
        /// <returns>The top-level elements, i.e. those created while no element was open.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="tokens"/> is null.</exception>
        public static List<DOMElement> BuildTree(List<string> tokens)
        {
            if (tokens == null)
            {
                throw new System.ArgumentNullException("tokens");
            }

            List<DOMElement> roots = new List<DOMElement>();

            // Innermost element that is still open; null while at the top level.
            DOMElement parent = null;

            foreach (string token in tokens)
            {
                string   tagName  = GetTagName(token);
                TagType  tagType  = GetTagType(token, tagName);
                NodeType nodeType = GetNodeType(token, tagName);

                if (tagType == TagType.Closing)
                {
                    // The ancestor chain of 'parent' is exactly the set of open elements, so it is
                    // the only bookkeeping needed. A separate list of opened tag names would also
                    // collect self-closing tags and implicitly closed descendants that are never
                    // removed, and it cannot change the result of this lookup.
                    DOMElement opening = GetOpeningTag(tagName, parent);

                    if (opening != null)
                    {
                        // Closing the matched element also closes everything left open inside it:
                        //
                        // <a>
                        //   <b>
                        //     <c>
                        //   </b>   -> parent = <a>
                        // </a>     -> parent = null
                        parent = opening.ParentNode;
                    }

                    // No matching open element: the stray closing tag is ignored.
                    continue;
                }

                DOMElement element = new DOMElement()
                {
                    NodeType = nodeType,
                };

                if (parent != null)
                {
                    element.ParentNode = parent;
                    parent.Children.Add(element);
                }
                else
                {
                    // Nothing is open, so this node is a root of the resulting tree.
                    roots.Add(element);
                }

                if (nodeType == NodeType.Element)
                {
                    element.TagName = tagName;

                    // Extract all attributes from the token.
                    element.Attributes = GetAttributes(token, tagName);

                    if (tagType == TagType.Opening)
                    {
                        // Only opening tags can contain children; other element tags
                        // leave the current parent untouched.
                        parent = element;
                    }
                }
                else if (nodeType == NodeType.Text)
                {
                    element.NodeValue = token;
                }
                else if (nodeType == NodeType.Comment)
                {
                    element.NodeValue = GetCommentText(token);
                }
            }

            return roots;
        }