示例#1
0
 /// <summary>
 /// Creates a document for the given markup and prepares it for parsing.
 /// </summary>
 /// <param name="html_">The markup to hold; must not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="html_"/> is null.</exception>
 public Document(string html_)
 {
     // String.Copy is obsolete and buys nothing because strings are immutable;
     // the explicit guard keeps the ArgumentNullException it used to raise on null.
     if (html_ == null)
     {
         throw new ArgumentNullException(nameof(html_));
     }

     HTML = html_;

     // The root node has no parent and is handed the whole markup as its inner HTML.
     Root = new Node("DocumentRoot", null);
     Root.setInnerHTML(0, HTML.Length, HTML);

     // Reset the cursor and the statistics gathered while parsing.
     Position = 0;
     MaxDepth = 0;
     NodeCount = 0;
 }
示例#2
0
        /// <summary>
        /// Creates a node with the given tag name and, when a parent is supplied,
        /// registers the new node as one of that parent's children.
        /// </summary>
        /// <param name="name_">Tag name stored in <c>Name</c>.</param>
        /// <param name="parent_">Owning node, or null for a node without a parent.</param>
        public Node(string name_, Node parent_ = null)
        {
            // Identity and empty containers.
            Name = name_;
            Parent = parent_;
            Attributes = new Dictionary<string, string>();
            Children = new List<Node>();

            TrailingSlash = false;

            // A node with no parent has no document to inherit.
            if (Parent == null)
            {
                HTMLDoc = null;
            }
            else
            {
                HTMLDoc = Parent.HTMLDoc;
            }

            // -1 marks every source offset as "not recorded yet".
            OpenTagStart = -1;
            InnerHTMLStart = -1;
            InnerHTMLEnd = -1;
            CloseTagEnd = -1;

            if (Parent == null)
            {
                return;
            }

            // Attach to the parent only after this node is fully initialised.
            Parent.addChild(this);
        }
示例#3
0
 /// <summary>
 /// Appends <paramref name="tag"/> to the end of this node's <c>Children</c> collection.
 /// </summary>
 /// <param name="tag">The node to register as a child.</param>
 public void addChild(Node tag) => Children.Add(tag);
示例#4
0
        /// <summary>
        /// Scans the document tag by tag and builds the node tree underneath <c>Root</c>.
        /// </summary>
        /// <remarks>
        /// Each opening tag creates a child of the current node and pushes its name on a stack;
        /// each closing tag must match the name on top of that stack. <c>NodeCount</c> and
        /// <c>MaxDepth</c> are updated as tags are opened. Tags that are still open when the
        /// scan ends are not reported as an error.
        /// </remarks>
        /// <returns>
        /// <c>true</c> once <c>SkipToNextTag</c> reports that there is nothing left to scan;
        /// <c>false</c> (after writing a message to the console) when a closing tag does not
        /// match the innermost open tag, or when a closing tag appears while no tag is open.
        /// </returns>
        public bool Parse()
        {
            int depth = 0;
            Node currentTag = Root;
            Stack<string> hierarchy = new Stack<string>();

            while (SkipToNextTag())
            {
                // Presumably steps past the '<' that SkipToNextTag stopped on.
                Move();

                char c = Peek();
                if (c == '/')
                {
                    // Found the closing tag, close the node.

                    var closeTagStart = Position - 1;
                    Move();
                    string closeName = ParseTagName();

                    // A closing tag with nothing open used to overwrite Root.InnerHTMLEnd and
                    // then crash in Stack.Peek(); treat it as malformed input instead.
                    if (hierarchy.Count == 0)
                    {
                        Console.WriteLine("Error: tried to close {0} but no tag is open!", closeName);
                        return false;
                    }

                    currentTag.InnerHTMLEnd = closeTagStart;

                    // NOTE:
                    // Despite the point of this program being the use of a Stack,
                    // the Stack is actually unneeded. The way the DOM tree is parsed,
                    // children are effectively "pushed" and "popped" by changing the
                    // node currentTag is referencing.
                    // As a result, the line below could be written:
                    // (closeName == currentTag.Name)
                    //
                    // This completely eliminates the need for a stack.

                    if (closeName == hierarchy.Peek())
                    {
                        // Presumably consumes the '>' that ends the closing tag.
                        Move();

                        currentTag.CloseTagEnd = Position;
                        currentTag = currentTag.Parent;
                        depth--;

                        hierarchy.Pop();
                    }
                    else
                    {
                        Console.WriteLine("Error: tried to close {0} before closing {1}!", closeName, currentTag.Name);
                        return false;
                    }
                }
                else
                {
                    NodeCount++;
                    depth++;
                    if (depth > MaxDepth)
                    {
                        MaxDepth = depth;
                    }

                    // Keep a dedicated reference to the element being opened: a trailing slash
                    // moves currentTag back to the parent before the tag has been fully read.
                    Node element = new Node(ParseTagName(), currentTag);
                    if (element.OpenTagStart == -1)
                    {
                        element.OpenTagStart = Position - element.Name.Length - 1;
                    }

                    currentTag = element;
                    hierarchy.Push(element.Name);

                    bool selfClosed = false;

                    SkipWhitespace();
                    while (Peek() != '>')
                    {
                        if (Peek() == '/')
                        {
                            // Handle trailing slashes on tags like <br/> and close the node.

                            Move();

                            // Close only once; a repeated '/' must not pop an ancestor.
                            if (!selfClosed)
                            {
                                selfClosed = true;

                                element.TrailingSlash = true;
                                element.CloseTagEnd = Position;
                                currentTag = element.Parent;
                                depth--;

                                hierarchy.Pop();
                            }

                            SkipWhitespace();
                        }
                        else
                        {
                            // Parse Attributes

                            string attrName = ParseAttributeName();
                            string attrValue = "";

                            SkipWhitespace();
                            if (Peek() == '=')
                            {
                                Move();
                                SkipWhitespace();
                                attrValue = ParseAttributeValue();
                                SkipWhitespace();
                            }

                            // Attributes always belong to the element being opened, even when
                            // they follow a trailing slash.
                            element.addAttribute(attrName, attrValue);
                        }
                    }

                    // Consume the '>' that ends the opening tag.
                    Move();

                    // A self-closed element has no inner HTML, and its parent's start offset
                    // must not be overwritten.
                    if (!selfClosed)
                    {
                        element.InnerHTMLStart = Position;
                    }
                }
            }
            return true;
        }