/// <summary>
/// Creates a document over the given HTML text: stores the text, creates the
/// "DocumentRoot" node, hands the root the whole text as its inner HTML range,
/// and resets the parse cursor and statistics to zero.
/// </summary>
/// <param name="html_">The HTML source text. Must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="html_"/> is null.</exception>
public Document(string html_)
{
    // String.Copy is obsolete and only duplicated an immutable string; keep its
    // one observable effect (rejecting null) and store the reference directly.
    if (html_ == null)
    {
        throw new ArgumentNullException("html_");
    }

    HTML = html_;

    Root = new Node("DocumentRoot", null);
    Root.setInnerHTML(0, HTML.Length, HTML);

    Position = 0;
    MaxDepth = 0;
    NodeCount = 0;
}
/// <summary>
/// Creates a node with the given name, empty attribute and child collections,
/// and all position markers set to -1. When a parent is supplied, the node
/// takes the parent's HTMLDoc and registers itself as one of the parent's children.
/// </summary>
/// <param name="name_">Name stored in <c>Name</c>.</param>
/// <param name="parent_">Parent node, or null for a node without a parent.</param>
public Node(string name_, Node parent_ = null)
{
    Name = name_;
    Parent = parent_;

    Attributes = new Dictionary<string, string>();
    Children = new List<Node>();
    TrailingSlash = false;

    // -1 marks a position that has not been recorded yet.
    OpenTagStart = -1;
    InnerHTMLStart = -1;
    InnerHTMLEnd = -1;
    CloseTagEnd = -1;

    if (Parent == null)
    {
        HTMLDoc = null;
    }
    else
    {
        HTMLDoc = Parent.HTMLDoc;
        // Registration is the last step so the parent receives a fully initialised node.
        Parent.addChild(this);
    }
}
/// <summary>
/// Appends the given node to the end of this node's <c>Children</c> list.
/// </summary>
/// <param name="tag">The node to append.</param>
public void addChild(Node tag)
{
    this.Children.Add(tag);
}
/// <summary>
/// Scans the document tag by tag and builds the node tree beneath <c>Root</c>,
/// recording tag positions on each node and updating <c>NodeCount</c> and
/// <c>MaxDepth</c> as nodes are opened.
/// </summary>
/// <returns>
/// <c>true</c> when scanning reaches the end without a closing-tag error;
/// <c>false</c> (after writing a message to the console) when a closing tag
/// does not match the innermost open tag, or when a closing tag appears while
/// no tag is open.
/// </returns>
public bool Parse()
{
    int depth = 0;
    Node currentTag = Root;
    Stack<string> hierarchy = new Stack<string>();

    while (SkipToNextTag())
    {
        Move();
        char c = Peek();

        if (c == '/')
        {
            // Found the closing tag, close the node.
            currentTag.InnerHTMLEnd = Position - 1;
            Move();
            string closeName = ParseTagName();

            // A closing tag with nothing open used to hit Stack.Peek() on an
            // empty stack and throw; report it as a parse failure instead.
            if (hierarchy.Count == 0)
            {
                Console.WriteLine("Error: tried to close {0} but no tag is open!", closeName);
                return false;
            }

            // NOTE:
            // Despite the point of this program being the use of a Stack,
            // the Stack is actually unneeded. The way the DOM tree is parsed,
            // children are effectively "pushed" and "popped" by changing the
            // node currentTag is referencing.
            // As a result, the line below could be written:
            //     (closeName == currentTag.Name)
            //
            // This completely eliminates the need for a stack.
            if (closeName == hierarchy.Peek())
            {
                Move();
                currentTag.CloseTagEnd = Position;
                currentTag = currentTag.Parent;
                depth--;
                hierarchy.Pop();
            }
            else
            {
                Console.WriteLine("Error: tried to close {0} before closing {1}!", closeName, currentTag.Name);
                return false;
            }
        }
        else
        {
            NodeCount++;
            depth++;
            if (depth > MaxDepth)
            {
                MaxDepth = depth;
            }

            // Keep a dedicated reference to the node being opened: a trailing
            // slash resets currentTag to the parent before the tag has been
            // fully read, and everything below must still target this node.
            Node opened = new Node(ParseTagName(), currentTag);
            if (opened.OpenTagStart == -1)
            {
                opened.OpenTagStart = Position - opened.Name.Length - 1;
            }
            hierarchy.Push(opened.Name);
            currentTag = opened;

            bool selfClosed = false;

            SkipWhitespace();
            while (Peek() != '>')
            {
                if (Peek() == '/')
                {
                    // Handle trailing slashes on tags like <br/> and close the node.
                    Move();
                    if (!selfClosed)
                    {
                        // Close only once; a repeated slash must not pop the parent as well.
                        selfClosed = true;
                        opened.TrailingSlash = true;
                        // NOTE(review): CloseTagEnd is recorded here before the final
                        // Move() below, while the closing-tag branch records it after
                        // its Move(). Kept as-is; confirm which one consumers expect.
                        opened.CloseTagEnd = Position;
                        currentTag = opened.Parent;
                        depth--;
                        hierarchy.Pop();
                    }
                    SkipWhitespace();
                }
                else
                {
                    // Parse Attributes
                    string attrName = ParseAttributeName();
                    string attrValue = "";
                    SkipWhitespace();
                    if (Peek() == '=')
                    {
                        Move();
                        SkipWhitespace();
                        attrValue = ParseAttributeValue();
                        SkipWhitespace();
                    }
                    // Attach to the tag being read, even if a slash preceded the attribute.
                    opened.addAttribute(attrName, attrValue);
                }
            }

            Move();

            // Only a tag that stays open has inner HTML. Previously this assignment
            // went through currentTag, which after a self-closing tag is the parent,
            // so the parent's InnerHTMLStart was overwritten.
            if (!selfClosed)
            {
                opened.InnerHTMLStart = Position;
            }
        }
    }

    return true;
}