/// <summary>
/// Collects <paramref name="element"/> and every descendant whose tag name equals
/// <paramref name="tag"/>, visiting the tree depth-first in document order.
/// </summary>
/// <param name="tag">Tag name compared against each node's <c>GetTagName()</c> result.</param>
/// <param name="element">Node at which the search starts; it is itself a candidate.</param>
/// <param name="list">Accumulator to append matches to, or <c>null</c> to start a new list.</param>
/// <returns>The accumulator (the instance passed in, or the newly created one).</returns>
public List<HtmlElement> GetElementsByTag(string tag, HtmlElement element, List<HtmlElement> list)
{
    List<HtmlElement> matches = list ?? new List<HtmlElement>();

    if (element.GetTagName() == tag)
    {
        matches.Add(element);
    }

    bool hasChildren = element.ChildCount > 0;
    if (hasChildren)
    {
        foreach (HtmlElement descendant in element.Childs)
        {
            // The shared accumulator is filled in place; the return value is the same list.
            GetElementsByTag(tag, descendant, matches);
        }
    }

    return matches;
}
/// <summary>
/// Finds every node under (and including) <paramref name="element"/> whose tag name equals
/// <paramref name="tag"/> and whose <paramref name="attribute"/> equals <paramref name="value"/>.
/// </summary>
/// <param name="tag">Tag name to match.</param>
/// <param name="attribute">Attribute name passed to <c>GetAttribute</c> on each candidate.</param>
/// <param name="value">Value the attribute must be equal to.</param>
/// <param name="element">Node at which the search starts.</param>
/// <returns>A new list holding the matching nodes in document order; empty when none match.</returns>
public List<HtmlElement> GetElementsByTag(string tag, string attribute, string value, HtmlElement element)
{
    // First gather every node with the right tag, then keep those with the wanted attribute value.
    List<HtmlElement> candidates = GetElementsByTag(tag, element, null);
    return candidates.FindAll(candidate => candidate.GetAttribute(attribute) == value);
}
/// <summary>
/// Returns the first node, in depth-first document order, whose tag name equals
/// <paramref name="tag"/>. The starting node itself is checked first.
/// </summary>
/// <param name="tag">Tag name compared against each node's <c>GetTagName()</c> result.</param>
/// <param name="element">Node at which the search starts.</param>
/// <returns>The first matching node, or <c>null</c> when the subtree contains none.</returns>
public HtmlElement GetFirst(string tag, HtmlElement element)
{
    if (element.GetTagName() == tag)
    {
        return element;
    }

    bool hasChildren = element.ChildCount > 0;
    if (!hasChildren)
    {
        return null;
    }

    foreach (HtmlElement descendant in element.Childs)
    {
        HtmlElement match = GetFirst(tag, descendant);
        if (match != null)
        {
            return match;
        }
    }

    return null;
}
/// <summary>
/// Older single-pass parser that builds a tree of <see cref="HtmlElement"/> nodes under <c>Root</c>.
/// </summary>
/// <remarks>
/// The input is scanned one character at a time. Characters accumulate in a buffer that is
/// interpreted as text when a '&lt;' is reached and as a tag when a '&gt;' is reached.
/// While the current element is a <c>script</c> or <c>style</c> element, everything up to the
/// matching closing tag is appended to that element's <c>InnerText</c> instead of being parsed.
/// Known limitations that are kept as-is: there is no "inside a tag" state, so a '&gt;' that
/// follows buffered text turns that text into a tag, and text remaining after the last tag is
/// discarded when the input ends.
/// </remarks>
/// <param name="html">Markup to parse. Nothing happens when it is <c>null</c> or empty.</param>
public void ParseOld(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    HtmlElement currentElement = Root;
    string buffer = null;

    for (int i = 0; i < html.Length; i++)
    {
        char c = html[i];

        switch (currentElement.GetTagName())
        {
            case "script":
            case "style":
                // Raw-text mode: only "</script" or "</style" ends the content.
                switch (c)
                {
                    case '<':
                        // CompareOrdinal compares at most the characters that are available, so a
                        // '<' close to the end of the input no longer throws as Substring did.
                        if (string.CompareOrdinal(html, i + 1, "/script", 0, 7) == 0
                            || string.CompareOrdinal(html, i + 1, "/style", 0, 6) == 0)
                        {
                            if (!string.IsNullOrEmpty(buffer))
                            {
                                currentElement.InnerText += buffer;
                            }

                            buffer = null;
                        }
                        else
                        {
                            buffer += c;
                        }

                        break;

                    case '>':
                        // buffer is null when '>' is the first character after the opening tag;
                        // in that case it is ordinary content.
                        if (buffer != null && (buffer.StartsWith("/script") || buffer.StartsWith("/style")))
                        {
                            // Closing tag reached: step back to the parent, if there is one.
                            if (currentElement.Parent != null)
                            {
                                currentElement = currentElement.Parent;
                            }

                            buffer = null;
                        }
                        else
                        {
                            buffer += c;
                        }

                        break;

                    default:
                        buffer += c;
                        break;
                }

                break;

            default:
                switch (c)
                {
                    case '<':
                        // Whatever was buffered before a tag starts is a text node.
                        if (!string.IsNullOrEmpty(buffer))
                        {
                            HtmlElement textElement = new HtmlElement() { InnerText = buffer };
                            textElement.Parent = currentElement;
                            currentElement.Childs.Add(textElement);
                        }

                        buffer = null;
                        break;

                    case '>':
                        if (buffer == null)
                        {
                            // Nothing was buffered, so there is no tag to interpret;
                            // keep the character as literal text instead of dereferencing null.
                            buffer += c;
                            continue;
                        }

                        if (buffer.StartsWith("!--"))
                        {
                            // Comment: dropped up to this '>'.
                            buffer = null;
                            continue;
                        }

                        if (buffer.StartsWith("/") || buffer.EndsWith("/"))
                        {
                            if (buffer.EndsWith("/") && buffer.Contains(" "))
                            {
                                // Self-closing tag with attributes: added as a child, never entered.
                                HtmlElement selfClosing = new HtmlElement() { Tag = buffer };
                                selfClosing.Parent = currentElement;
                                currentElement.Childs.Add(selfClosing);
                            }
                            else
                            {
                                // Closing tag (or attribute-less self-closing tag): unwind if it
                                // names the current element or its grandparent.
                                string tempTag = buffer.Replace("/", "").ToLowerInvariant();
                                if (currentElement.Parent != null)
                                {
                                    if (currentElement.GetTagName() == tempTag)
                                    {
                                        currentElement = currentElement.Parent;
                                    }
                                    else if (currentElement.Parent.Parent != null
                                        && currentElement.Parent.Parent.GetTagName() == tempTag)
                                    {
                                        currentElement = currentElement.Parent.Parent;
                                    }
                                }
                            }
                        }
                        else
                        {
                            HtmlElement newElement = new HtmlElement() { Tag = buffer };
                            newElement.Parent = currentElement;
                            currentElement.Childs.Add(newElement);

                            // These tags never get children, so the parser does not descend into them.
                            string newTagName = newElement.GetTagName();
                            if (newTagName != "br" && newTagName != "meta"
                                && newTagName != "img" && newTagName != "!doctype")
                            {
                                currentElement = newElement;
                            }
                        }

                        buffer = null;
                        break;

                    default:
                        buffer += c;
                        break;
                }

                break;
        }
    }
}
// Commented-out, unfinished draft of a state-machine based parser.
//public void Parse2(string html)
//{
//    this.html = html;
//    state = 0;
//    index = 0;
//    while (index < html.Length)
//    {
//        char c = getChar();
//        switch (state)
//        {
//            case 0:
//                while (getChar() != '<') { }
//                currentTag = new HtmlElement();
//                Root.Childs.Add(currentTag);
//                state = 1;
//                break;
//            case 1: //tag start
//                if (c == ' ')
//                    state= 2;
//                else if (c == '/')
//                    state = 3;
//                else if (c == '>')
//                    state = 3;
//                else
//                    currentTag.Tag += c;
//                break;
//            case 2: //attributes
//            default:
//                break;
//        }
//    }
//}

/// <summary>
/// Parses <paramref name="html"/> into a tree of <see cref="HtmlElement"/> nodes under <c>Root</c>.
/// </summary>
/// <remarks>
/// The input is scanned one character at a time. Text between tags becomes a node with only
/// <c>InnerText</c> set; the text between '&lt;' and '&gt;' becomes a node with <c>Tag</c> set.
/// A closing tag unwinds the current position: to the parent when it names the current element,
/// past the parent when it names the parent, and otherwise up through every enclosing element
/// whose tag name is a key of <c>inlineTags</c>.
/// Text left over after the last tag is attached to <c>Root</c>.
/// </remarks>
/// <param name="html">
/// Markup to parse. It is always stored in <c>OriginalText</c>; when it is <c>null</c> or empty
/// nothing else happens.
/// </param>
public void Parse(string html)
{
    OriginalText = html;
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    HtmlElement currentElement = Root;
    bool inTag = false;
    string buffer = null;

    for (int i = 0; i < html.Length; i++)
    {
        char c = html[i];

        switch (c)
        {
            case '<':
                // Inside a script element a '<' that does not begin a closing tag is allowed
                // and treated as part of the script text.
                if ((i + 1) < html.Length && html[i + 1] != '/' && currentElement.GetTagName() == "script")
                {
                    buffer += c;
                    continue;
                }

                if (!inTag)
                {
                    // A tag starts here: whatever was buffered so far is a text node.
                    if (!string.IsNullOrEmpty(buffer))
                    {
                        HtmlElement textElement = new HtmlElement() { InnerText = buffer };
                        textElement.Parent = currentElement;
                        currentElement.Childs.Add(textElement);
                    }

                    inTag = true;
                    buffer = null;
                }
                else
                {
                    buffer += c;
                }

                break;

            case '>':
                if (!inTag)
                {
                    // A bare '>' in text is accepted. It is invalid markup, but browsers
                    // render it, so it is kept as a literal character.
                    buffer += c;
                    continue;
                }

                if (buffer == null)
                {
                    // Empty tag "<>": there is no tag text at all. The original assigned the null
                    // buffer to Tag and immediately called Tag.StartsWith on it; keep the two
                    // characters as literal text instead.
                    buffer = "<>";
                    inTag = false;
                    continue;
                }

                HtmlElement tagElement = new HtmlElement() { Tag = buffer };
                if (tagElement.Tag.StartsWith("/"))
                {
                    string closingName = tagElement.Tag.Substring(1).ToLowerInvariant();
                    if (currentElement.GetTagName() == closingName)
                    {
                        if (currentElement.Parent != null)
                        {
                            currentElement = currentElement.Parent;
                        }
                    }
                    else if (currentElement.Parent != null && currentElement.Parent.GetTagName() == closingName)
                    {
                        // The tag closes the parent, which implicitly closes the current element
                        // too. When the parent has no parent of its own, stay on the parent rather
                        // than letting currentElement become null.
                        currentElement = currentElement.Parent.Parent ?? currentElement.Parent;
                    }
                    else
                    {
                        // Unmatched closing tag: leave every enclosing inline element.
                        while (currentElement.Parent != null && inlineTags.ContainsKey(currentElement.GetTagName()))
                        {
                            currentElement = currentElement.Parent;
                        }
                    }
                }
                else
                {
                    // Tags that never get children (void tags, comments, doctype, self-closing)
                    // are added as a child but not entered.
                    string tagName = tagElement.GetTagName();
                    bool canHaveChildren =
                        tagName != "br" && tagName != "meta" && tagName != "img"
                        && tagName != "hr" && tagName != "link"
                        && !tagName.StartsWith("!--") && tagName != "!doctype"
                        && !tagName.EndsWith("/");

                    tagElement.Parent = currentElement;
                    currentElement.Childs.Add(tagElement);
                    if (canHaveChildren)
                    {
                        currentElement = tagElement;
                    }
                }

                inTag = false;
                buffer = null;
                break;

            default:
                buffer += c;
                break;
        }
    }

    // Text after the last tag is attached to the root, with its Parent set like every other node.
    if (!string.IsNullOrEmpty(buffer))
    {
        HtmlElement lastElement = new HtmlElement() { InnerText = buffer };
        lastElement.Parent = Root;
        Root.Childs.Add(lastElement);
    }
}