예제 #1
0
        /// <summary>
        /// Collects <paramref name="element"/> and all of its descendants whose tag name
        /// equals <paramref name="tag"/>. An element is examined before its children.
        /// </summary>
        /// <param name="tag">Tag name compared with <c>==</c> against <c>GetTagName()</c>.</param>
        /// <param name="element">Element at which the search starts.</param>
        /// <param name="list">List that receives the matches; a new list is created when this is null.</param>
        /// <returns>The list holding the matches (the supplied list when one was passed in).</returns>
        public List<HtmlElement> GetElementsByTag(string tag, HtmlElement element, List<HtmlElement> list)
        {
            List<HtmlElement> matches = list ?? new List<HtmlElement>();

            if (element.GetTagName() == tag)
            {
                matches.Add(element);
            }

            // Nothing below this element to search.
            bool hasChildren = element.ChildCount > 0;
            if (!hasChildren)
            {
                return matches;
            }

            foreach (HtmlElement descendant in element.Childs)
            {
                // The recursive call appends to the shared list; its return value is that same list.
                GetElementsByTag(tag, descendant, matches);
            }

            return matches;
        }
예제 #2
0
        /// <summary>
        /// Finds the elements at or below <paramref name="element"/> that have the given tag
        /// name and whose <paramref name="attribute"/> equals <paramref name="value"/>.
        /// </summary>
        /// <param name="tag">Tag name the elements must have.</param>
        /// <param name="attribute">Attribute name passed to <c>GetAttribute</c>.</param>
        /// <param name="value">Value the attribute is compared against with <c>==</c>.</param>
        /// <param name="element">Element at which the search starts.</param>
        /// <returns>A new list with the matching elements, in the order the tag search found them.</returns>
        public List<HtmlElement> GetElementsByTag(string tag, string attribute, string value, HtmlElement element)
        {
            // First gather every element with the requested tag, then keep those whose attribute matches.
            List<HtmlElement> candidates = GetElementsByTag(tag, element, null);

            return candidates.FindAll(candidate => candidate.GetAttribute(attribute) == value);
        }
예제 #3
0
        /// <summary>
        /// Returns the first element with the given tag name, searching
        /// <paramref name="element"/> itself and then its descendants depth-first.
        /// </summary>
        /// <param name="tag">Tag name compared with <c>==</c> against <c>GetTagName()</c>.</param>
        /// <param name="element">Element at which the search starts.</param>
        /// <returns>The first matching element, or null when none is found.</returns>
        public HtmlElement GetFirst(string tag, HtmlElement element)
        {
            if (element.GetTagName() == tag)
            {
                return element;
            }

            bool hasChildren = element.ChildCount > 0;
            if (!hasChildren)
            {
                return null;
            }

            foreach (HtmlElement child in element.Childs)
            {
                // Stop at the first subtree that yields a match.
                HtmlElement found = GetFirst(tag, child);
                if (found != null)
                {
                    return found;
                }
            }

            return null;
        }
예제 #4
0
        /// <summary>
        /// Legacy single-pass parser that builds an element tree under <c>Root</c> by scanning
        /// <paramref name="html"/> one character at a time.
        /// </summary>
        /// <remarks>
        /// Characters are accumulated in a buffer. While the current element is a
        /// <c>script</c> or <c>style</c> element the text is appended to that element's
        /// <c>InnerText</c> until a <c>&lt;/script</c> or <c>&lt;/style</c> closer is seen.
        /// Everywhere else a '&lt;' flushes buffered text as a text child and a '&gt;' turns
        /// the buffer into an opening, closing or self-closing tag.
        /// Known limitations kept from the original: text still buffered when the input ends
        /// is not added to the tree, and text children are created without a Parent.
        /// </remarks>
        /// <param name="html">Markup to parse; null or empty input is ignored.</param>
        public void ParseOld(string html)
        {
            if (string.IsNullOrEmpty(html)) return;

            HtmlElement currentElement = Root;
            string buffer = null;

            for (int i = 0; i < html.Length; i++)
            {
                switch (currentElement.GetTagName())
                {
                    case "script":
                    case "style":
                        switch (html[i])
                        {
                            case '<':
                                // CompareOrdinal tolerates fewer than 7/6 remaining characters,
                                // where Substring would throw on truncated input.
                                if (string.CompareOrdinal(html, i + 1, "/script", 0, 7) == 0
                                    || string.CompareOrdinal(html, i + 1, "/style", 0, 6) == 0)
                                {
                                    // Start of the closing tag: everything buffered so far is raw content.
                                    if (!string.IsNullOrEmpty(buffer))
                                        currentElement.InnerText += buffer;
                                    buffer = null;
                                }
                                else
                                    buffer += html[i];
                                break;
                            case '>':
                                // buffer is null when '>' is the first character after a reset.
                                if (buffer != null
                                    && (buffer.StartsWith("/script") || buffer.StartsWith("/style")))
                                {
                                    // End of the closing tag: step back out of the script/style element.
                                    if (currentElement.Parent != null) currentElement = currentElement.Parent;
                                    buffer = null;
                                }
                                else
                                    buffer += html[i];
                                break;
                            default:
                                buffer += html[i];
                                break;
                        }
                        break;
                    default:
                        switch (html[i])
                        {
                            case '<':
                                // Text collected since the previous tag becomes a text child.
                                if (!string.IsNullOrEmpty(buffer))
                                {
                                    currentElement.Childs.Add(
                                        new HtmlElement() { InnerText = buffer });
                                }
                                buffer = null;
                                break;
                            case '>':
                                if (string.IsNullOrEmpty(buffer))
                                {
                                    // Nothing buffered ("<>", a leading '>' or ">>"): there is no
                                    // tag name to act on, so the character is skipped.
                                    break;
                                }
                                if (buffer.StartsWith("!--"))
                                {
                                    // Comment: discard what was buffered.
                                    buffer = null;
                                    continue;
                                }
                                if (buffer.StartsWith("/") || buffer.EndsWith("/"))
                                {
                                    if (buffer.EndsWith("/") && buffer.Contains(" ")) //self closing tag
                                    {
                                        HtmlElement newElement = new HtmlElement()
                                        {
                                            Tag = buffer
                                        };
                                        newElement.Parent = currentElement;
                                        currentElement.Childs.Add(newElement);
                                    }
                                    else
                                    {
                                        // Closing tag (or a self-closing tag without attributes):
                                        // move up when it names the current element or its grandparent.
                                        string tempTag = buffer.Replace("/", "").ToLowerInvariant();
                                        if (currentElement.Parent != null)
                                        {
                                            if (currentElement.GetTagName() == tempTag)
                                            {
                                                currentElement = currentElement.Parent;
                                            }
                                            else if (currentElement.Parent.Parent != null
                                                && currentElement.Parent.Parent.GetTagName() == tempTag)
                                            {
                                                currentElement = currentElement.Parent.Parent;
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    HtmlElement newElement = new HtmlElement()
                                    {
                                        Tag = buffer
                                    };
                                    newElement.Parent = currentElement;
                                    currentElement.Childs.Add(newElement);

                                    // These tags are added as children but never entered.
                                    if (newElement.GetTagName() != "br"
                                        && newElement.GetTagName() != "meta"
                                        && newElement.GetTagName() != "img"
                                        && newElement.GetTagName() != "!doctype")
                                    {
                                        currentElement = newElement;
                                    }
                                }
                                buffer = null;
                                break;
                            default:
                                buffer += html[i];
                                break;
                        }
                        break;
                }
            }
        }
예제 #5
0
        //public void Parse2(string html)
        //{
        //    this.html = html;
        //    state = 0;
        //    index = 0;
        //    while (index < html.Length)
        //    {
        //        char c = getChar();
        //        switch (state)
        //        {
        //            case 0:
        //                while (getChar() != '<') { }
        //                currentTag = new HtmlElement();
        //                Root.Childs.Add(currentTag);
        //                state = 1;
        //                break;
        //            case 1: //tag start
        //                if (c == ' ')
        //                    state= 2;
        //                else if (c == '/')
        //                    state = 3;
        //                else if (c == '>')
        //                    state = 3;
        //                else
        //                    currentTag.Tag += c;
        //                break;
        //            case 2: //attributes
        //            default:
        //                break;
        //        }
        //    }
        //}
        /// <summary>
        /// Parses <paramref name="html"/> into an element tree under <c>Root</c>, scanning the
        /// input one character at a time. The input is stored in <c>OriginalText</c> first.
        /// </summary>
        /// <remarks>
        /// Text between tags becomes a child element with only <c>InnerText</c> set. A '&gt;'
        /// outside a tag is kept as literal text, and inside a <c>script</c> element a '&lt;'
        /// that is not followed by '/' is kept as script text. An empty tag ("&lt;&gt;") is
        /// also kept as literal text. Text left over when the input ends is attached to
        /// <c>Root</c>.
        /// </remarks>
        /// <param name="html">Markup to parse; null or empty input only updates <c>OriginalText</c>.</param>
        public void Parse(string html)
        {
            OriginalText = html;

            if (string.IsNullOrEmpty(html)) return;

            HtmlElement currentElement = Root;
            bool inTag = false;
            string buffer = null;

            for (int i = 0; i < html.Length; i++)
            {
                switch (html[i])
                {
                    case '<':
                        // Inside a script element a '<' that does not begin "</" is treated as
                        // script text rather than the start of a tag.
                        if ((i + 1) < html.Length && html[i + 1] != '/' && currentElement.GetTagName() == "script")
                        {
                            buffer += html[i];
                            continue;
                        }
                        if (!inTag)
                        {
                            // Text collected since the previous tag becomes a text child.
                            if (!string.IsNullOrEmpty(buffer))
                            {
                                HtmlElement textElement = new HtmlElement() { InnerText = buffer };
                                textElement.Parent = currentElement;
                                currentElement.Childs.Add(textElement);
                            }
                            inTag = true;
                            buffer = null;
                        }
                        else
                            buffer += html[i];
                        break;
                    case '>':
                        // A bare '>' in HTML text is supported: it is incorrect markup, but
                        // browsers accept it.
                        if (!inTag)
                        {
                            buffer += html[i];
                            continue;
                        }
                        if (string.IsNullOrEmpty(buffer))
                        {
                            // "<>" carries no tag name; assigning a null Tag and then reading it
                            // would fail, so keep the two characters as literal text instead.
                            buffer = "<>";
                            inTag = false;
                            break;
                        }

                        HtmlElement element2 = new HtmlElement() { Tag = buffer };

                        if (element2.Tag.StartsWith("/"))
                        {
                            string temp = element2.Tag.Substring(1).ToLowerInvariant();
                            if (currentElement.GetTagName() == temp)
                            {
                                // Closes the current element.
                                if (currentElement.Parent != null)
                                    currentElement = currentElement.Parent;
                            }
                            else if (currentElement.Parent != null
                                && currentElement.Parent.GetTagName() == temp)
                            {
                                // Closes the parent (the current element was left open). Step out
                                // of the parent when it has a parent of its own; otherwise stay on
                                // it so currentElement never becomes null.
                                currentElement = currentElement.Parent.Parent ?? currentElement.Parent;
                            }
                            else
                            {
                                // Unmatched closing tag: leave any enclosing elements whose tag
                                // is registered in inlineTags.
                                while (currentElement.Parent != null && inlineTags.ContainsKey(currentElement.GetTagName()))
                                {
                                    currentElement = currentElement.Parent;
                                }
                            }
                        }
                        else
                        {
                            string tagName = element2.GetTagName();

                            // Tags that are added as children but never entered.
                            bool staysAtCurrentLevel = tagName == "br"
                                || tagName == "meta"
                                || tagName == "img"
                                || tagName == "hr"
                                || tagName == "link"
                                || tagName.StartsWith("!--")
                                || tagName == "!doctype"
                                || tagName.EndsWith("/");

                            element2.Parent = currentElement;
                            currentElement.Childs.Add(element2);

                            if (!staysAtCurrentLevel)
                            {
                                currentElement = element2;
                            }
                        }
                        inTag = false;
                        buffer = null;
                        break;
                    default:
                        buffer += html[i];
                        break;
                }
            }

            // Whatever is still buffered at the end of the input is attached to Root as text.
            if (!string.IsNullOrEmpty(buffer))
            {
                HtmlElement lastElement = new HtmlElement();
                lastElement.InnerText = buffer;
                lastElement.Parent = Root;
                Root.Childs.Add(lastElement);
            }
        }