예제 #1
0
        /// <summary>
        /// Parses <paramref name="source"/> with AngleSharp's <c>HtmlParser</c> and returns the
        /// <c>InnerHtml</c> of every element matching the tag description in <paramref name="tagName"/>.
        /// </summary>
        /// <remarks>
        /// Side effects on static state: <c>numberofElement</c> is reset to 0 and then set to the number of
        /// elements in the parsed document; <c>preProcessTime</c> receives the parse duration and
        /// <c>searchTime</c> the filter/collect duration, both in milliseconds.
        /// </remarks>
        /// <param name="tagName">
        /// Tag description handed to <c>TagProcessing.NodenameAndAttributes</c>. As used here, the first
        /// entry of its result holds the node name at index 0 and every following entry holds an
        /// attribute name at index 0 and the required attribute value at index 1.
        /// </param>
        /// <param name="source">HTML text to parse.</param>
        /// <returns>
        /// The inner HTML of each matching element (possibly an empty list), or <c>null</c> when the
        /// parser yields no document.
        /// </returns>
        public static List <string> Extract_Tag_with_AngleSharp(string tagName, string source)
        {
            numberofElement = 0;

            // Prepare the node name and its attribute constraints.
            List <string[]> res        = TagProcessing.NodenameAndAttributes(tagName);
            List <string>   list_sonuc = new List <string>();
            Stopwatch       stopwatch  = new Stopwatch();

            stopwatch.Start();
            var parser   = new HtmlParser();
            var document = parser.Parse(source);

            stopwatch.Stop();
            preProcessTime = stopwatch.Elapsed.TotalMilliseconds;
            stopwatch.Restart();

            if (document == null)
            {
                // Kept for backward compatibility: callers receive null when there is no document.
                stopwatch.Stop();
                searchTime = stopwatch.Elapsed.TotalMilliseconds;
                return(null);
            }

            numberofElement = document.All.Length;

            string[] nodename = res[0];
            IEnumerable <AngleSharp.Dom.IElement> matches = document.All.Where(m => m.LocalName == nodename[0]);

            // Chain one deferred filter per attribute constraint; the sequence is enumerated only once
            // below, so no intermediate lists are allocated.
            for (int i = 1; i < res.Count; i++)
            {
                string[] att = res[i];
                matches = matches.Where(m =>
                {
                    // Single lookup instead of indexing the attribute collection twice.
                    var attribute = m.Attributes[att[0]];
                    return attribute != null && attribute.Value == att[1];
                });
            }

            // The former "temp != null" check was removed: a LINQ result is never null, so its
            // else-branch (returning null) could not be reached.
            list_sonuc.AddRange(matches.Select(m => m.InnerHtml));

            stopwatch.Stop();
            searchTime = stopwatch.Elapsed.TotalMilliseconds;
            return(list_sonuc);
        }
예제 #2
0
파일: MSDOM.cs 프로젝트: erdincuzun/UzunExt
        /// <summary>
        /// Loads <paramref name="source"/> into an MSHTML <c>HTMLDocument</c> and returns the
        /// <c>innerHTML</c> of every element matching the tag description in <paramref name="tagName"/>.
        /// </summary>
        /// <remarks>
        /// Side effects on static state: <c>preProcessTime</c> receives the document load duration and
        /// <c>searchTime</c> the lookup duration, both in milliseconds.
        /// <para>
        /// Matching rules, in order:
        /// an <c>id</c> constraint short-circuits to <c>getElementById</c> (node name and other
        /// constraints are ignored); a <c>class</c> constraint requires <c>className</c> to equal the
        /// value exactly; any other constraint is a loose substring test of both the attribute name
        /// and the value against the element's opening tag text.
        /// </para>
        /// </remarks>
        /// <param name="tagName">
        /// Tag description handed to <c>TagProcessing.NodenameAndAttributes</c>. As used here, the first
        /// entry of its result holds the node name at index 0 and every following entry holds an
        /// attribute name at index 0 and the required attribute value at index 1.
        /// </param>
        /// <param name="source">HTML text written into the document.</param>
        /// <returns>
        /// The inner HTML of each matching element. The list is empty when nothing matches, including
        /// when an <c>id</c> constraint names an element that does not exist.
        /// </returns>
        public static List <string> Extract_Tag_with_IHTMLDocument(string tagName, string source)
        {
            // Prepare the node name and its attribute constraints.
            List <string[]> res = TagProcessing.NodenameAndAttributes(tagName);

            List <string> list_sonuc = new List <string>();
            Stopwatch     stopwatch  = new Stopwatch();

            stopwatch.Start();
            HTMLDocument   doc  = new HTMLDocument();
            IHTMLDocument2 doc2 = (IHTMLDocument2)doc;

            doc2.clear();
            doc2.designMode = "On";
            doc2.write(source);

            stopwatch.Stop();
            preProcessTime = stopwatch.Elapsed.TotalMilliseconds;
            stopwatch.Restart();

            // "doc" was just constructed, so the former null check (and its null-returning branch)
            // was dead code and has been removed.

            // Upper-case into a local instead of overwriting res[0][0]; invariant casing keeps the
            // result independent of the current culture.
            string upperNodeName = res[0][0].ToUpperInvariant();

            for (int i = 1; i < res.Count; i++)
            {
                string[] att = res[i];
                if (att[0] == "id")
                {
                    // An id identifies at most one element. A missing element yields an empty
                    // result instead of a NullReferenceException.
                    var byId = doc.getElementById(att[1]);
                    if (byId != null)
                    {
                        list_sonuc.Add(byId.innerHTML);
                    }

                    stopwatch.Stop();
                    searchTime = stopwatch.Elapsed.TotalMilliseconds;
                    return(list_sonuc);
                }
            }

            foreach (IHTMLElement element in doc.getElementsByTagName(upperNodeName))
            {
                bool sonuc = true;
                for (int i = 1; i < res.Count; i++)
                {
                    string[] att = res[i];
                    if (att[0] == "class")
                    {
                        if (element.className != att[1])
                        {
                            sonuc = false;
                            break;
                        }

                        continue;
                    }

                    if (element.innerHTML == null)
                    {
                        sonuc = false;
                        break;
                    }

                    // Read outerHTML once, and tolerate markup without '>' instead of letting
                    // Substring throw on a -1 index.
                    string outer    = element.outerHTML ?? string.Empty;
                    int    tagEnd   = outer.IndexOf('>');
                    string tag_temp = tagEnd >= 0 ? outer.Substring(0, tagEnd) : outer;

                    // NOTE: loose match - only checks that name and value both occur somewhere in
                    // the opening tag text.
                    if (!(tag_temp.Contains(att[0]) && tag_temp.Contains(att[1])))
                    {
                        sonuc = false;
                        break;
                    }
                }

                if (sonuc)
                {
                    list_sonuc.Add(element.innerHTML);
                }
            }

            stopwatch.Stop();
            searchTime = stopwatch.Elapsed.TotalMilliseconds;
            return(list_sonuc);
        }