示例#1
0
 /// <summary>
 /// Reports whether <paramref name="lstWebsite"/> already holds an entry whose
 /// Url equals the Url of <paramref name="ws"/>.
 /// </summary>
 /// <param name="lstWebsite">Entries to scan.</param>
 /// <param name="ws">Candidate whose Url is looked up.</param>
 /// <returns>true as soon as one entry has an equal Url; false when none does.</returns>
 private bool IsDuplicate(List<Website> lstWebsite, Website ws)
 {
     // Exists stops at the first entry that satisfies the predicate.
     return lstWebsite.Exists(candidate => candidate.Url == ws.Url);
 }
示例#2
0
        /// <summary>
        /// Collects the anchor ("a") elements of the current browser document, or only
        /// those beneath the element located by <paramref name="xpath"/>, and appends one
        /// Website per non-empty href to lstTemp and lstAllLink.
        /// </summary>
        /// <param name="xpath">
        /// Handed to GetElementByXpath to select the container element; when null or
        /// empty the whole document is scanned.
        /// </param>
        private void getalllink(string xpath)
        {
            WebBrowser wb = (WebBrowser)GetCurrentWB();
            if (wb == null)
            {
                return;
            }

            Application.DoEvents();

            // Without a document or an address there is nothing to collect.
            if (wb.Document == null || wb.Url == null)
            {
                return;
            }

            HtmlElementCollection collection = null;
            if (string.IsNullOrEmpty(xpath))
            {
                collection = wb.Document.GetElementsByTagName("a");
            }
            else
            {
                HtmlElement ele = GetElementByXpath(wb.Document, xpath);
                if (ele != null)
                {
                    collection = ele.GetElementsByTagName("a");
                }
            }

            if (collection == null)
            {
                return;
            }

            // Page-level values are the same for every anchor, so compute them once.
            Uri pageUri = wb.Url;
            string parent = pageUri.ToString();

            // Scheme + authority of the page (e.g. "https://host:8080"). Taking it from the
            // parsed Uri avoids searching for the path text inside the full URL, which could
            // match inside the "//host" part (http://example.com/example), and avoids
            // assuming the "http" scheme for root pages.
            string siteRoot = pageUri.IsAbsoluteUri
                ? pageUri.GetLeftPart(UriPartial.Authority)
                : string.Empty;

            // hrefs already seen during this call; consulted through Contains below.
            List<string> links = new List<string>();

            foreach (HtmlElement elm in collection)
            {
                // Stop requested: the remaining anchors would all be skipped anyway.
                if (IsStop)
                {
                    break;
                }

                string url = elm.GetAttribute("href");
                if (string.IsNullOrEmpty(url))
                {
                    continue;
                }

                Website w = new Website();
                w.Url = url;

                // An empty root would prefix-match every link, so treat it as "not internal".
                w.IsInternal = siteRoot.Length > 0
                    && url.StartsWith(siteRoot, StringComparison.OrdinalIgnoreCase);

                if (url.IndexOf("javascript", StringComparison.Ordinal) != -1)
                {
                    w.IsValid = false;
                }
                else if (!Contains(links, url))
                {
                    w.IsValid = true;
                }
                // A repeated href is deliberately left with whatever IsValid a new Website starts with.

                // Search-engine helper links (related / site / cache queries) are never valid targets.
                if (w.Url.IndexOf("q=related:", StringComparison.Ordinal) != -1
                    || w.Url.IndexOf("q=+site:", StringComparison.Ordinal) != -1
                    || w.Url.IndexOf("q=cache:", StringComparison.Ordinal) != -1)
                {
                    w.IsValid = false;
                }

                w.Parent = parent;

                links.Add(url);
                lstTemp.Add(w);
                lstAllLink.Add(w);
            }
        }