/// <summary>
/// Tells whether <paramref name="lstWebsite"/> already holds an entry whose
/// <c>Url</c> equals the <c>Url</c> of <paramref name="ws"/>.
/// </summary>
/// <param name="lstWebsite">The list of websites to search.</param>
/// <param name="ws">The website whose <c>Url</c> is looked up.</param>
/// <returns>
/// <c>true</c> if at least one entry has the same <c>Url</c>; otherwise <c>false</c>
/// (including when the list is empty).
/// </returns>
private bool IsDuplicate(List<Website> lstWebsite, Website ws)
{
    // Exists stops at the first match, just like an early-returning loop.
    return lstWebsite.Exists(candidate => candidate.Url == ws.Url);
}
/// <summary>
/// Collects the <c>href</c> of every anchor (<c>a</c>) element in the current browser
/// document, wraps each one in a <c>Website</c> and appends it to <c>lstTemp</c> and
/// <c>lstAllLink</c>.
/// </summary>
/// <param name="xpath">
/// Optional XPath of a container element. When non-empty, only anchors below the element
/// returned by <c>GetElementByXpath</c> are collected (nothing is collected if no element
/// is found). When null or empty, anchors of the whole document are collected.
/// </param>
/// <remarks>
/// For each collected link:
/// <list type="bullet">
/// <item><c>IsInternal</c> is true when the link starts with the scheme-and-authority
/// prefix of the page currently shown in the browser.</item>
/// <item><c>IsValid</c> is set to true when the link does not contain "javascript" and has
/// not been seen earlier in this call; it is set to false for "javascript" links and for
/// links containing <c>q=related:</c>, <c>q=+site:</c> or <c>q=cache:</c>. A repeated
/// non-javascript link keeps whatever value a new <c>Website</c> starts with.</item>
/// <item><c>Parent</c> is the URL of the page currently shown in the browser.</item>
/// </list>
/// Does nothing when there is no current browser, no loaded document or no page URL.
/// </remarks>
private void getalllink(string xpath)
{
    List<string> links = new List<string>();
    WebBrowser wb = (WebBrowser)GetCurrentWB();
    if (wb == null)
    {
        return;
    }

    Application.DoEvents();

    // WebBrowser.Document is null while no page is loaded.
    HtmlDocument document = wb.Document;
    if (document == null)
    {
        return;
    }

    HtmlElementCollection collection = null;
    if (!string.IsNullOrEmpty(xpath))
    {
        HtmlElement container = GetElementByXpath(document, xpath);
        if (container != null)
        {
            collection = container.GetElementsByTagName("a");
        }
    }
    else
    {
        collection = document.GetElementsByTagName("a");
    }

    if (collection == null)
    {
        return;
    }

    Uri pageUri = wb.Url;
    if (pageUri == null)
    {
        return;
    }

    // The page URL and its site prefix do not change while the anchors are enumerated,
    // so they are computed once instead of once per link.
    string parentUrl = pageUri.ToString();

    // Take "scheme://authority" straight from the parsed Uri. Searching the URL text for
    // the path can match inside the host (e.g. "/www" in "http://www.site.com/www"), and
    // assuming "http://" misclassifies every link of an https page.
    string siteRoot = pageUri.GetLeftPart(UriPartial.Authority);
    if (siteRoot.Length == 0)
    {
        // URIs without an authority part (presumably e.g. "about:blank"): fall back to the
        // scheme so an empty prefix does not match every link.
        siteRoot = pageUri.GetLeftPart(UriPartial.Scheme);
    }

    foreach (HtmlElement elm in collection)
    {
        // The stop flag is re-read for every element; stopped iterations are skipped.
        if (IsStop)
        {
            continue;
        }

        string url = elm.GetAttribute("href");
        if (string.IsNullOrEmpty(url))
        {
            continue;
        }

        Website w = new Website();
        w.Url = url;
        w.IsInternal = url.StartsWith(siteRoot, StringComparison.Ordinal);

        if (url.IndexOf("javascript", StringComparison.Ordinal) == -1)
        {
            // Only the first occurrence of a URL within this call is flagged valid;
            // repeats are still recorded but IsValid is left untouched.
            if (!Contains(links, url))
            {
                w.IsValid = true;
            }
        }
        else
        {
            w.IsValid = false;
        }

        // Links carrying these query operators are never valid.
        if (w.Url.IndexOf("q=related:", StringComparison.Ordinal) != -1
            || w.Url.IndexOf("q=+site:", StringComparison.Ordinal) != -1
            || w.Url.IndexOf("q=cache:", StringComparison.Ordinal) != -1)
        {
            w.IsValid = false;
        }

        w.Parent = parentUrl;
        links.Add(url);
        lstTemp.Add(w);
        lstAllLink.Add(w);
    }
}