/// <summary>
/// Queues <paramref name="link"/> for crawling unless an equivalent link is
/// already pending in <c>LinksToCrawl</c> or already present in <c>CrawledLinks</c>.
/// </summary>
/// <remarks>
/// Two links are considered equivalent when they share the same <c>SessionId</c>
/// and their <c>SourceUrl</c> and <c>TargetUrl</c> match ignoring case. The URL
/// comparison is ordinal (culture-independent) so that the result does not vary
/// with the current thread culture.
/// When the link is added, its <c>Id</c> is overwritten with <c>NextId</c> and
/// that id is used as the key in <c>LinksToCrawl</c>.
/// </remarks>
/// <param name="link">The candidate link; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="link"/> is null.</exception>
public void AddLinkToCrawl(LinkToCrawl link)
{
    if (link == null)
    {
        throw new ArgumentNullException(nameof(link));
    }

    // NOTE(review): fixed 100 ms pause kept from the original implementation.
    // Its purpose is not visible here (presumably throttling) - confirm before removing.
    Thread.Sleep(100);

    // Skip the link if it is already waiting to be crawled.
    bool alreadyQueued = LinksToCrawl.Values.Any(l =>
        l.SessionId == link.SessionId
        && string.Equals(l.SourceUrl, link.SourceUrl, StringComparison.OrdinalIgnoreCase)
        && string.Equals(l.TargetUrl, link.TargetUrl, StringComparison.OrdinalIgnoreCase));
    if (alreadyQueued)
    {
        return;
    }

    // Skip the link if it has already been crawled.
    bool alreadyCrawled = CrawledLinks.Values.Any(l =>
        l.SessionId == link.SessionId
        && string.Equals(l.SourceUrl, link.SourceUrl, StringComparison.OrdinalIgnoreCase)
        && string.Equals(l.TargetUrl, link.TargetUrl, StringComparison.OrdinalIgnoreCase));
    if (alreadyCrawled)
    {
        return;
    }

    // NextId is read only when the link is actually added, exactly once.
    link.Id = NextId;
    LinksToCrawl.Add(link.Id, link);
}
/// <summary>
/// Walks the paginated result pages at <c>BaseUrl</c> and collects the href of
/// every anchor whose visible text contains <c>FileName</c> into
/// <c>LinksToCrawl</c>, skipping hrefs that are already present.
/// </summary>
/// <remarks>
/// Pages are requested as <c>BaseUrl?p={page}&amp;q={UrlEncodedQueryString}</c>,
/// with page numbers starting at 1. The loop bound is inclusive so that exactly
/// <c>NumPageToCrawl</c> pages are visited (an exclusive bound would visit one
/// page fewer, and none at all when <c>NumPageToCrawl</c> is 1).
/// NOTE(review): this assumes <c>NumPageToCrawl</c> is a page count - confirm.
/// </remarks>
public void GetLinksToCrawl()
{
    for (int page = 1; page <= NumPageToCrawl; page++)
    {
        var url = $"{BaseUrl}?p={page}&q={UrlEncodedQueryString}";
        Driver.Navigate().GoToUrl(url);

        var anchors = Driver.FindElementsByTagName("a");
        foreach (var anchor in anchors)
        {
            if (!anchor.Text.Contains(FileName))
            {
                continue;
            }

            // GetAttribute returns null when the anchor has no href attribute;
            // such anchors cannot be crawled, so they are not recorded.
            var href = anchor.GetAttribute("href");
            if (string.IsNullOrEmpty(href))
            {
                continue;
            }

            if (!LinksToCrawl.Contains(href))
            {
                LinksToCrawl.Add(href);
            }
        }
    }
}