示例#1
0
        /// <summary>
        /// Queues <paramref name="link"/> for crawling unless an equivalent link is already
        /// present in <c>LinksToCrawl</c> or in <c>CrawledLinks</c>.
        /// </summary>
        /// <remarks>
        /// Two links are considered equivalent when they have the same <c>SessionId</c> and their
        /// <c>SourceUrl</c> and <c>TargetUrl</c> values match ignoring case. When the link is
        /// accepted, its <c>Id</c> is set from <c>NextId</c> and it is stored under that key.
        /// </remarks>
        /// <param name="link">The candidate link; its <c>Id</c> is overwritten when it is queued.</param>
        public void AddLinkToCrawl(LinkToCrawl link)
        {
            // URLs are identifiers rather than linguistic text, so compare them ordinally:
            // the outcome must not vary with the current thread culture.
            const System.StringComparison UrlComparison = System.StringComparison.OrdinalIgnoreCase;

            // NOTE(review): the reason for this fixed 100 ms pause is not visible in this
            // method (possibly throttling) - kept as-is; confirm before removing.
            Thread.Sleep(100);

            bool alreadyQueued = LinksToCrawl.Values.Any(l =>
                l.SessionId == link.SessionId &&
                string.Equals(l.SourceUrl, link.SourceUrl, UrlComparison) &&
                string.Equals(l.TargetUrl, link.TargetUrl, UrlComparison));
            if (alreadyQueued)
            {
                return;
            }

            bool alreadyCrawled = CrawledLinks.Values.Any(l =>
                l.SessionId == link.SessionId &&
                string.Equals(l.SourceUrl, link.SourceUrl, UrlComparison) &&
                string.Equals(l.TargetUrl, link.TargetUrl, UrlComparison));
            if (alreadyCrawled)
            {
                return;
            }

            // Neither pending nor already processed: assign a fresh id and enqueue.
            link.Id = NextId;
            LinksToCrawl.Add(link.Id, link);
        }
示例#2
0
        /// <summary>
        /// Navigates through the numbered pages and records, in <c>LinksToCrawl</c>, the
        /// <c>href</c> of every anchor whose visible text contains <c>FileName</c>.
        /// </summary>
        /// <remarks>
        /// Each page URL is built as <c>BaseUrl + "?p=" + page + "&amp;q=" + UrlEncodedQueryString</c>.
        /// An href is added only once, and anchors without a usable href are skipped.
        /// </remarks>
        public void GetLinksToCrawl()
        {
            // NOTE(review): this visits pages 1 .. NumPageToCrawl - 1, i.e. one page fewer than
            // NumPageToCrawl. If the value is meant as a page count, the bound should be "<=" -
            // left unchanged pending confirmation.
            for (int page = 1; page < NumPageToCrawl; page++)
            {
                var pageUrl = BaseUrl + "?p=" + page + "&q=" + UrlEncodedQueryString;
                Driver.Navigate().GoToUrl(pageUrl);

                foreach (var anchor in Driver.FindElementsByTagName("a"))
                {
                    if (!anchor.Text.Contains(FileName))
                    {
                        continue;
                    }

                    // GetAttribute yields null when the anchor has no href; storing that
                    // would hand a null/empty URL to whatever consumes LinksToCrawl.
                    var href = anchor.GetAttribute("href");
                    if (string.IsNullOrEmpty(href))
                    {
                        continue;
                    }

                    if (!LinksToCrawl.Contains(href))
                    {
                        LinksToCrawl.Add(href);
                    }
                }
            }
        }