/// <summary>
/// Appends every supplied link target to the crawl queue.
/// </summary>
/// <param name="urls">Link targets to enqueue; each one is wrapped in a new <c>SpiderUrl</c>.</param>
/// <param name="referrer">Value passed as the referrer argument of every <c>SpiderUrl</c> created here.</param>
private void AddUrls(string[] urls, string referrer)
{
    // Walk the array by index; entries are enqueued in their original order.
    for (int index = 0; index < urls.Length; index++)
    {
        _urls.Enqueue(new SpiderUrl(urls[index], referrer));
    }
}
/// <summary>
/// Processes queued URLs until the queue is empty or a runtime/memory limit is reached.
/// </summary>
/// <remarks>
/// For each dequeued URL an <c>Agent</c> is created and, unless its hash is already in
/// <c>_visitedUrls</c>, run. Links found by the agent are queued only when the agent's MIME type
/// is not ignored and its content type is enabled in <c>_contentTypesToInclude</c>.
/// When the loop ends because the queue is empty, the method sleeps for <c>MagicWaitPeriod</c>;
/// when it ends because a limit was hit, it returns immediately without sleeping.
/// </remarks>
private void Crawl()
{
    while (_urls.Count > 0)
    {
        SpiderUrl nextUrl = _urls.Dequeue();

        // With _onlyFollowUniques set, only targets matching _website are followed;
        // otherwise every dequeued URL is followed.
        if (!_onlyFollowUniques || _website.IsMatch(nextUrl.Target.ToString()))
        {
            Agent nextAgent = new Agent(nextUrl, DefaultCookies, _linkHrefPatternsToIgnore, _mimeTypesToIgnore, _contentTypesToInclude);

            if (!_visitedUrls.Contains(nextAgent.Hash))
            {
                nextAgent.Run();

                // Record the visit as soon as the agent has run. Previously the hash was stored
                // only when links were harvested, so pages without links (or with an excluded
                // MIME/content type) were never marked visited and were fetched again whenever
                // another page linked to them.
                _visitedUrls.Add(nextAgent.Hash);

                Logger.LogMessage(nextAgent.ToString(), LoggingType.Both);

                // Harvest links only from responses whose MIME type is not ignored and whose
                // content type is present and enabled in the include map.
                if (!_mimeTypesToIgnore.Contains(nextAgent.MimeType)
                    && !String.IsNullOrEmpty(nextAgent.ContentType)
                    && _contentTypesToInclude.ContainsKey(nextAgent.ContentType)
                    && _contentTypesToInclude[nextAgent.ContentType]
                    && nextAgent.Urls.Count > 0)
                {
                    AddUrls(nextAgent.Urls.ToArray(), nextAgent.Referrer.AbsoluteUri);
                }

                _cookies = nextAgent.Cookies;
            }
        }

        // The limits are checked after every dequeued URL, including skipped ones.
        // The runtime check is bypassed while RuntimeInMinutes is -1.
        if (RuntimeInMinutes != -1 && RuntimeInMinutes >= AllowedRuntimeInMinutes)
        {
            return;
        }

        // The memory check is bypassed while AllowedMemoryRemainingInMegabytes is -1.
        if (AllowedMemoryRemainingInMegabytes != -1 && RamCounter.MegabytesAvailable < AllowedMemoryRemainingInMegabytes)
        {
            return;
        }
    }

    // Reached only when the queue has been drained without hitting a limit.
    Thread.Sleep(MagicWaitPeriod);
}
/// <summary>
/// Queues the supplied link targets for crawling, leaving out the ones that are filtered.
/// </summary>
/// <param name="urls">Link targets to consider; each one is wrapped in a new <c>SpiderUrl</c>.</param>
/// <param name="referrer">Value passed as the referrer argument of every <c>SpiderUrl</c> created here.</param>
private void AddUrls(string[] urls, string referrer)
{
    foreach (string target in urls)
    {
        SpiderUrl candidate = new SpiderUrl(target, referrer);

        // Skip links the SpiderUrl reports as JavaScript or mailto, and any target containing '#'.
        if (candidate.IsJavascript || candidate.IsMailto || candidate.Target.Contains("#"))
        {
            continue;
        }

        _urls.Enqueue(candidate);
    }
}
/// <summary>
/// Processes queued URLs until the queue is empty or a runtime/memory limit is reached.
/// </summary>
/// <remarks>
/// The "onlyFollowUniques" and "website" application settings are read for every dequeued URL.
/// When the loop ends because the queue is empty, the method sleeps for <c>MagicWaitPeriod</c>;
/// when it ends because a limit was hit, it returns immediately without sleeping.
/// </remarks>
private void Crawl()
{
    while (_urls.Count > 0)
    {
        SpiderUrl candidate = _urls.Dequeue();

        // Follow everything unless "onlyFollowUniques" is true, in which case the target
        // must contain the configured "website" value.
        if (!bool.Parse(ConfigurationManager.AppSettings["onlyFollowUniques"])
            || candidate.Target.Contains(ConfigurationManager.AppSettings["website"]))
        {
            Agent agent = new Agent(candidate, DefaultCookies);

            // Agents whose hash has already been recorded are not run again.
            if (!_visitedUrls.Contains(agent.Hash))
            {
                agent.Run();
                AddUrls(agent.Urls.ToArray(), agent.Referrer.AbsoluteUri);
                _visitedUrls.Add(agent.Hash);
                Logger.LogMessage(agent.ToString(), LoggingType.Both);
                _cookies = agent.Cookies;
            }
        }

        // The limits are checked after every dequeued URL, including skipped ones.
        // The runtime check is bypassed while RuntimeInMinutes is -1.
        if (RuntimeInMinutes != -1 && RuntimeInMinutes >= AllowedRuntimeInMinutes)
        {
            return;
        }

        // The memory check is bypassed while AllowedMemoryRemainingInMegabytes is -1.
        if (AllowedMemoryRemainingInMegabytes != -1 && RamCounter.MegabytesAvailable < AllowedMemoryRemainingInMegabytes)
        {
            return;
        }
    }

    // Reached only when the queue has been drained without hitting a limit.
    Thread.Sleep(MagicWaitPeriod);
}