Exemplo n.º 1
0
        /// <summary>
        /// Drains the URL queue, running an agent against each queued link until the
        /// queue is empty or a runtime/memory limit is reached.
        /// </summary>
        /// <remarks>
        /// The method returns immediately when a limit check trips. If the queue runs
        /// empty instead, it sleeps for <c>MagicWaitPeriod</c> before returning.
        /// </remarks>
        private void Crawl()
        {
            while (_urls.Count > 0)
            {
                SpiderUrl nextUrl = _urls.Dequeue();

                // When only unique links are followed, the target must match _website;
                // otherwise every dequeued link is processed.
                if (!_onlyFollowUniques || _website.IsMatch(nextUrl.Target.ToString()))
                {
                    Agent nextAgent = new Agent(nextUrl, DefaultCookies, _linkHrefPatternsToIgnore, _mimeTypesToIgnore, _contentTypesToInclude);

                    if (!_visitedUrls.Contains(nextAgent.Hash))
                    {
                        nextAgent.Run();

                        Logger.LogMessage(nextAgent.ToString(), LoggingType.Both);

                        // Links are harvested only from responses whose MIME type is not
                        // ignored and whose content type is explicitly enabled.
                        bool isFollowableContent =
                            !_mimeTypesToIgnore.Contains(nextAgent.MimeType)
                            && !String.IsNullOrEmpty(nextAgent.ContentType)
                            && _contentTypesToInclude.ContainsKey(nextAgent.ContentType)
                            && _contentTypesToInclude[nextAgent.ContentType];

                        if (isFollowableContent && nextAgent.Urls.Count > 0)
                        {
                            AddUrls(nextAgent.Urls.ToArray(), nextAgent.Referrer.AbsoluteUri);
                        }

                        // Record the visit for every agent that has run, not only for pages
                        // that produced links. Otherwise link-less pages and filtered
                        // content are fetched again each time another page links to them.
                        _visitedUrls.Add(nextAgent.Hash);

                        _cookies = nextAgent.Cookies;
                    }
                }

                // Runtime limit; the check is skipped while RuntimeInMinutes is -1.
                if (RuntimeInMinutes != -1 && RuntimeInMinutes >= AllowedRuntimeInMinutes)
                {
                    return;
                }

                // Memory limit; the check is skipped when the allowed remainder is -1.
                if (AllowedMemoryRemainingInMegabytes != -1 && RamCounter.MegabytesAvailable < AllowedMemoryRemainingInMegabytes)
                {
                    return;
                }
            }

            // Reached only when the queue has been emptied without hitting a limit.
            Thread.Sleep(MagicWaitPeriod);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Processes queued URLs one at a time until the queue is empty or a
        /// runtime/memory limit is reached.
        /// </summary>
        /// <remarks>
        /// Returns immediately when a limit check trips; otherwise sleeps for
        /// <c>MagicWaitPeriod</c> once the queue has been emptied.
        /// </remarks>
        private void Crawl()
        {
            while (_urls.Count > 0)
            {
                SpiderUrl candidate = _urls.Dequeue();

                // The "website" setting is consulted only when the uniques-only flag is on.
                bool uniquesOnly = bool.Parse(ConfigurationManager.AppSettings["onlyFollowUniques"]);
                bool shouldFollow = !uniquesOnly || candidate.Target.Contains(ConfigurationManager.AppSettings["website"]);

                if (shouldFollow)
                {
                    Agent agent = new Agent(candidate, DefaultCookies);
                    bool alreadyVisited = _visitedUrls.Contains(agent.Hash);

                    if (!alreadyVisited)
                    {
                        agent.Run();

                        // Queue the links found by the agent, then record the visit.
                        AddUrls(agent.Urls.ToArray(), agent.Referrer.AbsoluteUri);
                        _visitedUrls.Add(agent.Hash);

                        Logger.LogMessage(agent.ToString(), LoggingType.Both);

                        _cookies = agent.Cookies;
                    }
                }

                // Runtime limit; the check is skipped while RuntimeInMinutes is -1.
                if (RuntimeInMinutes != -1 && RuntimeInMinutes >= AllowedRuntimeInMinutes)
                {
                    return;
                }

                // Memory limit; the check is skipped when the allowed remainder is -1.
                if (AllowedMemoryRemainingInMegabytes != -1 && RamCounter.MegabytesAvailable < AllowedMemoryRemainingInMegabytes)
                {
                    return;
                }
            }

            // Reached only when the queue has been emptied without hitting a limit.
            Thread.Sleep(MagicWaitPeriod);
        }