Пример #1
0
        /// <summary>
        /// Queues every URL discovered on a page so it can be crawled later.
        /// </summary>
        /// <param name="urls">Targets to enqueue; each one is wrapped in a <c>SpiderUrl</c>.</param>
        /// <param name="referrer">Referrer value handed to every <c>SpiderUrl</c> created here.</param>
        private void AddUrls(string[] urls, string referrer)
        {
            for (int index = 0; index < urls.Length; index++)
            {
                _urls.Enqueue(new SpiderUrl(urls[index], referrer));
            }
        }
Пример #2
0
        /// <summary>
        /// Drains the URL queue, running an <c>Agent</c> against each dequeued URL until the
        /// queue is empty or a runtime/memory limit is reached.
        /// </summary>
        /// <remarks>
        /// A URL is crawled only when <c>_onlyFollowUniques</c> is off or <c>_website</c> matches its target,
        /// and only when the agent's hash is not already in <c>_visitedUrls</c>. Links found by the agent
        /// are queued only if its MIME type is not ignored and its content type is flagged for inclusion.
        /// The method returns early, without the trailing sleep, when a limit check trips.
        /// </remarks>
        private void Crawl()
        {
            while (_urls.Count > 0)
            {
                SpiderUrl nextUrl = _urls.Dequeue();

                // Equivalent to "(only && match) || !only": the match is evaluated only when the flag is set.
                if (!_onlyFollowUniques || _website.IsMatch(nextUrl.Target.ToString()))
                {
                    Agent nextAgent = new Agent(nextUrl, DefaultCookies, _linkHrefPatternsToIgnore, _mimeTypesToIgnore, _contentTypesToInclude);

                    if (!_visitedUrls.Contains(nextAgent.Hash))
                    {
                        nextAgent.Run();

                        // Record the visit for every page that was run, not just pages that yielded links;
                        // otherwise link-less or ignored-type pages are fetched again each time they are linked.
                        _visitedUrls.Add(nextAgent.Hash);

                        Logger.LogMessage(nextAgent.ToString(), LoggingType.Both);

                        bool contentIsIncluded = !_mimeTypesToIgnore.Contains(nextAgent.MimeType)
                            && !String.IsNullOrEmpty(nextAgent.ContentType)
                            && _contentTypesToInclude.ContainsKey(nextAgent.ContentType)
                            && _contentTypesToInclude[nextAgent.ContentType];

                        if (contentIsIncluded && nextAgent.Urls.Count > 0)
                        {
                            AddUrls(nextAgent.Urls.ToArray(), nextAgent.Referrer.AbsoluteUri);
                        }

                        _cookies = nextAgent.Cookies;
                    }
                }

                // A RuntimeInMinutes of -1 skips the runtime limit.
                // NOTE(review): the memory check below tests the *allowed* value against -1, this one tests
                // the *measured* value; confirm which property is meant to carry the "no limit" sentinel.
                if (RuntimeInMinutes != -1 && RuntimeInMinutes >= AllowedRuntimeInMinutes)
                {
                    return;
                }

                // An AllowedMemoryRemainingInMegabytes of -1 skips the memory limit.
                if (AllowedMemoryRemainingInMegabytes != -1 && RamCounter.MegabytesAvailable < AllowedMemoryRemainingInMegabytes)
                {
                    return;
                }
            }

            // Reached only when the queue ran empty; the limit exits above return before this point.
            Thread.Sleep(MagicWaitPeriod);
        }
Пример #3
0
        /// <summary>
        /// Queues the crawlable URLs found on a page.
        /// </summary>
        /// <param name="urls">Candidate targets; each one is wrapped in a <c>SpiderUrl</c> before filtering.</param>
        /// <param name="referrer">Referrer value handed to every <c>SpiderUrl</c> created here.</param>
        private void AddUrls(string[] urls, string referrer)
        {
            foreach (string target in urls)
            {
                SpiderUrl candidate = new SpiderUrl(target, referrer);

                // Skip javascript and mailto links, and any target containing a '#'.
                if (candidate.IsJavascript || candidate.IsMailto || candidate.Target.Contains("#"))
                {
                    continue;
                }

                _urls.Enqueue(candidate);
            }
        }
Пример #4
0
        /// <summary>
        /// Processes queued URLs one at a time until the queue is empty or a runtime/memory
        /// limit stops the crawl.
        /// </summary>
        /// <remarks>
        /// The <c>onlyFollowUniques</c> and <c>website</c> app settings are read on every iteration.
        /// When <c>onlyFollowUniques</c> parses to true, only targets containing the <c>website</c>
        /// setting are crawled. The method returns early, without the trailing sleep, when a limit
        /// check trips.
        /// </remarks>
        private void Crawl()
        {
            while (_urls.Count > 0)
            {
                SpiderUrl current = _urls.Dequeue();

                bool uniquesOnly = bool.Parse(ConfigurationManager.AppSettings["onlyFollowUniques"]);

                // The "website" setting is consulted only when the uniques-only flag is on.
                bool shouldFollow = !uniquesOnly || current.Target.Contains(ConfigurationManager.AppSettings["website"]);

                if (shouldFollow)
                {
                    Agent agent = new Agent(current, DefaultCookies);

                    // Agents whose hash is already recorded are skipped without being run.
                    if (!_visitedUrls.Contains(agent.Hash))
                    {
                        agent.Run();

                        AddUrls(agent.Urls.ToArray(), agent.Referrer.AbsoluteUri);

                        _visitedUrls.Add(agent.Hash);

                        Logger.LogMessage(agent.ToString(), LoggingType.Both);

                        _cookies = agent.Cookies;
                    }
                }

                // The limit checks run after every dequeued URL, whether or not it was crawled.
                if (RuntimeInMinutes != -1 && RuntimeInMinutes >= AllowedRuntimeInMinutes)
                {
                    return;
                }

                if (AllowedMemoryRemainingInMegabytes != -1 && RamCounter.MegabytesAvailable < AllowedMemoryRemainingInMegabytes)
                {
                    return;
                }
            }

            // Reached only when the queue ran empty; the limit exits above return before this point.
            Thread.Sleep(MagicWaitPeriod);
        }