Exemplo n.º 1
0
        /// <summary>
        /// Takes the next entry from <c>m_CrawlerQueue</c>, downloads its crawl step and,
        /// if the download is accepted, hands the resulting property bag to the pipeline.
        /// </summary>
        /// <remarks>
        /// The method returns early, without running the pipeline, when:
        /// the queue yields a null entry; <c>OnBeforeDownload</c> returns false;
        /// <c>Download</c> yields a null property bag; or <c>OnAfterDownload</c> returns false.
        /// </remarks>
        private void ProcessNextInQueue()
        {
            CrawlerQueueEntry entry = m_CrawlerQueue.Pop();
            if (entry.IsNull())
            {
                return;
            }

            // The pre-download hook can veto this step.
            if (!OnBeforeDownload(entry.CrawlStep))
            {
                return;
            }

            PropertyBag downloaded = Download(entry.CrawlStep);
            if (downloaded.IsNull())
            {
                return;
            }

            // Copy any properties carried by the queue entry onto the downloaded bag.
            if (!entry.Properties.IsNull())
            {
                entry.Properties.ForEach(pair => downloaded[pair.Key].Value = pair.Value);
            }

            downloaded.Referrer = entry.Referrer;

            // The post-download hook decides whether the pipeline runs for this bag.
            if (!OnAfterDownload(entry.CrawlStep, downloaded))
            {
                return;
            }

            ExecutePipeLine(downloaded);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Takes the next entry from <c>m_CrawlerQueue</c> and starts an asynchronous
        /// GET download for its crawl step.
        /// </summary>
        /// <remarks>
        /// Returns without starting a download when the queue yields a null entry or
        /// <c>OnBeforeDownload</c> returns false. Otherwise a downloader is obtained from
        /// <c>m_WebDownloaderFactory</c>, configured from this instance's settings, and
        /// <c>DownloadAsync</c> is invoked with <c>EndDownload</c> and
        /// <c>OnDownloadProgress</c> as callbacks.
        /// </remarks>
        private void StartDownload()
        {
            CrawlerQueueEntry entry = m_CrawlerQueue.Pop();
            if (entry.IsNull())
            {
                return;
            }

            // The pre-download hook can veto this step.
            if (!OnBeforeDownload(entry.CrawlStep))
            {
                return;
            }

            IWebDownloader downloader = m_WebDownloaderFactory();

            // Size and buffer limits.
            downloader.MaximumDownloadSizeInRam = MaximumDownloadSizeInRam;
            downloader.ConnectionTimeout = ConnectionTimeout;
            downloader.MaximumContentSize = MaximumContentSize;
            downloader.DownloadBufferSize = DownloadBufferSize;

            // Request identity, cookies, read timeout and retry policy.
            downloader.UserAgent = UserAgent;
            downloader.UseCookies = UseCookies;
            downloader.ReadTimeout = ConnectionReadTimeout;
            downloader.RetryCount = DownloadRetryCount;
            downloader.RetryWaitDuration = DownloadRetryWaitDuration;

            m_Logger.Verbose("Downloading {0}", entry.CrawlStep.Uri);

            // The counter scope cookie is passed to DownloadAsync as its state argument.
            ThreadSafeCounter.ThreadSafeCounterCookie counterCookie =
                m_ThreadInUse.EnterCounterScope(entry);

            Interlocked.Increment(ref m_VisitedCount);

            downloader.DownloadAsync(
                entry.CrawlStep,
                entry.Referrer,
                DownloadMethod.GET,
                EndDownload,
                OnDownloadProgress,
                counterCookie);
        }