示例#1
0
        /// <summary>
        /// Initializes a downloader: stores the supplied queues and progress reporter,
        /// and builds the <see cref="HttpClient"/> used for requests against <paramref name="host"/>.
        /// </summary>
        /// <param name="downloadQueue">Collection stored as the download queue field.</param>
        /// <param name="downloadResults">Collection stored as the download results field.</param>
        /// <param name="host">Seed address; used as the client's base address and kept (interned) as a string.</param>
        /// <param name="settings">Supplies redirect, cookie and external-link options read during construction.</param>
        /// <param name="progress">Progress reporter stored for later use.</param>
        /// <param name="separateThread">Forwarded unchanged to the base constructor.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="host"/> or <paramref name="settings"/> is <c>null</c>.
        /// </exception>
        public Downloader(BlockingCollection <ParentLink> downloadQueue, BlockingCollection <DownloadResult> downloadResults, Uri host, ISettings settings, ICrawlProgress progress, bool separateThread = true)
            : base(separateThread)
        {
            // Both arguments are dereferenced below; fail fast with a descriptive exception
            // before touching static state or allocating anything.
            if (host is null)
            {
                throw new ArgumentNullException(nameof(host));
            }

            if (settings is null)
            {
                throw new ArgumentNullException(nameof(settings));
            }

            const int defaultConnectionLimit  = 50;
            const int semaphoreInitialCount   = 20;
            const int maxRedirections         = 10;
            const int maxConnectionsPerServer = 15;
            const string userAgent            = "ItsyBitsy";

            _downloadQueue   = downloadQueue;
            _downloadResults = downloadResults;
            _seed            = string.Intern(host.ToString());
            _progress        = progress;

            // Static property: this affects the whole process, not just this instance.
            ServicePointManager.DefaultConnectionLimit = defaultConnectionLimit;
            _semaphoreSlim = new SemaphoreSlim(semaphoreInitialCount);

            var handler = Factory.GetInstance <HttpClientHandler>();

            handler.AllowAutoRedirect        = settings.FollowRedirects;
            handler.MaxAutomaticRedirections = maxRedirections;
            handler.AutomaticDecompression   = DecompressionMethods.All;
            handler.CookieContainer          = new CookieContainer();
            handler.MaxConnectionsPerServer  = maxConnectionsPerServer;
            handler.UseCookies               = settings.UseCookies;

            _followExternalLinks     = settings.FollowExtenalLinks;
            _downloadExternalContent = settings.DownloadExternalContent;

            _client = new HttpClient(handler)
            {
                BaseAddress = host
            };

            // TryAddWithoutValidation skips header-format validation of the value.
            _client.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", userAgent);
        }
示例#2
0
 /// <summary>
 /// Initializes a feeder with its input and output collections, the website and
 /// session identifiers, and a progress reporter.
 /// </summary>
 /// <param name="newLinks">Collection stored as the new-links field.</param>
 /// <param name="downloadQueue">Collection stored as the download queue field.</param>
 /// <param name="websiteId">Website identifier kept for later use.</param>
 /// <param name="sessionId">Session identifier kept for later use.</param>
 /// <param name="progress">Progress reporter stored for later use.</param>
 /// <param name="separateThread">Forwarded unchanged to the base constructor.</param>
 public Feeder(BlockingCollection <ParentLink> newLinks, BlockingCollection <ParentLink> downloadQueue, int websiteId, int sessionId, ICrawlProgress progress, bool separateThread = true)
     : base(separateThread)
 {
     // Identity of the crawl this feeder belongs to.
     _sessionId = sessionId;
     _websiteId = websiteId;

     // Collections handed in by the caller.
     _downloadQueue = downloadQueue;
     _newLinks = newLinks;

     _progress = progress;

     // The crawled set starts out empty.
     _alreadyCrawled = new HashSet <string>();

     // The repository comes from the factory rather than from the caller.
     _repository = Factory.GetInstance <IRepository>();
 }
示例#3
0
 /// <summary>
 /// Initializes a processor with its input and output collections, the target
 /// website, the session identifier, settings and a progress reporter.
 /// </summary>
 /// <param name="downloadResults">Collection stored as the download results field.</param>
 /// <param name="newLinks">Collection stored as the new-links field.</param>
 /// <param name="website">Website instance kept for later use.</param>
 /// <param name="sessionId">Session identifier kept for later use.</param>
 /// <param name="settings">Settings instance kept for later use.</param>
 /// <param name="progress">Progress reporter stored for later use.</param>
 /// <param name="separateThread">Forwarded unchanged to the base constructor.</param>
 public Processor(BlockingCollection <DownloadResult> downloadResults, BlockingCollection <ParentLink> newLinks, Website website, int sessionId, ISettings settings, ICrawlProgress progress, bool separateThread = true)
     : base(separateThread)
 {
     // Collections handed in by the caller.
     _downloadResults = downloadResults;
     _newLinks = newLinks;

     // Crawl context.
     _website = website;
     _sessionId = sessionId;
     _settings = settings;

     _progress = progress;

     // The repository comes from the factory rather than from the caller.
     _repository = Factory.GetInstance <IRepository>();
 }
示例#4
0
 /// <summary>
 /// Initializes a crawler with a progress reporter and settings, and creates a new
 /// <see cref="CancellationTokenSource"/> for it.
 /// </summary>
 /// <param name="progress">Progress reporter stored for later use.</param>
 /// <param name="settings">Settings instance stored for later use.</param>
 public Crawler(ICrawlProgress progress, ISettings settings)
 {
     // Caller-supplied collaborators.
     _progress = progress;
     _settings = settings;

     // Each crawler instance gets its own token source.
     _tokenSource = new CancellationTokenSource();
 }
示例#5
0
 /// <summary>
 /// Initializes the instance with the progress reporter it will use.
 /// </summary>
 /// <param name="progress">Progress reporter stored for later use.</param>
 /// <param name="separateThread">Forwarded unchanged to the base constructor.</param>
 public ReportProgress(ICrawlProgress progress, bool separateThread = true)
     : base(separateThread) => _progress = progress;