/// <summary>
/// Initializes a <see cref="Downloader"/>: stores the supplied collections and progress
/// object, copies the relevant flags from <paramref name="settings"/>, and builds the
/// <see cref="HttpClient"/> (base address <paramref name="host"/>) kept in <c>_client</c>.
/// </summary>
/// <param name="downloadQueue">Collection stored in <c>_downloadQueue</c>.</param>
/// <param name="downloadResults">Collection stored in <c>_downloadResults</c>.</param>
/// <param name="host">URI used as the client's base address; its interned string form is stored in <c>_seed</c>.</param>
/// <param name="settings">Supplies <c>FollowRedirects</c>, <c>UseCookies</c>, <c>FollowExtenalLinks</c> and <c>DownloadExternalContent</c>.</param>
/// <param name="progress">Progress object stored in <c>_progress</c>.</param>
/// <param name="separateThread">Forwarded unchanged to the base constructor.</param>
public Downloader(
    BlockingCollection<ParentLink> downloadQueue,
    BlockingCollection<DownloadResult> downloadResults,
    Uri host,
    ISettings settings,
    ICrawlProgress progress,
    bool separateThread = true)
    : base(separateThread)
{
    const int ProcessConnectionLimit = 50;
    const int SemaphoreInitialCount = 20;
    const int RedirectLimit = 10;
    const int ConnectionsPerServer = 15;

    // The seed is the interned string form of the host URI.
    string seedText = host.ToString();
    _seed = string.Intern(seedText);

    _downloadQueue = downloadQueue;
    _downloadResults = downloadResults;
    _progress = progress;

    // Static property on ServicePointManager, so this is not scoped to this instance.
    ServicePointManager.DefaultConnectionLimit = ProcessConnectionLimit;

    // NOTE(review): presumably throttles concurrent downloads - confirm where it is awaited.
    _semaphoreSlim = new SemaphoreSlim(SemaphoreInitialCount);

    // The handler comes from the project's Factory rather than being constructed here.
    var httpHandler = Factory.GetInstance<HttpClientHandler>();
    httpHandler.AllowAutoRedirect = settings.FollowRedirects;
    httpHandler.MaxAutomaticRedirections = RedirectLimit;
    httpHandler.AutomaticDecompression = DecompressionMethods.All;
    // A fresh container is assigned whether or not cookies end up enabled below.
    httpHandler.CookieContainer = new CookieContainer();
    httpHandler.MaxConnectionsPerServer = ConnectionsPerServer;
    httpHandler.UseCookies = settings.UseCookies;

    _followExternalLinks = settings.FollowExtenalLinks;
    _downloadExternalContent = settings.DownloadExternalContent;

    _client = new HttpClient(httpHandler);
    _client.BaseAddress = host;
    // TryAddWithoutValidation skips header-format validation for the User-Agent value.
    _client.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", "ItsyBitsy");
}
/// <summary>
/// Initializes a <see cref="Feeder"/>: stores the two link collections, the website and
/// session identifiers and the progress object, creates an empty set in
/// <c>_alreadyCrawled</c>, and obtains an <c>IRepository</c> from the project's Factory.
/// </summary>
/// <param name="newLinks">Collection stored in <c>_newLinks</c>.</param>
/// <param name="downloadQueue">Collection stored in <c>_downloadQueue</c>.</param>
/// <param name="websiteId">Value stored in <c>_websiteId</c>.</param>
/// <param name="sessionId">Value stored in <c>_sessionId</c>.</param>
/// <param name="progress">Progress object stored in <c>_progress</c>.</param>
/// <param name="separateThread">Forwarded unchanged to the base constructor.</param>
public Feeder(
    BlockingCollection<ParentLink> newLinks,
    BlockingCollection<ParentLink> downloadQueue,
    int websiteId,
    int sessionId,
    ICrawlProgress progress,
    bool separateThread = true)
    : base(separateThread)
{
    // Plain pass-through state, grouped by kind.
    (_newLinks, _downloadQueue) = (newLinks, downloadQueue);
    (_websiteId, _sessionId) = (websiteId, sessionId);
    _progress = progress;

    // Starts empty; NOTE(review): presumably tracks links already seen - confirm at use sites.
    _alreadyCrawled = new HashSet<string>();

    _repository = Factory.GetInstance<IRepository>();
}
/// <summary>
/// Initializes a <see cref="Processor"/>: stores the result and link collections, the
/// website, session identifier, settings and progress object, and obtains an
/// <c>IRepository</c> from the project's Factory.
/// </summary>
/// <param name="downloadResults">Collection stored in <c>_downloadResults</c>.</param>
/// <param name="newLinks">Collection stored in <c>_newLinks</c>.</param>
/// <param name="website">Website stored in <c>_website</c>.</param>
/// <param name="sessionId">Value stored in <c>_sessionId</c>.</param>
/// <param name="settings">Settings stored in <c>_settings</c>.</param>
/// <param name="progress">Progress object stored in <c>_progress</c>.</param>
/// <param name="separateThread">Forwarded unchanged to the base constructor.</param>
public Processor(
    BlockingCollection<DownloadResult> downloadResults,
    BlockingCollection<ParentLink> newLinks,
    Website website,
    int sessionId,
    ISettings settings,
    ICrawlProgress progress,
    bool separateThread = true)
    : base(separateThread)
{
    // The two collections this instance holds.
    (_downloadResults, _newLinks) = (downloadResults, newLinks);

    // Crawl context.
    (_website, _sessionId) = (website, sessionId);

    // Collaborators supplied by the caller.
    (_settings, _progress) = (settings, progress);

    // The repository is resolved through the Factory rather than passed in.
    _repository = Factory.GetInstance<IRepository>();
}
/// <summary>
/// Initializes a <see cref="Crawler"/> with the given progress object and settings, and
/// creates a new <see cref="CancellationTokenSource"/> held in <c>_tokenSource</c>.
/// </summary>
/// <param name="progress">Progress object stored in <c>_progress</c>.</param>
/// <param name="settings">Settings stored in <c>_settings</c>.</param>
public Crawler(ICrawlProgress progress, ISettings settings)
{
    (_progress, _settings) = (progress, settings);

    // Each instance gets its own token source.
    _tokenSource = new CancellationTokenSource();
}
/// <summary>
/// Initializes a <see cref="ReportProgress"/> that keeps the supplied progress object in
/// <c>_progress</c>.
/// </summary>
/// <param name="progress">Progress object stored in <c>_progress</c>.</param>
/// <param name="separateThread">Forwarded unchanged to the base constructor.</param>
public ReportProgress(ICrawlProgress progress, bool separateThread = true)
    : base(separateThread)
    => _progress = progress;