/// <summary>
/// Creates a site downloader for the given session and collaborators.
/// </summary>
/// <param name="sessionId">Identifier stored in <c>_sessionId</c>.</param>
/// <param name="siteProvider">Provider stored in <c>_siteProvider</c>.</param>
/// <param name="pageDownloader">Downloader stored in <c>_pageDownloader</c>.</param>
/// <param name="options">Options stored in <c>_options</c>.</param>
/// <remarks>Arguments are stored as-is; no validation is performed here.</remarks>
public SiteDownloader(
    string sessionId,
    ISiteProvider siteProvider,
    IPageDownloader pageDownloader,
    DownloadOptions options)
{
    // Collaborators first, then the plain data; the assignments are independent.
    _siteProvider = siteProvider;
    _pageDownloader = pageDownloader;

    _sessionId = sessionId;
    _options = options;
}
/// <summary>
/// Initializes the downloader with its session identifier, site provider,
/// page downloader and download options.
/// </summary>
/// <param name="sessionId">Session identifier kept in <c>_sessionId</c>.</param>
/// <param name="siteProvider">Site provider kept in <c>_siteProvider</c>.</param>
/// <param name="pageDownloader">Page downloader kept in <c>_pageDownloader</c>.</param>
/// <param name="options">Download options kept in <c>_options</c>.</param>
/// <remarks>The constructor only captures its arguments; nothing is validated or invoked.</remarks>
public SiteDownloader(
    string sessionId,
    ISiteProvider siteProvider,
    IPageDownloader pageDownloader,
    DownloadOptions options)
{
    _options = options;
    _sessionId = sessionId;

    _pageDownloader = pageDownloader;
    _siteProvider = siteProvider;
}
/// <summary>
/// Creates the web services facade from its three collaborators and starts
/// with an empty URL response cache.
/// </summary>
/// <param name="htmlParserProvider">Provider stored in <c>_htmlParserProvider</c>.</param>
/// <param name="webClientProvider">Provider stored in <c>_webClientProvider</c>.</param>
/// <param name="pageDownloader">Downloader stored in <c>_pageDownloader</c>.</param>
/// <remarks>
/// Each argument is passed through <c>Guard.VerifyObjectNotNull</c> before any
/// field is assigned (presumably rejecting null; confirm against <c>Guard</c>).
/// </remarks>
public DefaultWebServices(
    IHtmlParserProvider htmlParserProvider,
    IWebClientProvider webClientProvider,
    IPageDownloader pageDownloader)
{
    // Validate every dependency up front, in declaration order.
    Guard.VerifyObjectNotNull(htmlParserProvider, nameof(htmlParserProvider));
    Guard.VerifyObjectNotNull(webClientProvider, nameof(webClientProvider));
    Guard.VerifyObjectNotNull(pageDownloader, nameof(pageDownloader));

    _htmlParserProvider = htmlParserProvider;
    _webClientProvider = webClientProvider;
    _pageDownloader = pageDownloader;

    // Cache keyed by string; starts empty.
    _cachedUrls = new Dictionary<string, CacheableUrlResponse>();
}
/// <summary>
/// Builds the complete scraping object graph and stores every collaborator,
/// ending with <c>_characterDataScraper</c>, in the fields of this class.
/// </summary>
/// <remarks>
/// Construction order matters: providers are created first, then the shared
/// web services, then the per-area scraping services, and finally the scrapers
/// that consume them.
/// </remarks>
public void SetUp()
{
    // A single id generator is shared by every provider that needs one.
    var idGenerator = new InstanceIdGenerator();

    // Providers and low-level infrastructure.
    _htmlParserProvider = new DefaultHtmlParserProvider();
    _movementProvider = new DefaultMovementProvider(idGenerator);
    _moveProvider = new DefaultMoveProvider(idGenerator);
    _pageDownloader = new DefaultPageDownloader();
    _webClientProvider = new DefaultWebClientProvider();
    _attributeProvider = new DefaultAttributeProvider(idGenerator);
    _imageScrapingProvider = new DefaultImageScrapingProvider();
    _imageScrapingService = new DefaultImageScrapingService(_imageScrapingProvider);
    _uniqueDataProvider = new DefaultUniqueDataProvider(idGenerator);

    // Shared web services, then the scraping services layered on top of them.
    _webServices = new DefaultWebServices(_htmlParserProvider, _webClientProvider, _pageDownloader);
    _attributeScrapingServices = new DefaultAttributeScrapingServices(_attributeProvider, _webServices);
    _moveScrapingServices = new DefaultMoveScrapingServices(_moveProvider, _webServices);
    _movementScrapingServices = new DefaultMovementScrapingServices(_movementProvider, _webServices);
    _uniqueDataScrapingServices = new DefaultUniqueDataScrapingServices(_uniqueDataProvider, _webServices);

    // Individual move scrapers, aggregated by the character move scraper.
    _groundMoveScraper = new GroundMoveScraper(_moveScrapingServices);
    _aerialMoveScraper = new AerialMoveScraper(_moveScrapingServices);
    _specialMoveScraper = new SpecialMoveScraper(_moveScrapingServices);
    _throwMovesScraper = new ThrowMoveScraper(_moveScrapingServices);

    var moveScrapers = new List<IMoveScraper>
    {
        _groundMoveScraper,
        _aerialMoveScraper,
        _specialMoveScraper,
        _throwMovesScraper
    };
    _characterMoveScraper = new DefaultCharacterMoveScraper(moveScrapers);

    // Only the air speed and air dodge attribute scrapers are registered here.
    var scrapers = new List<IAttributeScraper>
    {
        new AirSpeedScraper(_attributeScrapingServices),
        new AirDodgeScraper(_attributeScrapingServices)
    };

    _movementScraper = new DefaultMovementScraper(_movementScrapingServices);

    _characterDataScrapingServices = new DefaultCharacterDataScrapingServices(
        _imageScrapingService,
        _movementScraper,
        scrapers,
        _characterMoveScraper,
        _uniqueDataScrapingServices,
        _webServices,
        idGenerator);

    _characterDataScraper = new DefaultCharacterDataScraper(_characterDataScrapingServices);
}
/// <summary>
/// Builds the complete scraping object graph and returns a new
/// <see cref="DefaultCharacterDataScraper"/> wired to it.
/// </summary>
/// <returns>A scraper constructed over the freshly built <c>_characterDataScrapingServices</c>.</returns>
/// <remarks>
/// Every intermediate collaborator is also stored in a field of this class.
/// The image scraping service is a <c>DefaultColorScrapingService</c> built
/// from <c>_characterCss</c>; <c>_imageScrapingProvider</c> is still created
/// but is not passed to it. Attribute scrapers come from
/// <c>AttributeScrapers.AllWithScrapingServices</c> using <c>_urlUnderTest</c>.
/// </remarks>
public DefaultCharacterDataScraper MakeCharacterDataScraper()
{
    // A single id generator is shared by every provider that needs one.
    var instanceIdGenerator = new InstanceIdGenerator();

    // Providers and low-level infrastructure.
    _htmlParserProvider = new DefaultHtmlParserProvider();
    _movementProvider = new DefaultMovementProvider(instanceIdGenerator);
    _moveProvider = new DefaultMoveProvider(instanceIdGenerator);
    _pageDownloader = new DefaultPageDownloader();
    _webClientProvider = new DefaultWebClientProvider();
    _attributeProvider = new DefaultAttributeProvider(instanceIdGenerator);
    _imageScrapingProvider = new DefaultImageScrapingProvider();
    _imageScrapingService = new DefaultColorScrapingService(_characterCss);
    _uniqueDataProvider = new DefaultUniqueDataProvider(instanceIdGenerator);

    // Shared web services, then the scraping services layered on top of them.
    _webServices = new DefaultWebServices(_htmlParserProvider, _webClientProvider, _pageDownloader);
    _attributeScrapingServices = new DefaultAttributeScrapingServices(_attributeProvider, _webServices);
    _moveScrapingServices = new DefaultMoveScrapingServices(_moveProvider, _webServices);
    _movementScrapingServices = new DefaultMovementScrapingServices(_movementProvider, _webServices);
    _uniqueDataScrapingServices = new DefaultUniqueDataScrapingServices(_uniqueDataProvider, _webServices);

    // Individual move scrapers, aggregated by the character move scraper.
    _groundMoveScraper = new GroundMoveScraper(_moveScrapingServices);
    _aerialMoveScraper = new AerialMoveScraper(_moveScrapingServices);
    _specialMoveScraper = new SpecialMoveScraper(_moveScrapingServices);
    _throwMoveScraper = new ThrowMoveScraper(_moveScrapingServices);
    _characterMoveScraper = new DefaultCharacterMoveScraper(new List<IMoveScraper>
    {
        _groundMoveScraper,
        _aerialMoveScraper,
        _specialMoveScraper,
        _throwMoveScraper
    });

    var attributeScrapers = AttributeScrapers.AllWithScrapingServices(_attributeScrapingServices, _urlUnderTest);

    _movementScraper = new DefaultMovementScraper(_movementScrapingServices);

    _characterDataScrapingServices = new DefaultCharacterDataScrapingServices(
        _imageScrapingService,
        _movementScraper,
        attributeScrapers,
        _characterMoveScraper,
        _uniqueDataScrapingServices,
        _webServices,
        instanceIdGenerator);

    return new DefaultCharacterDataScraper(_characterDataScrapingServices);
}
/// <summary>
/// Creates the executor, captures its collaborators and builds the dataflow
/// pipeline: buffer block, then download block, then index block.
/// </summary>
/// <param name="downloader">Page downloader stored in <c>_pageDownloader</c>.</param>
/// <param name="crawledRepository">Repository stored in <c>_crawledRepository</c>.</param>
/// <param name="indexRepository">Repository stored in <c>_indexRepository</c>.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when any argument is null; <c>ParamName</c> identifies the offending parameter.
/// </exception>
public Executor(
    IPageDownloader downloader,
    ICrawledRepository crawledRepository,
    IIndexRepository indexRepository)
{
    // ArgumentNullException expects the parameter name, not the type name,
    // so callers can tell which argument was null.
    _pageDownloader = downloader ?? throw new ArgumentNullException(nameof(downloader));
    _crawledRepository = crawledRepository ?? throw new ArgumentNullException(nameof(crawledRepository));
    _indexRepository = indexRepository ?? throw new ArgumentNullException(nameof(indexRepository));

    // Terms and pages each get their own integer id generator.
    _termIdGenerator = new IntegerIdGenerator();
    _pageIdGenerator = new IntegerIdGenerator();

    // Build data flow
    _bufferBlock = new BufferBlock<Uri>();
    _downloadBlock = new TransformBlock<Uri, PageInfo>(BuildDownloadTask());
    _indexBlock = new TransformBlock<PageInfo, string>(BuildIndexTask());

    // Links are created with PropagateCompletion enabled on both hops.
    var linkOptions = new DataflowLinkOptions { PropagateCompletion = true };
    _bufferBlock.LinkTo(_downloadBlock, linkOptions);
    _downloadBlock.LinkTo(_indexBlock, linkOptions);
}
/// <summary>
/// Creates a crawler that uses the supplied HTML parser and page downloader.
/// </summary>
/// <param name="htmlParser">Parser stored in the <c>htmlParser</c> member.</param>
/// <param name="pageDownloader">Downloader stored in the <c>pageDownloader</c> member.</param>
/// <remarks>Arguments are stored as-is; no validation is performed here.</remarks>
public SinglePageCrawler(
    IHtmlParser htmlParser,
    IPageDownloader pageDownloader)
{
    this.pageDownloader = pageDownloader;
    this.htmlParser = htmlParser;
}
/// <summary>
/// Prepares the fixture: creates a <c>PageDownloader</c> and a
/// <c>PageParser</c> that is constructed over that downloader.
/// </summary>
public PageParserTests()
{
    // The parser under test receives the same downloader instance kept on the fixture.
    this.pageDownloader = new PageDownloader();
    this.pageParser = new PageParser(this.pageDownloader);
}
/// <summary>
/// Creates a scrapper that uses the supplied page downloader.
/// </summary>
/// <param name="pageDownloader">Downloader stored in <c>_pageDownloader</c>; not validated here.</param>
public Scrapper(IPageDownloader pageDownloader)
{
    this._pageDownloader = pageDownloader;
}
/// <summary>
/// Initializes the crawler with its HTML parser and page downloader.
/// </summary>
/// <param name="htmlParser">HTML parser kept in the <c>htmlParser</c> member.</param>
/// <param name="pageDownloader">Page downloader kept in the <c>pageDownloader</c> member.</param>
/// <remarks>The constructor only captures its arguments; nothing is validated or invoked.</remarks>
public SinglePageCrawler(
    IHtmlParser htmlParser,
    IPageDownloader pageDownloader)
{
    // The two assignments are independent of each other.
    this.pageDownloader = pageDownloader;
    this.htmlParser = htmlParser;
}
/// <summary>
/// Creates a page parser that uses the supplied page downloader.
/// </summary>
/// <param name="pageDownloader">Downloader stored in the <c>pageDownloader</c> member; not validated here.</param>
public PageParser(IPageDownloader pageDownloader)
{
    this.pageDownloader = pageDownloader;
}