/// <summary>
/// Initializes a crawler: validates the collaborators and the start address,
/// wraps the supplied loader in a <c>RestrictedWebLoader</c>, and stores the rest.
/// </summary>
/// <param name="loader">Web loader to wrap; must not be null.</param>
/// <param name="fileSystem">File system abstraction; must not be null.</param>
/// <param name="parserFactory">Factory for HTML parsers; must not be null.</param>
/// <param name="uri">Start address; must be a well-formed absolute URI string.</param>
/// <param name="isNeedUploadOtherDomens">
/// Stored as-is; presumably controls whether resources from other domains are fetched (confirm against usage).
/// </param>
/// <param name="parallelDownloadFactor">
/// Forwarded to the <c>RestrictedWebLoader</c> constructor; presumably a cap on parallel downloads (confirm).
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="loader"/>, <paramref name="fileSystem"/> or <paramref name="parserFactory"/> is null.
/// </exception>
/// <exception cref="ArgumentException"><paramref name="uri"/> is not a well-formed absolute URI.</exception>
public Crawler(IWebLoader loader, IFileSystemWrapper fileSystem, IHtmlParserFactory parserFactory, string uri, bool isNeedUploadOtherDomens = false, int parallelDownloadFactor = 10)
{
    // Guard clauses run in parameter order so the first offending argument is the one reported.
    if (loader == null)
    {
        throw new ArgumentNullException("loader");
    }

    if (fileSystem == null)
    {
        throw new ArgumentNullException("fileSystem");
    }

    if (parserFactory == null)
    {
        throw new ArgumentNullException("parserFactory");
    }

    bool isAbsoluteUri = Uri.IsWellFormedUriString(uri, UriKind.Absolute);
    if (!isAbsoluteUri)
    {
        string message = "Не верный формат uri " + uri;
        throw new ArgumentException(message);
    }

    // The caller's loader is never used directly, only through the restricting wrapper.
    var restrictedLoader = new RestrictedWebLoader(loader, parallelDownloadFactor);
    this.loader = restrictedLoader;

    this.baseUri = new Uri(uri);
    this.isNeedUploadOtherDomens = isNeedUploadOtherDomens;
    this.parserFactory = parserFactory;
    this.fileSystem = fileSystem;
}
/// <summary>
/// Creates a <see cref="Crawler"/> for <paramref name="uri"/>, runs the crawl with
/// "index.html" as the root file name, and returns the crawler's resulting map.
/// </summary>
/// <param name="uri">Start address; validated by the <see cref="Crawler"/> constructor.</param>
/// <param name="nestedLevel">Nesting level passed to the crawl; must be greater than zero.</param>
/// <param name="isNeedUploadOtherDomens">Forwarded to the <see cref="Crawler"/> constructor.</param>
/// <param name="loader">Forwarded to the <see cref="Crawler"/> constructor.</param>
/// <param name="fileSystem">Forwarded to the <see cref="Crawler"/> constructor.</param>
/// <param name="parserFactory">Forwarded to the <see cref="Crawler"/> constructor.</param>
/// <param name="parallelDownloadFactor">Forwarded to the <see cref="Crawler"/> constructor.</param>
/// <returns>A task whose result is the crawler's map once the crawl has completed.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="nestedLevel"/> is zero or negative. Because this method is <c>async</c>,
/// the exception is delivered through the returned task.
/// </exception>
internal static async Task<IDictionary<string, string>> CrawlAsync(string uri, int nestedLevel, bool isNeedUploadOtherDomens, IWebLoader loader, IFileSystemWrapper fileSystem, IHtmlParserFactory parserFactory, int parallelDownloadFactor = 10)
{
    if (nestedLevel <= 0)
    {
        // The single-string constructor interprets its argument as the parameter NAME,
        // so the (paramName, message) overload is required for the text to end up in Message.
        throw new ArgumentOutOfRangeException("nestedLevel", "Параметр nestedLevel должен быть больше нуля");
    }

    var crawler = new Crawler(loader, fileSystem, parserFactory, uri, isNeedUploadOtherDomens, parallelDownloadFactor);
    await crawler.CrawlItAsync(uri, "index.html", nestedLevel);
    return crawler.map;
}
/// <summary>
/// Initializes the handler by storing the supplied collaborators in its private fields.
/// No validation is performed on the arguments.
/// </summary>
/// <param name="pageScraperService">Page scraper service to store.</param>
/// <param name="searchUrlBuilderFactory">Search URL builder factory to store.</param>
/// <param name="htmlParserFactory">HTML parser factory to store.</param>
public GetSiteSearchRankingHandler(
    IPageScraperService pageScraperService,
    ISearchUrlBuilderFactory searchUrlBuilderFactory,
    IHtmlParserFactory htmlParserFactory)
{
    this._htmlParserFactory = htmlParserFactory;
    this._searchUrlBuilderFactory = searchUrlBuilderFactory;
    this._pageScraperService = pageScraperService;
}