Пример #1
0
        /// <summary>
        /// Initializes a crawler whose base address is <paramref name="uri"/>.
        /// </summary>
        /// <param name="loader">Web loader; wrapped in a <c>RestrictedWebLoader</c> together with <paramref name="parallelDownloadFactor"/>.</param>
        /// <param name="fileSystem">File system wrapper kept for later use by the crawler.</param>
        /// <param name="parserFactory">HTML parser factory kept for later use by the crawler.</param>
        /// <param name="uri">Well-formed absolute URI string; parsed into the crawler's base <see cref="Uri"/>.</param>
        /// <param name="isNeedUploadOtherDomens">Stored as-is. NOTE(review): presumably enables fetching resources from other domains - confirm against usage.</param>
        /// <param name="parallelDownloadFactor">Forwarded to <c>RestrictedWebLoader</c>. NOTE(review): presumably the concurrent download limit - confirm.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="loader"/>, <paramref name="fileSystem"/> or <paramref name="parserFactory"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="uri"/> is not a well-formed absolute URI string.</exception>
        public Crawler(IWebLoader loader, IFileSystemWrapper fileSystem, IHtmlParserFactory parserFactory, string uri, bool isNeedUploadOtherDomens = false, int parallelDownloadFactor = 10)
        {
            if (loader == null)
            {
                throw new ArgumentNullException("loader");
            }

            if (fileSystem == null)
            {
                throw new ArgumentNullException("fileSystem");
            }

            if (parserFactory == null)
            {
                throw new ArgumentNullException("parserFactory");
            }

            if (!Uri.IsWellFormedUriString(uri, UriKind.Absolute))
            {
                // Supply the parameter name so ArgumentException.ParamName identifies the bad argument.
                throw new ArgumentException("Не верный формат uri " + uri, "uri");
            }

            // The raw loader is never stored directly; every request goes through the restricting wrapper.
            this.loader = new RestrictedWebLoader(loader, parallelDownloadFactor);
            this.fileSystem = fileSystem;
            this.parserFactory = parserFactory;
            this.isNeedUploadOtherDomens = isNeedUploadOtherDomens;
            this.baseUri = new Uri(uri);
        }
Пример #2
0
        /// <summary>
        /// Creates a <c>Crawler</c> for <paramref name="uri"/>, runs <c>CrawlItAsync</c> starting at
        /// <paramref name="uri"/> with the target name "index.html", and returns the crawler's map.
        /// </summary>
        /// <param name="uri">Start address; also passed to the <c>Crawler</c> constructor as its base URI.</param>
        /// <param name="nestedLevel">Nesting level forwarded to <c>CrawlItAsync</c>; must be greater than zero.</param>
        /// <param name="isNeedUploadOtherDomens">Forwarded unchanged to the <c>Crawler</c> constructor.</param>
        /// <param name="loader">Web loader forwarded to the <c>Crawler</c> constructor.</param>
        /// <param name="fileSystem">File system wrapper forwarded to the <c>Crawler</c> constructor.</param>
        /// <param name="parserFactory">HTML parser factory forwarded to the <c>Crawler</c> constructor.</param>
        /// <param name="parallelDownloadFactor">Forwarded unchanged to the <c>Crawler</c> constructor.</param>
        /// <returns>A task whose result is the crawler's <c>map</c> once crawling has completed.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="nestedLevel"/> is less than or equal to zero. Because this method is async,
        /// the exception is observed when the returned task is awaited.
        /// </exception>
        internal static async Task<IDictionary<string, string>> CrawlAsync(string uri, int nestedLevel, bool isNeedUploadOtherDomens, IWebLoader loader, IFileSystemWrapper fileSystem, IHtmlParserFactory parserFactory, int parallelDownloadFactor = 10)
        {
            if (nestedLevel <= 0)
            {
                // The single-string constructor treats its argument as the parameter NAME,
                // so the name, offending value and message are passed explicitly.
                throw new ArgumentOutOfRangeException("nestedLevel", nestedLevel, "Параметр nestedLevel должен быть больше нуля");
            }

            var crawler = new Crawler(loader, fileSystem, parserFactory, uri, isNeedUploadOtherDomens, parallelDownloadFactor);

            await crawler.CrawlItAsync(uri, "index.html", nestedLevel);

            return crawler.map;
        }
Пример #3
0
 /// <summary>
 /// Creates the handler and stores the supplied collaborators in its private fields.
 /// </summary>
 /// <param name="pageScraperService">Page scraper service kept in <c>_pageScraperService</c>.</param>
 /// <param name="searchUrlBuilderFactory">Search URL builder factory kept in <c>_searchUrlBuilderFactory</c>.</param>
 /// <param name="htmlParserFactory">HTML parser factory kept in <c>_htmlParserFactory</c>.</param>
 public GetSiteSearchRankingHandler(
     IPageScraperService pageScraperService,
     ISearchUrlBuilderFactory searchUrlBuilderFactory,
     IHtmlParserFactory htmlParserFactory)
 {
     // Arguments are stored as given; no validation happens here.
     this._pageScraperService = pageScraperService;
     this._searchUrlBuilderFactory = searchUrlBuilderFactory;
     this._htmlParserFactory = htmlParserFactory;
 }