Beispiel #1
0
        /// <summary>
        /// Creates a crawler rooted at <paramref name="uri"/>.
        /// </summary>
        /// <param name="loader">Web loader; stored wrapped in a <c>RestrictedWebLoader</c>.</param>
        /// <param name="fileSystem">File system abstraction kept for later use by the crawler.</param>
        /// <param name="parserFactory">Factory of HTML parsers kept for later use by the crawler.</param>
        /// <param name="uri">Base address; must be a well-formed absolute URI string.</param>
        /// <param name="isNeedUploadOtherDomens">Stored as-is; presumably allows fetching resources from other domains (confirm against the crawl logic).</param>
        /// <param name="parallelDownloadFactor">Passed to <c>RestrictedWebLoader</c>; presumably limits parallel downloads (confirm).</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="loader"/>, <paramref name="fileSystem"/> or <paramref name="parserFactory"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="uri"/> is not a well-formed absolute URI string.
        /// </exception>
        public Crawler(IWebLoader loader, IFileSystemWrapper fileSystem, IHtmlParserFactory parserFactory, string uri, bool isNeedUploadOtherDomens = false, int parallelDownloadFactor = 10)
        {
            // Dependencies are validated in declaration order; the first missing one wins.
            if (null == loader)
            {
                throw new ArgumentNullException("loader");
            }

            if (null == fileSystem)
            {
                throw new ArgumentNullException("fileSystem");
            }

            if (null == parserFactory)
            {
                throw new ArgumentNullException("parserFactory");
            }

            // The address is checked last, only after all collaborators are known to be present.
            bool isAbsoluteUri = Uri.IsWellFormedUriString(uri, UriKind.Absolute);
            if (isAbsoluteUri == false)
            {
                throw new ArgumentException(string.Concat("Не верный формат uri ", uri));
            }

            // The supplied loader is never stored directly, only through the restricting wrapper.
            var restrictedLoader = new RestrictedWebLoader(loader, parallelDownloadFactor);
            var rootAddress = new Uri(uri);

            this.loader = restrictedLoader;
            this.fileSystem = fileSystem;
            this.parserFactory = parserFactory;
            this.isNeedUploadOtherDomens = isNeedUploadOtherDomens;
            this.baseUri = rootAddress;
        }
Beispiel #2
0
 /// <summary>
 /// Builds the test doubles: a file system mock that hands out in-memory streams
 /// and a web loader mock that forwards downloads to a <c>LoaderFromResource</c> instance.
 /// </summary>
 public void Initialize()
 {
     // File system stub: every OpenStreamFor call, whatever the path, yields a new MemoryStream.
     fileSystemMoq = new Mock <IFileSystemWrapper>();
     fileSystemMoq
         .Setup(fs => fs.OpenStreamFor(It.IsAny <string>()))
         .Returns(() => new MemoryStream());

     // Web loader mock: any DownloadAsync call is delegated to the real loader held in the field.
     loader    = new LoaderFromResource();
     loaderMoq = new Mock <IWebLoader>();
     loaderMoq
         .Setup(web => web.DownloadAsync(It.IsAny <string>(), It.IsAny <Stream>()))
         .Returns <string, Stream>((address, target) => loader.DownloadAsync(address, target));
 }
Beispiel #3
0
        /// <summary>
        /// Creates a <c>Crawler</c> for <paramref name="uri"/>, runs it starting from that address
        /// with the target name "index.html", and returns the crawler's map once the crawl completes.
        /// </summary>
        /// <param name="uri">Start address; also passed to the <c>Crawler</c> constructor as its base address.</param>
        /// <param name="nestedLevel">Crawl depth forwarded to <c>CrawlItAsync</c>; must be greater than zero.</param>
        /// <param name="isNeedUploadOtherDomens">Forwarded to the <c>Crawler</c> constructor.</param>
        /// <param name="loader">Web loader forwarded to the <c>Crawler</c> constructor.</param>
        /// <param name="fileSystem">File system abstraction forwarded to the <c>Crawler</c> constructor.</param>
        /// <param name="parserFactory">HTML parser factory forwarded to the <c>Crawler</c> constructor.</param>
        /// <param name="parallelDownloadFactor">Forwarded to the <c>Crawler</c> constructor.</param>
        /// <returns>
        /// The crawler's <c>map</c> field (presumably source address to saved file name; confirm against <c>CrawlItAsync</c>).
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="nestedLevel"/> is less than or equal to zero. Because the method is <c>async</c>,
        /// the exception is delivered through the returned task rather than thrown synchronously.
        /// </exception>
        internal static async Task <IDictionary <string, string> > CrawlAsync(string uri, int nestedLevel, bool isNeedUploadOtherDomens, IWebLoader loader, IFileSystemWrapper fileSystem, IHtmlParserFactory parserFactory, int parallelDownloadFactor = 10)
        {
            if (nestedLevel <= 0)
            {
                // The single-string constructor interprets its argument as the parameter NAME,
                // so the explanatory text must go through an overload that has a message slot.
                throw new ArgumentOutOfRangeException("nestedLevel", nestedLevel, "Параметр nestedLevel должен быть больше нуля");
            }

            var crawler = new Crawler(loader, fileSystem, parserFactory, uri, isNeedUploadOtherDomens, parallelDownloadFactor);

            await crawler.CrawlItAsync(uri, "index.html", nestedLevel);

            return crawler.map;
        }
 /// <summary>
 /// Creates a loader from a content storage and a web loader.
 /// </summary>
 /// <param name="storage">Content storage assigned to <c>Storage</c>; not validated here.</param>
 /// <param name="webLoader">Web loader assigned to <c>WebLoader</c>; not validated here.</param>
 public ContentCacheLoader(IContentStorage storage, IWebLoader webLoader)
 {
     // Both collaborators are taken as given; null is not rejected at this point.
     this.Storage = storage;
     this.WebLoader = webLoader;
 }
Beispiel #5
0
 /// <summary>
 /// Wraps <paramref name="loader"/> together with a parallelism setting.
 /// </summary>
 /// <param name="loader">The inner loader; stored without validation.</param>
 /// <param name="parallelRestrictNumber">
 /// Stored without validation; presumably the maximum number of simultaneous downloads (confirm against usage).
 /// </param>
 public RestrictedWebLoader(IWebLoader loader, int parallelRestrictNumber)
 {
     // Plain field capture; no range or null checks are performed here.
     this.parallelRestrictNumber = parallelRestrictNumber;
     this.loader = loader;
 }
 /// <summary>
 /// Initializes the builder with a newly created <c>SimpleWebLoader</c> as its web loader.
 /// </summary>
 protected AParseResultBuilder()
 {
     // The loader implementation is fixed here; this constructor offers no way to supply another one.
     this._webLoader = new SimpleWebLoader();
 }