예제 #1
0
        /// <summary>
        /// Wraps the given link in a <c>Url</c>, records it in <c>UrlsSeen</c>
        /// and passes it to <c>Download</c>.
        /// </summary>
        /// <param name="urllink">Link text handed to the <see cref="Uri"/> constructor.</param>
        protected void DownloadFile(string urllink)
        {
            Uri target = new Uri(urllink);

            // This overload has no originating page, so the depth is the literal -1
            // and the status is UrlStatus.Queue.
            Url entry = new Url(target, -1, UrlStatus.Queue);

            UrlsSeen.Add(entry);
            Download(entry);
        }
예제 #2
0
        /// <summary>
        /// Wraps the given link in a <c>Url</c> carrying the depth of <paramref name="page"/>,
        /// records it in <c>UrlsSeen</c> and passes it to <c>Download</c>.
        /// </summary>
        /// <param name="urllink">Link text handed to the <see cref="Uri"/> constructor.</param>
        /// <param name="page">Page whose <c>Url.depth</c> is copied onto the new entry.</param>
        protected void DownloadFile(string urllink, Page page)
        {
            // Build the Uri first so an invalid link fails before the page is dereferenced.
            Uri target = new Uri(urllink);
            Url entry = new Url(target, page.Url.depth, UrlStatus.Queue);

            UrlsSeen.Add(entry);
            Download(entry);
        }
예제 #3
0
        /// <summary>
        /// Decides what to do with a candidate URL: skip it, download it,
        /// or schedule it to be fetched as a new page.
        /// </summary>
        /// <remarks>
        /// The checks run in order and the first match wins: already seen,
        /// not on the whitelist (only when <c>_config.UseWhiteList</c> is set),
        /// excluded domain, excluded file type, downloadable, and finally
        /// whether crawling should continue at the URL's depth. The URL is added
        /// to <c>UrlsSeen</c> only in the last two cases.
        /// </remarks>
        /// <param name="url">Candidate URL; its <c>link</c>, <c>uri</c> and <c>depth</c> members are read.</param>
        /// <returns>
        /// <c>true</c> only when the URL was handed to <c>ParallelManager</c> for fetching;
        /// <c>false</c> when it was skipped, downloaded directly, or matched no rule.
        /// </returns>
        protected bool AddProcess(Url url)
        {
            string link = url.link;

            // Each template carries a {Link} placeholder so the URL argument is
            // actually rendered; without it the argument is silently dropped.
            if (UrlsSeen.Any(u => u.link == link))
            {
                _logger.LogInformation("Skipping...URL already queued: {Link}", link);
            }
            else if (_config.UseWhiteList && !IsWhiteListedDomain(url.uri.Authority))
            {
                _logger.LogInformation("URL domain not on whitelist: {Link}", link);
            }
            else if (IsExcludedDomain(link))
            {
                _logger.LogInformation("Skipping...URL domain is excluded: {Link}", link);
            }
            else if (IsExcludedFileType(link))
            {
                _logger.LogInformation("Skipping...file type is excluded: {Link}", link);
            }
            else if (ShouldDownload(link))
            {
                // Direct downloads are recorded as seen but are not reported as
                // queued pages, so this path falls through to return false.
                UrlsSeen.Add(url);
                Download(url);
            }
            else if (ShouldContinue(url.depth))
            {
                UrlsSeen.Add(url);
                ParallelManager.Add(FetchNewPage, url);
                return true;
            }

            return false;
        }