Example #1
        public override byte[] GetImageSource(string imageAbsoluteUriOrID, IArachnodeDAO arachnodeDAO)
        {
            if (ApplicationSettings.DownloadedImagesDirectory == null)
            {
                throw new Exception("_applicationSettings.DownloadedImagesDirectory is null.  This is usually the result of failing to initialize the Application configuration from the ArachnodeDAO.");
            }

            ArachnodeDataSet.ImagesRow imagesRow = arachnodeDAO.GetImage(imageAbsoluteUriOrID);

            if (imagesRow != null)
            {
                if (imagesRow.Source.Length != 0)
                {
                    return imagesRow.Source;
                }
                else
                {
                    string discoveryPath = GetDiscoveryPath(ApplicationSettings.DownloadedImagesDirectory, imagesRow.AbsoluteUri, imagesRow.FullTextIndexType);

                    if (!File.Exists(discoveryPath))
                    {
                        throw new Exception("Could not find the Image Source in the database or on disk.");
                    }

                    return File.ReadAllBytes(discoveryPath);
                }
            }

            return null;
        }
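A minimal usage sketch for the override above. The enclosing class name (CustomImageManager) and the way the caller obtains its dependencies are assumptions for illustration; imageAbsoluteUriOrID accepts either an AbsoluteUri or a numeric ImageID, matching the GetImage lookup the method performs.

        //hedged usage sketch: "CustomImageManager" is a hypothetical name for the class that contains the override above.
        //requires System.IO for File.
        public static void SaveImageSourceToDisk(CustomImageManager imageManager, IArachnodeDAO arachnodeDAO, string imageAbsoluteUriOrID, string outputPath)
        {
            //returns the bytes from the Images table if present, otherwise from the DownloadedImagesDirectory on disk, otherwise null.
            byte[] imageSource = imageManager.GetImageSource(imageAbsoluteUriOrID, arachnodeDAO);

            if (imageSource != null)
            {
                File.WriteAllBytes(outputPath, imageSource);
            }
        }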
Example #2
        private void ImageUtilities_OnImageProcessed(ArachnodeDataSet.ImagesRow imagesRow, string message)
        {
            BeginInvoke(new MethodInvoker(delegate
            {
                rtbPostProcessingStatus.Text = message + Environment.NewLine + rtbPostProcessingStatus.Text;

                if (rtbPostProcessingStatus.Text.Length > 10000)
                {
                    rtbPostProcessingStatus.Text = rtbPostProcessingStatus.Text.Substring(0, 10000);
                }
            }));

            //Application.DoEvents();

            //Thread.Sleep(100);
        }
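The handler above marshals status updates onto the UI thread with BeginInvoke and caps the RichTextBox at roughly 10,000 characters. A hedged wiring sketch, assuming OnImageProcessed is a static event on the class that declares the ProcessImages method shown later and that ArachnodeDAO is the concrete TArachnodeDAO in use:

        //hedged sketch: subscribe the handler above to the post-processing event raised for each ImageID.
        //"ImageUtilities<ArachnodeDAO>" is an assumed owner for the event; substitute the actual declaring class.
        private void WireUpPostProcessingStatus()
        {
            ImageUtilities<ArachnodeDAO>.OnImageProcessed += ImageUtilities_OnImageProcessed;
        }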
Example #3
        private void nudImageID_ValueChanged(object sender, EventArgs e)
        {
            _imagesRow = _arachnodeDAO.GetImage(nudImageID.Value.ToString());

            if (_imagesRow != null)
            {
                _imageDiscoveryPath = _discoveryManager.GetDiscoveryPath(_applicationSettings.DownloadedImagesDirectory, _imagesRow.AbsoluteUri, _imagesRow.FullTextIndexType);

                llImageDiscoveryPathDirectory.Visible = true;
                llImageDiscoveryPathDirectory.Text    = Path.GetDirectoryName(_imageDiscoveryPath);

                if (cbAutoView.Checked)
                {
                    btnViewImage_Click(sender, e);
                }
            }
            else
            {
                llImageDiscoveryPathDirectory.Visible = false;
                wbMain.DocumentText = "The Image with the ID of " + nudImageID.Value + " does not exist.";
            }
        }
Example #4
        /// <summary>
        ///     Processes a range of ImageIDs after crawling.  Useful if crawled Images were not processed at crawl time according to the desired ApplicationSettings configuration.
        ///     Calling this method DOES change the 'LastDiscovered' fields where applicable.
        ///     This method is not called when crawling, but rather during post-processing.
        /// </summary>
        /// <param name = "crawler">The crawler supplying the ApplicationSettings and WebSettings.</param>
        /// <param name = "imageIDLowerBound">The inclusive lower bound of the ImageID range.</param>
        /// <param name = "imageIDUpperBound">The inclusive upper bound of the ImageID range.</param>
        public static void ProcessImages(Crawler <TArachnodeDAO> crawler, long imageIDLowerBound, long imageIDUpperBound)
        {
            //do not assign the ApplicationSettings here; doing so would override the ApplicationSettings you set before calling this method...
            TArachnodeDAO arachnodeDAO = (TArachnodeDAO)Activator.CreateInstance(typeof(TArachnodeDAO), crawler.ApplicationSettings.ConnectionString, crawler.ApplicationSettings, crawler.WebSettings, false, false);

            ConsoleManager <TArachnodeDAO> consoleManager = new ConsoleManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings);
            ActionManager <TArachnodeDAO>  actionManager  = new ActionManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, consoleManager);
            CookieManager cookieManager = new CookieManager();
            MemoryManager <TArachnodeDAO>      memoryManager      = new MemoryManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings);
            RuleManager <TArachnodeDAO>        ruleManager        = new RuleManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, consoleManager);
            CacheManager <TArachnodeDAO>       cacheManager       = new CacheManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings);
            CrawlerPeerManager <TArachnodeDAO> crawlerPeerManager = new CrawlerPeerManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, null, arachnodeDAO);
            Cache <TArachnodeDAO>            cache            = new Cache <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, crawler, actionManager, cacheManager, crawlerPeerManager, memoryManager, ruleManager);
            DiscoveryManager <TArachnodeDAO> discoveryManager = new DiscoveryManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, cache, actionManager, cacheManager, memoryManager, ruleManager);

            //load the CrawlActions, CrawlRules and EngineActions...
            ruleManager.ProcessCrawlRules(crawler);
            actionManager.ProcessCrawlActions(crawler);
            actionManager.ProcessEngineActions(crawler);

            //these three methods are called in the Engine.
            UserDefinedFunctions.RefreshAllowedExtensions(true);
            UserDefinedFunctions.RefreshAllowedSchemes(true);
            UserDefinedFunctions.RefreshDisallowed();

            //instantiate a WebClient to access the ResponseHeaders...
            WebClient <TArachnodeDAO> webClient = new WebClient <TArachnodeDAO>(crawler.ApplicationSettings, arachnodeDAO.WebSettings, consoleManager, cookieManager, new ProxyManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, consoleManager));

            webClient.GetHttpWebResponse("http://google.com", "GET", null, null, null, null);

            ImageManager <TArachnodeDAO> imageManager = new ImageManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, discoveryManager, arachnodeDAO);

            for (long i = imageIDLowerBound; i <= imageIDUpperBound; i++)
            {
                ArachnodeDataSet.ImagesRow imagesRow = null;

                try
                {
                    //get the Image from the database.  we need the source data as we don't store this in the index.
                    //even though most of the fields are available in the Document, the Image is the authoritative source, so we'll use that for all of the fields.
                    imagesRow = arachnodeDAO.GetImage(i.ToString());

                    if (imagesRow != null)
                    {
                        if (imagesRow.Source == null || imagesRow.Source.Length == 0)
                        {
                            if (File.Exists(discoveryManager.GetDiscoveryPath(crawler.ApplicationSettings.DownloadedImagesDirectory, imagesRow.AbsoluteUri, imagesRow.FullTextIndexType)))
                            {
                                imagesRow.Source = File.ReadAllBytes(discoveryManager.GetDiscoveryPath(crawler.ApplicationSettings.DownloadedImagesDirectory, imagesRow.AbsoluteUri, imagesRow.FullTextIndexType));
                            }
                            else
                            {
                                Console.WriteLine("ImageID: " + i + " was NOT processed successfully.");
                                if (OnImageProcessed != null)
                                {
                                    OnImageProcessed.BeginInvoke(imagesRow, "ImageID: " + i + " was NOT processed successfully.", null, null);
                                }
                            }
                        }

                        ProcessImage(crawler.ApplicationSettings, crawler.WebSettings, crawler, imagesRow, webClient, cache, actionManager, consoleManager, crawlerPeerManager, discoveryManager, imageManager, memoryManager, ruleManager, arachnodeDAO);

                        Console.WriteLine("ImageID: " + i + " was processed successfully.");
                        if (OnImageProcessed != null)
                        {
                            OnImageProcessed.BeginInvoke(imagesRow, "ImageID: " + i + " was processed successfully.", null, null);
                        }
                    }
                }
                catch (Exception exception)
                {
                    Console.WriteLine("ImageID: " + i + " was NOT processed successfully.");
                    Console.WriteLine(exception.Message);

                    if (OnImageProcessed != null)
                    {
                        OnImageProcessed.BeginInvoke(imagesRow, "ImageID: " + i + " was NOT processed successfully.", null, null);
                        OnImageProcessed.BeginInvoke(imagesRow, exception.Message, null, null);
                    }

                    arachnodeDAO.InsertException(null, null, exception, false);
                }
            }

            //stop the CrawlActions, CrawlRules and EngineActions...
            ruleManager.Stop();
            actionManager.Stop();
        }
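A hedged usage sketch for the post-processing entry point above. The enclosing class name (ImageUtilities<ArachnodeDAO>) is an assumption; the ApplicationSettings properties set below are the ones the method body actually reads.

        //hedged sketch: re-process a range of ImageIDs after a crawl has completed.
        //"ImageUtilities<ArachnodeDAO>" is assumed to be the class that declares ProcessImages and OnImageProcessed.
        public static void ReprocessImageRange(Crawler<ArachnodeDAO> crawler, long imageIDLowerBound, long imageIDUpperBound)
        {
            //configure the desired post-processing behavior BEFORE calling ProcessImages; the method
            //deliberately does not reassign ApplicationSettings (see the comment at the top of the method above).
            crawler.ApplicationSettings.ExtractImageMetaData       = true;
            crawler.ApplicationSettings.InsertImageMetaData        = true;
            crawler.ApplicationSettings.SaveDiscoveredImagesToDisk = true;

            //observe progress through the same event the method raises for each ImageID.
            ImageUtilities<ArachnodeDAO>.OnImageProcessed += (imagesRow, message) => Console.WriteLine(message);

            ImageUtilities<ArachnodeDAO>.ProcessImages(crawler, imageIDLowerBound, imageIDUpperBound);
        }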
Example #5
        /// <summary>
        ///     Processes an ImagesRow after crawling.
        /// </summary>
        /// <param name = "imagesRow">The images row.</param>
        /// <param name="webClient"></param>
        /// <param name="actionManager"></param>
        /// <param name="consoleManager"></param>
        /// <param name="discoveryManager"></param>
        /// <param name = "imageManager">The image manager.</param>
        /// <param name = "imageManager">The image manager.</param>
        /// <param name = "imageManager">The image manager.</param>
        /// <param name="memoryManager"></param>
        /// <param name="ruleManager"></param>
        /// <param name = "arachnodeDAO">The arachnode DAO.</param>
        public static void ProcessImage(ApplicationSettings applicationSettings, WebSettings webSettings, Crawler <TArachnodeDAO> crawler, ArachnodeDataSet.ImagesRow imagesRow, WebClient <TArachnodeDAO> webClient, Cache <TArachnodeDAO> cache, ActionManager <TArachnodeDAO> actionManager, ConsoleManager <TArachnodeDAO> consoleManager, CrawlerPeerManager <TArachnodeDAO> crawlerPeerManager, DiscoveryManager <TArachnodeDAO> discoveryManager, ImageManager <TArachnodeDAO> imageManager, MemoryManager <TArachnodeDAO> memoryManager, RuleManager <TArachnodeDAO> ruleManager, IArachnodeDAO arachnodeDAO)
        {
            CacheManager <TArachnodeDAO> cacheManager = new CacheManager <TArachnodeDAO>(applicationSettings, webSettings);
            CookieManager cookieManager = new CookieManager();
            CrawlRequestManager <TArachnodeDAO> crawlRequestManager = new CrawlRequestManager <TArachnodeDAO>(applicationSettings, webSettings, cache, consoleManager, discoveryManager);
            DataTypeManager <TArachnodeDAO>     dataTypeManager     = new DataTypeManager <TArachnodeDAO>(applicationSettings, webSettings);
            EncodingManager <TArachnodeDAO>     encodingManager     = new EncodingManager <TArachnodeDAO>(applicationSettings, webSettings);
            PolitenessManager <TArachnodeDAO>   politenessManager   = new PolitenessManager <TArachnodeDAO>(applicationSettings, webSettings, cache);
            ProxyManager <TArachnodeDAO>        proxyManager        = new ProxyManager <TArachnodeDAO>(applicationSettings, webSettings, consoleManager);
            HtmlManager <TArachnodeDAO>         htmlManager         = new HtmlManager <TArachnodeDAO>(applicationSettings, webSettings, discoveryManager);
            Crawl <TArachnodeDAO> crawl = new Crawl <TArachnodeDAO>(applicationSettings, webSettings, crawler, actionManager, consoleManager, cookieManager, crawlRequestManager, dataTypeManager, discoveryManager, encodingManager, htmlManager, politenessManager, proxyManager, ruleManager, true);

            //create a CrawlRequest as this is what the internals of SiteCrawler.dll expect to operate on...
            CrawlRequest <TArachnodeDAO> crawlRequest = new CrawlRequest <TArachnodeDAO>(new Discovery <TArachnodeDAO>(imagesRow.AbsoluteUri), 1, UriClassificationType.Host, UriClassificationType.Host, 0, RenderType.None, RenderType.None);

            crawlRequest.Crawl = crawl;
            crawlRequest.Discovery.DiscoveryType = DiscoveryType.Image;
            crawlRequest.Discovery.ID            = imagesRow.ID;
            crawlRequest.Data        = imagesRow.Source;
            crawlRequest.ProcessData = true;
            crawlRequest.WebClient   = webClient;

            crawlRequest.WebClient.HttpWebResponse.Headers.Clear();

            //parse the ResponseHeaders from the ImagesRow.ResponseHeaders string...
            foreach (string responseHeader in imagesRow.ResponseHeaders.Split("\r\n".ToCharArray(), StringSplitOptions.RemoveEmptyEntries))
            {
                string[] responseHeaderSplit = responseHeader.Split(":".ToCharArray());

                string name  = responseHeaderSplit[0];
                string value = UserDefinedFunctions.ExtractResponseHeader(imagesRow.ResponseHeaders, name, true).Value;

                crawlRequest.WebClient.HttpWebResponse.Headers.Add(name, value);
            }

            //refresh the DataTypes in the DataTypeManager... (if necessary)...
            if (dataTypeManager.AllowedDataTypes.Count == 0)
            {
                dataTypeManager.RefreshDataTypes();
            }

            crawlRequest.DataType = dataTypeManager.DetermineDataType(crawlRequest);

            if (applicationSettings.InsertImages)
            {
                crawlRequest.Discovery.ID = arachnodeDAO.InsertImage(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.WebClient.HttpWebResponse.Headers.ToString(), applicationSettings.InsertImageSource ? crawlRequest.Data : new byte[] {}, crawlRequest.DataType.FullTextIndexType);
            }

            crawlRequest.ManagedDiscovery = imageManager.ManageImage(crawlRequest, crawlRequest.Discovery.ID.Value, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.Data, crawlRequest.DataType.FullTextIndexType, applicationSettings.ExtractImageMetaData, applicationSettings.InsertImageMetaData, applicationSettings.SaveDiscoveredImagesToDisk);

            actionManager.PerformCrawlActions(crawlRequest, CrawlActionType.PostRequest, arachnodeDAO);

            discoveryManager.CloseAndDisposeManagedDiscovery(crawlRequest, arachnodeDAO);
        }
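The loop above rebuilds the WebClient's response headers from the ResponseHeaders string persisted with the ImagesRow. A standalone sketch of that parsing step, assuming the stored format is CRLF-separated "Name: value" lines; the method and local names are hypothetical.

        //hedged sketch: parse a stored "Name: value\r\nName: value" blob back into name/value pairs.
        //requires System and System.Collections.Generic; later duplicates of a header name overwrite earlier ones.
        public static Dictionary<string, string> ParseResponseHeaders(string responseHeaders)
        {
            Dictionary<string, string> headers = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

            foreach (string line in responseHeaders.Split(new[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries))
            {
                int separatorIndex = line.IndexOf(':');

                if (separatorIndex > 0)
                {
                    //everything before the first ':' is the header name; the remainder (trimmed) is the value.
                    headers[line.Substring(0, separatorIndex).Trim()] = line.Substring(separatorIndex + 1).Trim();
                }
            }

            return headers;
        }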
Example #6
        public override void ProcessCrawlRequest(CrawlRequest <TArachnodeDAO> crawlRequest, bool obeyCrawlRules, bool executeCrawlActions)
        {
            IssueWebRequest(crawlRequest, "GET");

            crawlRequest.DataType = _dataTypeManager.DetermineDataType(crawlRequest);

            if (obeyCrawlRules)
            {
                _ruleManager.IsDisallowed(crawlRequest, CrawlRuleType.PreGet, _arachnodeDAO);
            }

            if (executeCrawlActions)
            {
                _actionManager.PerformCrawlActions(crawlRequest, CrawlActionType.PreGet, _arachnodeDAO);
            }

            if (!crawlRequest.IsDisallowed)
            {
                try
                {
                    if (crawlRequest.WebClient.HttpWebResponse != null)
                    {
                        crawlRequest.ProcessData = true;

                        bool isLastModifiedOutdated = true;

                        try
                        {
                            isLastModifiedOutdated = crawlRequest.WebClient.HttpWebResponse.LastModified != DateTime.Now;
                        }
                        catch (Exception exception)
                        {
                            _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                        }

                        if (isLastModifiedOutdated)
                        {
                            switch (crawlRequest.DataType.DiscoveryType)
                            {
                            case DiscoveryType.File:
                                if (ApplicationSettings.AssignFileAndImageDiscoveries)     //ANODET: robots.txt
                                {
                                    ArachnodeDataSet.FilesRow filesRow = _arachnodeDAO.GetFile(crawlRequest.Discovery.Uri.AbsoluteUri);

                                    if (filesRow == null)
                                    {
                                        crawlRequest.ProcessData = true;
                                    }
                                    else
                                    {
                                        if (!filesRow.IsResponseHeadersNull())
                                        {
                                            DateTime lastModified;

                                            SqlString lastModifiedValue = UserDefinedFunctions.ExtractResponseHeader(filesRow.ResponseHeaders, "Last-Modified: ", false);

                                            if (!lastModifiedValue.IsNull && DateTime.TryParse(lastModifiedValue.Value, out lastModified))
                                            {
                                                //crawlRequest.WebClient.HttpWebResponse.LastModified will equal DateTime.Now (or close to it) if the 'Last-Modified' ResponseHeader is not present...
                                                if (crawlRequest.WebClient.HttpWebResponse.LastModified > lastModified)
                                                {
                                                    crawlRequest.ProcessData = true;
                                                }
                                                else
                                                {
                                                    crawlRequest.ProcessData = false;
                                                }
                                            }
                                            else
                                            {
                                                crawlRequest.ProcessData = false;
                                            }
                                        }
                                        else
                                        {
                                            crawlRequest.ProcessData = true;
                                        }

                                        if (!crawlRequest.ProcessData)
                                        {
                                            if (filesRow.Source.Length != 0)
                                            {
                                                crawlRequest.Data = filesRow.Source;
                                            }
                                            else
                                            {
                                                string discoveryPath = _discoveryManager.GetDiscoveryPath(ApplicationSettings.DownloadedFilesDirectory, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.DataType.FullTextIndexType);

                                                if (File.Exists(discoveryPath))
                                                {
                                                    crawlRequest.Data = File.ReadAllBytes(discoveryPath);
                                                }
                                                else
                                                {
                                                    try
                                                    {
                                                        throw new Exception("The 'LastModified' HttpResponse Header indicated that the Data was not stale, but the Data (Source) could not be found in the Files database table or at _applicationSettings.DownloadedFilesDirectory.  Therefore, the data was re-downloaded from the server.  The File file may have been deleted from disk or the 'Source' column in the 'Files' table may have been cleared or a previous crawl may have crawled with both _applicationSettings.InsertFileSource = false and _applicationSettings.SaveDiscoveredFilesToDisk = false.");
                                                    }
                                                    catch (Exception exception)
                                                    {
                                                        _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                                                    }

                                                    crawlRequest.ProcessData = true;
                                                }
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    crawlRequest.ProcessData = false;
                                }
                                break;

                            case DiscoveryType.Image:
                                if (ApplicationSettings.AssignFileAndImageDiscoveries)
                                {
                                    ArachnodeDataSet.ImagesRow imagesRow = _arachnodeDAO.GetImage(crawlRequest.Discovery.Uri.AbsoluteUri);

                                    if (imagesRow == null)
                                    {
                                        crawlRequest.ProcessData = true;
                                    }
                                    else
                                    {
                                        if (!imagesRow.IsResponseHeadersNull())
                                        {
                                            DateTime lastModified;

                                            SqlString lastModifiedValue = UserDefinedFunctions.ExtractResponseHeader(imagesRow.ResponseHeaders, "Last-Modified: ", false);

                                            if (!lastModifiedValue.IsNull && DateTime.TryParse(lastModifiedValue.Value, out lastModified))
                                            {
                                                //crawlRequest.WebClient.HttpWebResponse.LastModified will equal DateTime.Now (or close to it) if the 'Last-Modified' ResponseHeader is not present...
                                                if (crawlRequest.WebClient.HttpWebResponse.LastModified > lastModified)
                                                {
                                                    crawlRequest.ProcessData = true;
                                                }
                                                else
                                                {
                                                    crawlRequest.ProcessData = false;
                                                }
                                            }
                                            else
                                            {
                                                crawlRequest.ProcessData = false;
                                            }

                                            if (!crawlRequest.ProcessData)
                                            {
                                                if (imagesRow.Source.Length != 0)
                                                {
                                                    crawlRequest.Data = imagesRow.Source;
                                                }
                                                else
                                                {
                                                    string discoveryPath = _discoveryManager.GetDiscoveryPath(ApplicationSettings.DownloadedImagesDirectory, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.DataType.FullTextIndexType);

                                                    if (File.Exists(discoveryPath))
                                                    {
                                                        crawlRequest.Data = File.ReadAllBytes(discoveryPath);
                                                    }
                                                    else
                                                    {
                                                        try
                                                        {
                                                            throw new Exception("The 'LastModified' HttpResponse Header indicated that the Data was not stale, but the Data (Source) could not be found in the Images database table or at _applicationSettings.DownloadedImagesDirectory.  Therefore, the data was downloaded from the server.  The Image file may have been deleted from disk or the 'Source' column in the 'Images' table may have been cleared.  A previous crawl may have crawled with both _applicationSettings.InsertImageSource = false and _applicationSettings.SaveDiscoveredImagesToDisk = false.");
                                                        }
                                                        catch (Exception exception)
                                                        {
                                                            _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                                                        }

                                                        crawlRequest.ProcessData = true;
                                                    }
                                                }
                                            }
                                        }
                                        else
                                        {
                                            crawlRequest.ProcessData = true;
                                        }
                                    }
                                }
                                else
                                {
                                    crawlRequest.ProcessData = false;
                                }
                                break;

                            case DiscoveryType.WebPage:
                                ArachnodeDataSet.WebPagesRow webPagesRow = _arachnodeDAO.GetWebPage(crawlRequest.Discovery.Uri.AbsoluteUri);

                                if (webPagesRow == null)
                                {
                                    crawlRequest.ProcessData = true;
                                }
                                else
                                {
                                    if (crawlRequest.WebClient.HttpWebResponse.LastModified > webPagesRow.LastDiscovered)
                                    {
                                        crawlRequest.ProcessData = true;
                                    }
                                    else
                                    {
                                        crawlRequest.ProcessData = false;
                                    }

                                    if (!crawlRequest.ProcessData)
                                    {
                                        if (webPagesRow.Source.Length != 0)
                                        {
                                            crawlRequest.Data = webPagesRow.Source;
                                        }
                                        else
                                        {
                                            string discoveryPath = _discoveryManager.GetDiscoveryPath(ApplicationSettings.DownloadedWebPagesDirectory, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.DataType.FullTextIndexType);

                                            if (File.Exists(discoveryPath))
                                            {
                                                crawlRequest.Data = File.ReadAllBytes(discoveryPath);
                                            }
                                            else
                                            {
                                                try
                                                {
                                                    throw new Exception("The 'LastModified' HttpResponse Header indicated that the Data was not stale, but the Data (Source) could not be found in the WebPages database table or at _applicationSettings.DownloadedWebPagesDirectory.  Therefore, the data was re-downloaded from the server.  The WebPage file may have been deleted from disk or the 'Source' column in the 'WebPages' table may have been cleared or a previous crawl may have crawled with both _applicationSettings.InsertWebPageSource = false and _applicationSettings.SaveDiscoveredWebPagesToDisk = false.");
                                                }
                                                catch (Exception exception)
                                                {
                                                    _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                                                }

                                                crawlRequest.ProcessData = true;
                                            }
                                        }
                                    }
                                }
                                break;

                            case DiscoveryType.None:
                                crawlRequest.ProcessData = true;
                                break;
                            }
                        }
                    }
                }
                catch (Exception exception)
                {
                    _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                }

                if (crawlRequest.ProcessData)
                {
                    if (crawlRequest.RenderType == RenderType.None)
                    {
                        if (crawlRequest.Discovery.Uri.Scheme.ToLowerInvariant() != "ftp")
                        {
                            if (crawlRequest.WebClient.HttpWebResponse != null && crawlRequest.WebClient.HttpWebResponse.Method == "HEAD")
                            {
                                IssueWebRequest(crawlRequest, "GET");
                            }

                            if (crawlRequest.WebClient.HttpWebResponse != null)
                            {
                                crawlRequest.Data = crawlRequest.WebClient.DownloadHttpData(crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.WebClient.HttpWebResponse.ContentEncoding.ToLowerInvariant() == "gzip", crawlRequest.WebClient.HttpWebResponse.ContentEncoding.ToLowerInvariant() == "deflate", crawlRequest.Crawl.Crawler.CookieContainer);
                            }
                        }
                        else
                        {
                            crawlRequest.Data = crawlRequest.WebClient.DownloadFtpData(crawlRequest.Discovery.Uri.AbsoluteUri);
                        }
                    }
                    else
                    {
                        RendererResponse rendererResponse = crawlRequest.Crawl.Crawler.Engine.Render(crawlRequest, RenderAction.Render, crawlRequest.RenderType);

                        if (rendererResponse != null)
                        {
                            if (rendererResponse.HTMLDocumentClass != null)
                            {
                                crawlRequest.Encoding = Encoding.GetEncoding(rendererResponse.HTMLDocumentClass.charset);

                                string outerHTML = rendererResponse.HTMLDocumentClass.documentElement.outerHTML;

                                crawlRequest.Data         = crawlRequest.Encoding.GetBytes(outerHTML);
                                crawlRequest.DecodedHtml  = HttpUtility.HtmlDecode(outerHTML);
                                crawlRequest.Html         = outerHTML;
                                crawlRequest.HtmlDocument = rendererResponse.HTMLDocumentClass;
                            }

                            crawlRequest.RendererMessage = rendererResponse.RendererMessage;
                        }
                    }
                }
            }

            if (crawlRequest.Data == null)
            {
                crawlRequest.Data = new byte[0];
            }
        }
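The freshness test repeated in each DiscoveryType branch above compares the response's LastModified against the 'Last-Modified' header stored with the File, Image or WebPage row. A condensed sketch of that decision in isolation; the method name and parameters are hypothetical, UserDefinedFunctions.ExtractResponseHeader is the same helper used above, and SqlString/HttpWebResponse come from System.Data.SqlTypes and System.Net.

        //hedged sketch: returns true when the server reports newer content than what was stored, i.e. the Data should be re-processed.
        private static bool IsStoredDataStale(HttpWebResponse httpWebResponse, string storedResponseHeaders)
        {
            SqlString lastModifiedValue = UserDefinedFunctions.ExtractResponseHeader(storedResponseHeaders, "Last-Modified: ", false);

            DateTime lastModified;

            if (!lastModifiedValue.IsNull && DateTime.TryParse(lastModifiedValue.Value, out lastModified))
            {
                //HttpWebResponse.LastModified falls back to (approximately) DateTime.Now when the server omits the header,
                //so a greater value means the server's copy is newer than the stored one.
                return httpWebResponse.LastModified > lastModified;
            }

            //no usable stored header: treat the stored Source/disk copy as current rather than re-downloading.
            return false;
        }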