示例#1
0
        /// <summary>
        ///     Returns the raw bytes of a File, preferring the copy stored on the Files row and
        ///     falling back to the copy stored under ApplicationSettings.DownloadedFilesDirectory.
        /// </summary>
        /// <param name = "fileAbsoluteUriOrID">The value passed to <c>arachnodeDAO.GetFile</c> to look up the Files row.</param>
        /// <param name = "arachnodeDAO">The DAO used to look up the Files row.</param>
        /// <returns>The File's source bytes, or null when no Files row was found.</returns>
        /// <exception cref = "Exception">
        ///     Thrown when ApplicationSettings.DownloadedFilesDirectory is null, or when the Files row exists
        ///     but its source is neither in the row nor on disk.
        /// </exception>
        public override byte[] GetFileSource(string fileAbsoluteUriOrID, IArachnodeDAO arachnodeDAO)
        {
            if (ApplicationSettings.DownloadedFilesDirectory == null)
            {
                throw new Exception("_applicationSettings.DownloadedFilesDirectory is null.  This is usually the result of failing to initialize the Application configuration from the ArachnodeDAO.");
            }

            ArachnodeDataSet.FilesRow filesRow = arachnodeDAO.GetFile(fileAbsoluteUriOrID);

            if (filesRow == null)
            {
                return(null);
            }

            //a null Source is treated exactly like an empty one: fall back to the copy on disk instead of dereferencing null...
            byte[] source = filesRow.Source;

            if (source != null && source.Length != 0)
            {
                return(source);
            }

            string discoveryPath = GetDiscoveryPath(ApplicationSettings.DownloadedFilesDirectory, filesRow.AbsoluteUri, filesRow.FullTextIndexType);

            if (!File.Exists(discoveryPath))
            {
                throw new Exception("Could not find the File Source in the database or on disk.");
            }

            return(File.ReadAllBytes(discoveryPath));
        }
示例#2
0
        /// <summary>
        ///     Prepends <paramref name = "message" /> to the post-processing status box via BeginInvoke
        ///     and keeps only the first 10000 characters of the resulting text.
        /// </summary>
        /// <param name = "filesRow">The Files row the message refers to (not read by this handler).</param>
        /// <param name = "message">The status message to display.</param>
        private void FileUtilities_OnFileProcessed(ArachnodeDataSet.FilesRow filesRow, string message)
        {
            MethodInvoker prependMessage = delegate
            {
                //newest message first...
                rtbPostProcessingStatus.Text = message + Environment.NewLine + rtbPostProcessingStatus.Text;

                string statusText = rtbPostProcessingStatus.Text;

                if (statusText.Length > 10000)
                {
                    rtbPostProcessingStatus.Text = statusText.Substring(0, 10000);
                }
            };

            BeginInvoke(prependMessage);
        }
示例#3
0
        /// <summary>
        ///     Loads the Files row for the ID currently selected in nudFileID and updates the
        ///     discovery path link, or shows a 'does not exist' message when no row is found.
        /// </summary>
        private void nudFileID_ValueChanged(object sender, EventArgs e)
        {
            _filesRow = _arachnodeDAO.GetFile(nudFileID.Value.ToString());

            if (_filesRow == null)
            {
                //no such File: hide the path link and tell the user...
                llFileDiscoveryPathDirectory.Visible = false;
                wbMain.DocumentText = "The File with the ID of " + nudFileID.Value + " does not exist.";

                return;
            }

            _fileDiscoveryPath = _discoveryManager.GetDiscoveryPath(_applicationSettings.DownloadedFilesDirectory, _filesRow.AbsoluteUri, _filesRow.FullTextIndexType);

            llFileDiscoveryPathDirectory.Visible = true;
            llFileDiscoveryPathDirectory.Text = Path.GetDirectoryName(_fileDiscoveryPath);

            if (!cbAutoView.Checked)
            {
                return;
            }

            btnViewFile_Click(sender, e);
        }
        /// <summary>
        ///     Processes a range of FileIDs after crawling.  Useful if crawled Files were not processed at crawl time according to the desired ApplicationSettings configuration.
        ///     Calling this method DOES change the 'LastDiscovered' fields where applicable.
        ///     This method is not intended to be called while crawling, but rather during post-processing.
        ///     A File whose source is neither in the database nor on disk is reported as not processed and skipped.
        /// </summary>
        /// <param name = "crawler">The crawler whose ApplicationSettings and WebSettings are used to build the managers.</param>
        /// <param name = "fileIDLowerBound">The first FileID to process (inclusive).</param>
        /// <param name = "fileIDUpperBound">The last FileID to process (inclusive).</param>
        public static void ProcessFiles(Crawler <TArachnodeDAO> crawler, long fileIDLowerBound, long fileIDUpperBound)
        {
            //do not assign the application settings.  doing so will override the ApplicationSetting you set before calling this method...
            TArachnodeDAO arachnodeDAO = (TArachnodeDAO)Activator.CreateInstance(typeof(TArachnodeDAO), crawler.ApplicationSettings.ConnectionString, crawler.ApplicationSettings, crawler.WebSettings, false, false);

            ConsoleManager <TArachnodeDAO> consoleManager = new ConsoleManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings);
            ActionManager <TArachnodeDAO>  actionManager  = new ActionManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, consoleManager);
            CookieManager cookieManager = new CookieManager();
            MemoryManager <TArachnodeDAO>      memoryManager      = new MemoryManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings);
            RuleManager <TArachnodeDAO>        ruleManager        = new RuleManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, consoleManager);
            CacheManager <TArachnodeDAO>       cacheManager       = new CacheManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings);
            CrawlerPeerManager <TArachnodeDAO> crawlerPeerManager = new CrawlerPeerManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, null, arachnodeDAO);
            Cache <TArachnodeDAO> cache = new Cache <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, crawler, actionManager, cacheManager, crawlerPeerManager, memoryManager, ruleManager);

            DiscoveryManager <TArachnodeDAO> discoveryManager = new DiscoveryManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, cache, actionManager, cacheManager, memoryManager, ruleManager);

            //load the CrawlActions, CrawlRules and EngineActions...
            ruleManager.ProcessCrawlRules(crawler);
            actionManager.ProcessCrawlActions(crawler);
            actionManager.ProcessEngineActions(crawler);

            try
            {
                //these three methods are called in the Engine.
                UserDefinedFunctions.RefreshAllowedExtensions(true);
                UserDefinedFunctions.RefreshAllowedSchemes(true);
                UserDefinedFunctions.RefreshDisallowed();

                //instantiate a WebClient to access the ResponseHeaders...
                WebClient <TArachnodeDAO> webClient = new WebClient <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, consoleManager, cookieManager, new ProxyManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, consoleManager));

                //ProcessFile clears and repopulates webClient.HttpWebResponse.Headers, so a response must exist before the loop starts...
                webClient.GetHttpWebResponse("http://google.com", "GET", null, null, null, null);

                FileManager <TArachnodeDAO> fileManager = new FileManager <TArachnodeDAO>(crawler.ApplicationSettings, crawler.WebSettings, discoveryManager, arachnodeDAO);

                for (long i = fileIDLowerBound; i <= fileIDUpperBound; i++)
                {
                    ArachnodeDataSet.FilesRow filesRow = null;

                    try
                    {
                        //get the File from the database.  we need the source data as we don't store this in the index.
                        //even though most of the fields are available in the Document, the File is the authoritative source, so we'll use that for all of the fields.
                        filesRow = arachnodeDAO.GetFile(i.ToString());

                        if (filesRow == null)
                        {
                            //no File with this ID; nothing to process or report...
                            continue;
                        }

                        if (filesRow.Source == null || filesRow.Source.Length == 0)
                        {
                            //the source isn't in the database; fall back to the copy on disk (path resolved once)...
                            string discoveryPath = discoveryManager.GetDiscoveryPath(crawler.ApplicationSettings.DownloadedFilesDirectory, filesRow.AbsoluteUri, filesRow.FullTextIndexType);

                            if (!File.Exists(discoveryPath))
                            {
                                Console.WriteLine("FileID: " + i + " was NOT processed successfully.");
                                if (OnFileProcessed != null)
                                {
                                    OnFileProcessed.BeginInvoke(filesRow, "FileID: " + i + " was NOT processed successfully.", null, null);
                                }

                                //there is no source to process; skip the File rather than reporting failure and then success...
                                continue;
                            }

                            filesRow.Source = File.ReadAllBytes(discoveryPath);
                        }

                        ProcessFile(crawler.ApplicationSettings, crawler.WebSettings, crawler, filesRow, webClient, cache, actionManager, consoleManager, crawlerPeerManager, discoveryManager, fileManager, memoryManager, ruleManager, arachnodeDAO);

                        Console.WriteLine("FileID: " + i + " was processed successfully.");
                        if (OnFileProcessed != null)
                        {
                            OnFileProcessed.BeginInvoke(filesRow, "FileID: " + i + " was processed successfully.", null, null);
                        }
                    }
                    catch (Exception exception)
                    {
                        //a failure on one File must not abort the rest of the range: report it, record it and move on...
                        Console.WriteLine("FileID: " + i + " was NOT processed successfully.");
                        Console.WriteLine(exception.Message);

                        if (OnFileProcessed != null)
                        {
                            OnFileProcessed.BeginInvoke(filesRow, "FileID: " + i + " was NOT processed successfully.", null, null);
                            OnFileProcessed.BeginInvoke(filesRow, exception.Message, null, null);
                        }

                        arachnodeDAO.InsertException(null, null, exception, false);
                    }
                }
            }
            finally
            {
                //stop the CrawlActions, CrawlRules and EngineActions, even if an exception escaped the loop...
                ruleManager.Stop();
                actionManager.Stop();
            }
        }
        /// <summary>
        ///     Processes a FilesRow after crawling: builds a CrawlRequest around the row, replays the row's stored
        ///     ResponseHeaders onto the WebClient's response, determines the DataType, optionally re-inserts the File,
        ///     then manages the File and performs the PostRequest CrawlActions.
        /// </summary>
        /// <param name = "applicationSettings">The application settings (InsertFiles, InsertFileSource, etc. are read here).</param>
        /// <param name = "webSettings">The web settings passed to the managers created by this method.</param>
        /// <param name = "crawler">The crawler passed to the Crawl created by this method.</param>
        /// <param name = "filesRow">The files row.</param>
        /// <param name = "webClient">The web client; its HttpWebResponse.Headers are cleared and repopulated, so HttpWebResponse must not be null.</param>
        /// <param name = "cache">The cache.</param>
        /// <param name = "actionManager">The action manager.</param>
        /// <param name = "consoleManager">The console manager.</param>
        /// <param name = "crawlerPeerManager">The crawler peer manager (not read by this method).</param>
        /// <param name = "discoveryManager">The discovery manager.</param>
        /// <param name = "fileManager">The file manager.</param>
        /// <param name = "memoryManager">The memory manager (not read by this method).</param>
        /// <param name = "ruleManager">The rule manager.</param>
        /// <param name = "arachnodeDAO">The arachnode DAO.</param>
        public static void ProcessFile(ApplicationSettings applicationSettings, WebSettings webSettings, Crawler <TArachnodeDAO> crawler, ArachnodeDataSet.FilesRow filesRow, WebClient <TArachnodeDAO> webClient, Cache <TArachnodeDAO> cache, ActionManager <TArachnodeDAO> actionManager, ConsoleManager <TArachnodeDAO> consoleManager, CrawlerPeerManager <TArachnodeDAO> crawlerPeerManager, DiscoveryManager <TArachnodeDAO> discoveryManager, FileManager <TArachnodeDAO> fileManager, MemoryManager <TArachnodeDAO> memoryManager, RuleManager <TArachnodeDAO> ruleManager, IArachnodeDAO arachnodeDAO)
        {
            //NOTE(review): cacheManager is not read below; it is kept because constructor side effects cannot be ruled out from here - confirm and remove if safe...
            CacheManager <TArachnodeDAO> cacheManager = new CacheManager <TArachnodeDAO>(applicationSettings, webSettings);
            CookieManager cookieManager = new CookieManager();
            CrawlRequestManager <TArachnodeDAO> crawlRequestManager = new CrawlRequestManager <TArachnodeDAO>(applicationSettings, webSettings, cache, consoleManager, discoveryManager);
            DataTypeManager <TArachnodeDAO>     dataTypeManager     = new DataTypeManager <TArachnodeDAO>(applicationSettings, webSettings);
            EncodingManager <TArachnodeDAO>     encodingManager     = new EncodingManager <TArachnodeDAO>(applicationSettings, webSettings);
            PolitenessManager <TArachnodeDAO>   politenessManager   = new PolitenessManager <TArachnodeDAO>(applicationSettings, webSettings, cache);
            ProxyManager <TArachnodeDAO>        proxyManager        = new ProxyManager <TArachnodeDAO>(applicationSettings, webSettings, consoleManager);
            HtmlManager <TArachnodeDAO>         htmlManager         = new HtmlManager <TArachnodeDAO>(applicationSettings, webSettings, discoveryManager);
            Crawl <TArachnodeDAO> crawl = new Crawl <TArachnodeDAO>(applicationSettings, webSettings, crawler, actionManager, consoleManager, cookieManager, crawlRequestManager, dataTypeManager, discoveryManager, encodingManager, htmlManager, politenessManager, proxyManager, ruleManager, true);

            //create a CrawlRequest as this is what the internals of SiteCrawler.dll expect to operate on...
            CrawlRequest <TArachnodeDAO> crawlRequest = new CrawlRequest <TArachnodeDAO>(new Discovery <TArachnodeDAO>(filesRow.AbsoluteUri), 1, UriClassificationType.Host, UriClassificationType.Host, 0, RenderType.None, RenderType.None);

            crawlRequest.Crawl = crawl;
            crawlRequest.Discovery.DiscoveryType = DiscoveryType.File;
            crawlRequest.Discovery.ID            = filesRow.ID;
            crawlRequest.Data        = filesRow.Source;
            crawlRequest.ProcessData = true;
            crawlRequest.WebClient   = webClient;

            //the WebClient's response is reused from File to File; drop the headers of whatever was there before...
            crawlRequest.WebClient.HttpWebResponse.Headers.Clear();

            //parse the ResponseHeaders from the FilesRow.ResponseHeaders string... (skipped when the column is null)
            if (!filesRow.IsResponseHeadersNull())
            {
                foreach (string responseHeader in filesRow.ResponseHeaders.Split("\r\n".ToCharArray(), StringSplitOptions.RemoveEmptyEntries))
                {
                    string[] responseHeaderSplit = responseHeader.Split(":".ToCharArray());

                    string name = responseHeaderSplit[0];
                    System.Data.SqlTypes.SqlString value = UserDefinedFunctions.ExtractResponseHeader(filesRow.ResponseHeaders, name, true);

                    //SqlString.Value throws when the SqlString is null; skip a header whose value could not be extracted instead of failing the whole File...
                    if (value.IsNull)
                    {
                        continue;
                    }

                    crawlRequest.WebClient.HttpWebResponse.Headers.Add(name, value.Value);
                }
            }

            //refresh the DataTypes in the DataTypeManager... (if necessary)...
            if (dataTypeManager.AllowedDataTypes.Count == 0)
            {
                dataTypeManager.RefreshDataTypes();
            }

            crawlRequest.DataType = dataTypeManager.DetermineDataType(crawlRequest);

            if (applicationSettings.InsertFiles)
            {
                //the source bytes are only written when InsertFileSource is set; otherwise an empty array is stored...
                crawlRequest.Discovery.ID = arachnodeDAO.InsertFile(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.WebClient.HttpWebResponse.Headers.ToString(), applicationSettings.InsertFileSource ? crawlRequest.Data : new byte[] { }, crawlRequest.DataType.FullTextIndexType, applicationSettings.ClassifyAbsoluteUris);
            }

            crawlRequest.ManagedDiscovery = fileManager.ManageFile(crawlRequest, crawlRequest.Discovery.ID.Value, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.Data, crawlRequest.DataType.FullTextIndexType, applicationSettings.ExtractFileMetaData, applicationSettings.InsertFileMetaData, applicationSettings.SaveDiscoveredFilesToDisk);

            actionManager.PerformCrawlActions(crawlRequest, CrawlActionType.PostRequest, arachnodeDAO);

            discoveryManager.CloseAndDisposeManagedDiscovery(crawlRequest, arachnodeDAO);
        }
示例#6
0
        /// <summary>
        ///     Issues a GET request for the CrawlRequest, determines its DataType, optionally applies the PreGet
        ///     CrawlRules and CrawlActions, and then decides whether the data must be downloaded (or rendered) again
        ///     or can be reused from the stored row / the copy on disk.
        ///     On return crawlRequest.Data is never null; an empty array is assigned when nothing was obtained.
        /// </summary>
        /// <param name = "crawlRequest">The CrawlRequest to process; its DataType, ProcessData, Data and related fields are assigned here.</param>
        /// <param name = "obeyCrawlRules">When true, the PreGet CrawlRules are evaluated for the CrawlRequest.</param>
        /// <param name = "executeCrawlActions">When true, the PreGet CrawlActions are performed for the CrawlRequest.</param>
        public override void ProcessCrawlRequest(CrawlRequest <TArachnodeDAO> crawlRequest, bool obeyCrawlRules, bool executeCrawlActions)
        {
            IssueWebRequest(crawlRequest, "GET");

            crawlRequest.DataType = _dataTypeManager.DetermineDataType(crawlRequest);

            if (obeyCrawlRules)
            {
                //the return value is ignored; presumably the call updates crawlRequest.IsDisallowed, which is checked below - TODO confirm...
                _ruleManager.IsDisallowed(crawlRequest, CrawlRuleType.PreGet, _arachnodeDAO);
            }

            if (executeCrawlActions)
            {
                _actionManager.PerformCrawlActions(crawlRequest, CrawlActionType.PreGet, _arachnodeDAO);
            }

            if (!crawlRequest.IsDisallowed)
            {
                //any exception raised while deciding whether to reuse stored data is recorded, and processing continues with the current ProcessData value...
                try
                {
                    if (crawlRequest.WebClient.HttpWebResponse != null)
                    {
                        crawlRequest.ProcessData = true;

                        bool isLastModifiedOutdated = true;

                        try
                        {
                            //NOTE(review): an exact inequality against DateTime.Now looks like it is practically always true - confirm the intent.
                            //if reading LastModified throws, the exception is recorded and the flag keeps its initial value of true...
                            isLastModifiedOutdated = crawlRequest.WebClient.HttpWebResponse.LastModified != DateTime.Now;
                        }
                        catch (Exception exception)
                        {
                            _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                        }

                        if (isLastModifiedOutdated)
                        {
                            //each case compares the response's LastModified with what was stored for the Discovery and, when the stored copy is current,
                            //loads Data from the row's Source or from disk instead of downloading again...
                            switch (crawlRequest.DataType.DiscoveryType)
                            {
                            case DiscoveryType.File:
                                if (ApplicationSettings.AssignFileAndImageDiscoveries)     //ANODET: robots.txt
                                {
                                    ArachnodeDataSet.FilesRow filesRow = _arachnodeDAO.GetFile(crawlRequest.Discovery.Uri.AbsoluteUri);

                                    if (filesRow == null)
                                    {
                                        //never stored before; process it...
                                        crawlRequest.ProcessData = true;
                                    }
                                    else
                                    {
                                        if (!filesRow.IsResponseHeadersNull())
                                        {
                                            DateTime lastModified;

                                            SqlString lastModifiedValue = UserDefinedFunctions.ExtractResponseHeader(filesRow.ResponseHeaders, "Last-Modified: ", false);

                                            if (!lastModifiedValue.IsNull && DateTime.TryParse(lastModifiedValue.Value, out lastModified))
                                            {
                                                //crawlRequest.WebClient.HttpWebResponse.LastModified will equal DateTime.Now (or close to it) if the 'Last-Modified' ResponseHeader is not present...
                                                if ((crawlRequest.WebClient.HttpWebResponse).LastModified > lastModified)
                                                {
                                                    crawlRequest.ProcessData = true;
                                                }
                                                else
                                                {
                                                    crawlRequest.ProcessData = false;
                                                }
                                            }
                                            else
                                            {
                                                //the stored headers have no parseable 'Last-Modified'; the stored copy is treated as current...
                                                crawlRequest.ProcessData = false;
                                            }
                                        }
                                        else
                                        {
                                            crawlRequest.ProcessData = true;
                                        }

                                        if (!crawlRequest.ProcessData)
                                        {
                                            //reuse the stored copy: the row's Source first, then the file on disk...
                                            if (filesRow.Source.Length != 0)
                                            {
                                                crawlRequest.Data = filesRow.Source;
                                            }
                                            else
                                            {
                                                string discoveryPath = _discoveryManager.GetDiscoveryPath(ApplicationSettings.DownloadedFilesDirectory, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.DataType.FullTextIndexType);

                                                if (File.Exists(discoveryPath))
                                                {
                                                    crawlRequest.Data = File.ReadAllBytes(discoveryPath);
                                                }
                                                else
                                                {
                                                    //the exception is thrown and caught immediately so that it can be recorded through InsertException; it never propagates...
                                                    try
                                                    {
                                                        throw new Exception("The 'LastModified' HttpResponse Header indicated that the Data was not stale, but the Data (Source) could not be found in the Files database table or at _applicationSettings.DownloadedFilesDirectory.  Therefore, the data was re-downloaded from the server.  The File file may have been deleted from disk or the 'Source' column in the 'Files' table may have been cleared or a previous crawl may have crawled with both _applicationSettings.InsertFileSource = false and _applicationSettings.SaveDiscoveredFilesToDisk = false.");
                                                    }
                                                    catch (Exception exception)
                                                    {
                                                        _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                                                    }

                                                    //no stored copy is available; fall back to downloading...
                                                    crawlRequest.ProcessData = true;
                                                }
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    crawlRequest.ProcessData = false;
                                }
                                break;

                            case DiscoveryType.Image:
                                if (ApplicationSettings.AssignFileAndImageDiscoveries)
                                {
                                    ArachnodeDataSet.ImagesRow imagesRow = _arachnodeDAO.GetImage(crawlRequest.Discovery.Uri.AbsoluteUri);

                                    if (imagesRow == null)
                                    {
                                        //never stored before; process it...
                                        crawlRequest.ProcessData = true;
                                    }
                                    else
                                    {
                                        if (!imagesRow.IsResponseHeadersNull())
                                        {
                                            DateTime lastModified;

                                            SqlString lastModifiedValue = UserDefinedFunctions.ExtractResponseHeader(imagesRow.ResponseHeaders, "Last-Modified: ", false);

                                            if (!lastModifiedValue.IsNull && DateTime.TryParse(lastModifiedValue.Value, out lastModified))
                                            {
                                                //crawlRequest.WebClient.HttpWebResponse.LastModified will equal DateTime.Now (or close to it) if the 'Last-Modified' ResponseHeader is not present...
                                                if (crawlRequest.WebClient.HttpWebResponse.LastModified > lastModified)
                                                {
                                                    crawlRequest.ProcessData = true;
                                                }
                                                else
                                                {
                                                    crawlRequest.ProcessData = false;
                                                }
                                            }
                                            else
                                            {
                                                //the stored headers have no parseable 'Last-Modified'; the stored copy is treated as current...
                                                crawlRequest.ProcessData = false;
                                            }

                                            if (!crawlRequest.ProcessData)
                                            {
                                                //reuse the stored copy: the row's Source first, then the file on disk...
                                                if (imagesRow.Source.Length != 0)
                                                {
                                                    crawlRequest.Data = imagesRow.Source;
                                                }
                                                else
                                                {
                                                    string discoveryPath = _discoveryManager.GetDiscoveryPath(ApplicationSettings.DownloadedImagesDirectory, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.DataType.FullTextIndexType);

                                                    if (File.Exists(discoveryPath))
                                                    {
                                                        crawlRequest.Data = File.ReadAllBytes(discoveryPath);
                                                    }
                                                    else
                                                    {
                                                        //the exception is thrown and caught immediately so that it can be recorded through InsertException; it never propagates...
                                                        try
                                                        {
                                                            throw new Exception("The 'LastModified' HttpResponse Header indicated that the Data was not stale, but the Data (Source) could not be found in the Images database table or at _applicationSettings.DownloadedImagesDirectory.  Therefore, the data was downloaded from the server.  The Image file may have been deleted from disk or the 'Source' column in the 'Images' table may have been cleared.  A previous crawl may have crawled with both _applicationSettings.InsertImageSource = false and _applicationSettings.SaveDiscoveredImagesToDisk = false.");
                                                        }
                                                        catch (Exception exception)
                                                        {
                                                            _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                                                        }

                                                        //no stored copy is available; fall back to downloading...
                                                        crawlRequest.ProcessData = true;
                                                    }
                                                }
                                            }
                                        }
                                        else
                                        {
                                            crawlRequest.ProcessData = true;
                                        }
                                    }
                                }
                                else
                                {
                                    crawlRequest.ProcessData = false;
                                }
                                break;

                            case DiscoveryType.WebPage:
                                ArachnodeDataSet.WebPagesRow webPagesRow = _arachnodeDAO.GetWebPage(crawlRequest.Discovery.Uri.AbsoluteUri);

                                if (webPagesRow == null)
                                {
                                    //never stored before; process it...
                                    crawlRequest.ProcessData = true;
                                }
                                else
                                {
                                    //unlike Files and Images, WebPages are compared against the row's LastDiscovered rather than a stored 'Last-Modified' header...
                                    if ((crawlRequest.WebClient.HttpWebResponse).LastModified > webPagesRow.LastDiscovered)
                                    {
                                        crawlRequest.ProcessData = true;
                                    }
                                    else
                                    {
                                        crawlRequest.ProcessData = false;
                                    }

                                    if (!crawlRequest.ProcessData)
                                    {
                                        //reuse the stored copy: the row's Source first, then the file on disk...
                                        if (webPagesRow.Source.Length != 0)
                                        {
                                            crawlRequest.Data = webPagesRow.Source;
                                        }
                                        else
                                        {
                                            string discoveryPath = _discoveryManager.GetDiscoveryPath(ApplicationSettings.DownloadedWebPagesDirectory, crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.DataType.FullTextIndexType);

                                            if (File.Exists(discoveryPath))
                                            {
                                                crawlRequest.Data = File.ReadAllBytes(discoveryPath);
                                            }
                                            else
                                            {
                                                //the exception is thrown and caught immediately so that it can be recorded through InsertException; it never propagates...
                                                try
                                                {
                                                    throw new Exception("The 'LastModified' HttpResponse Header indicated that the Data was not stale, but the Data (Source) could not be found in the WebPages database table or at _applicationSettings.DownloadedWebPagesDirectory.  Therefore, the data was re-downloaded from the server.  The WebPage file may have been deleted from disk or the 'Source' column in the 'WebPages' table may have been cleared or a previous crawl may have crawled with both _applicationSettings.InsertWebPageSource = false and _applicationSettings.SaveDiscoveredWebPagesToDisk = false.");
                                                }
                                                catch (Exception exception)
                                                {
                                                    _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                                                }

                                                //no stored copy is available; fall back to downloading...
                                                crawlRequest.ProcessData = true;
                                            }
                                        }
                                    }
                                }
                                break;

                            case DiscoveryType.None:
                                crawlRequest.ProcessData = true;
                                break;
                            }
                        }
                    }
                }
                catch (Exception exception)
                {
                    _arachnodeDAO.InsertException(crawlRequest.Parent.Uri.AbsoluteUri, crawlRequest.Discovery.Uri.AbsoluteUri, exception, false);
                }

                if (crawlRequest.ProcessData)
                {
                    if (crawlRequest.Data != null)
                    {
                        //NOTE(review): intentionally empty in the original; looks like a placeholder - confirm or remove...
                    }

                    if (crawlRequest.RenderType == RenderType.None)
                    {
                        //no rendering requested: download the raw bytes over HTTP or FTP...
                        if (crawlRequest.Discovery.Uri.Scheme.ToLowerInvariant() != "ftp")
                        {
                            //a response obtained with HEAD carries no body; issue a GET before downloading...
                            if (crawlRequest.WebClient.HttpWebResponse != null && crawlRequest.WebClient.HttpWebResponse.Method == "HEAD")
                            {
                                IssueWebRequest(crawlRequest, "GET");
                            }

                            if (crawlRequest.WebClient.HttpWebResponse != null)
                            {
                                //the two boolean arguments are whether the response's ContentEncoding is 'gzip' and whether it is 'deflate'...
                                crawlRequest.Data = crawlRequest.WebClient.DownloadHttpData(crawlRequest.Discovery.Uri.AbsoluteUri, crawlRequest.WebClient.HttpWebResponse.ContentEncoding.ToLowerInvariant() == "gzip", crawlRequest.WebClient.HttpWebResponse.ContentEncoding.ToLowerInvariant() == "deflate", crawlRequest.Crawl.Crawler.CookieContainer);
                            }
                        }
                        else
                        {
                            crawlRequest.Data = crawlRequest.WebClient.DownloadFtpData(crawlRequest.Discovery.Uri.AbsoluteUri);
                        }
                    }
                    else
                    {
                        //rendering requested: take the encoding, the bytes and the HTML from the rendered document...
                        RendererResponse rendererResponse = crawlRequest.Crawl.Crawler.Engine.Render(crawlRequest, RenderAction.Render, crawlRequest.RenderType);

                        if (rendererResponse != null)
                        {
                            if (rendererResponse.HTMLDocumentClass != null)
                            {
                                crawlRequest.Encoding = Encoding.GetEncoding(rendererResponse.HTMLDocumentClass.charset);

                                string outerHTML = rendererResponse.HTMLDocumentClass.documentElement.outerHTML;

                                crawlRequest.Data         = crawlRequest.Encoding.GetBytes(outerHTML);
                                crawlRequest.DecodedHtml  = HttpUtility.HtmlDecode(outerHTML);
                                crawlRequest.Html         = outerHTML;
                                crawlRequest.HtmlDocument = rendererResponse.HTMLDocumentClass;
                            }

                            crawlRequest.RendererMessage = rendererResponse.RendererMessage;
                        }
                    }
                }
            }
            else
            {
                if (crawlRequest.Data == null)
                {
                    //NOTE(review): intentionally empty in the original; looks like a placeholder - confirm or remove...
                }
            }

            //guarantee a non-null Data for whatever reads it after this method...
            if (crawlRequest.Data == null)
            {
                crawlRequest.Data = new byte[0];
            }
        }