Esempio n. 1
0
        /// <summary>
        /// Handles the crawler's page-crawl-completed event. A failed crawl is logged as an
        /// error; a successful one has its page details, and its raw content when present,
        /// inserted into the database before a success line is written to the console.
        /// </summary>
        /// <param name="sender">The object that raised the event (not used).</param>
        /// <param name="e">Event data carrying the crawled page.</param>
        private void crawler_ProcessPageCrawlCompleted(object sender, PageCrawlCompletedArgs e)
        {
            CrawledPage crawledPage = e.CrawledPage;

            // A missing response counts as a failure as well, so the status check below
            // can never dereference a null HttpWebResponse.
            bool crawlFailed = crawledPage.WebException != null
                || crawledPage.HttpWebResponse == null
                || crawledPage.HttpWebResponse.StatusCode != HttpStatusCode.OK;

            if (crawlFailed)
            {
                _logger.Error(String.Format("Crawl of page failed {0} StatusCode: [{1}]", crawledPage.Uri.AbsoluteUri, crawledPage.StatusCode));

                // Return here so the success message at the end is not printed for a failed crawl.
                return;
            }

            IDatabase database = _databaseConnectionPool.GetObject();
            try
            {
                string command = "INSERT INTO [CrawledPage] ([PageGUID], [ProjectGUID], [URL], [StatusCode], [PageSize], [HttpResponse], [CrawlDepth], [ParentURI]) VALUES (@PageGUID, @ProjectGUID, @URL, @StatusCode, @PageSize, @HttpResponse, @CrawlDepth, @ParentURI);";

                // A page without a parent is stored with a NULL parent URI instead of
                // throwing and losing the whole row.
                // NOTE(review): assumes ExecuteNonQueryWithParameters forwards values to
                // ADO.NET parameters unchanged, so DBNull.Value maps to SQL NULL - confirm.
                object parentUri = crawledPage.ParentUri != null
                    ? (object)crawledPage.ParentUri.AbsoluteUri
                    : DBNull.Value;

                Dictionary<string, object> parameters = new Dictionary<string, object>();
                parameters.Add("@PageGUID", crawledPage.PageGUID);
                parameters.Add("@ProjectGUID", _projectGUID);
                parameters.Add("@URL", crawledPage.Uri.AbsoluteUri);
                parameters.Add("@StatusCode", crawledPage.StatusCode);
                parameters.Add("@PageSize", crawledPage.PageSizeInBytes);
                parameters.Add("@HttpResponse", crawledPage.HttpWebResponse.Headers.ToString());
                parameters.Add("@CrawlDepth", crawledPage.CrawlDepth);
                parameters.Add("@ParentURI", parentUri);

                database.ExecuteNonQueryWithParameters(command, parameters);

                // The raw content goes into its own table, and only when there is any.
                if (!string.IsNullOrEmpty(crawledPage.RawContent))
                {
                    string contentCommand = "INSERT INTO [PageContent] ([PageGUID], [ProjectGUID], [URL], [RawContent]) VALUES (@pageGUID, @projectGUID, @url, @rawContent);";
                    Dictionary<string, object> contentParams = new Dictionary<string, object>();
                    contentParams.Add("@pageGUID", crawledPage.PageGUID);
                    contentParams.Add("@projectGUID", _projectGUID);
                    contentParams.Add("@url", crawledPage.Uri.AbsoluteUri);
                    contentParams.Add("@rawContent", crawledPage.RawContent);

                    database.ExecuteNonQueryWithParameters(contentCommand, contentParams);
                }
            }
            catch (Exception exception)
            {
                // Best effort: a database failure is reported but does not propagate to the crawler.
                Console.WriteLine(exception.ToString());
                _logger.Error("An unhandled exception was thrown inserting crawled page details/content to DB");
                _logger.Error(exception);
            }
            finally
            {
                // Always hand the connection back to the pool, even when an insert failed.
                _databaseConnectionPool.PutObject(database);
            }

            // Reports the crawl outcome, not the database outcome, so it is printed even
            // when the insert above failed and was logged.
            Console.WriteLine("Crawl of page succeeded {0} StatusCode: [{1}]", crawledPage.Uri.AbsoluteUri, crawledPage.StatusCode);
        }
Esempio n. 2
0
        /// <summary>
        /// Writes to the console whether the completed page crawl succeeded or failed and,
        /// independently of that outcome, whether the page came back without content.
        /// </summary>
        /// <param name="sender">The object that raised the event (not used).</param>
        /// <param name="e">Event data exposing the crawled page.</param>
        private void crawler_ProcessPageCrawlCompleted(object sender, PageCrawlCompletedArgs e)
        {
            CrawledPage page = e.CrawledPage;

            // Success means no web exception and an HTTP 200 response.
            bool succeeded = page.WebException == null
                && page.HttpWebResponse.StatusCode == HttpStatusCode.OK;
            string pageUrl = page.Uri.AbsoluteUri;

            if (succeeded)
            {
                Console.WriteLine("Crawl of page succeeded {0}", pageUrl);

                // A pooled connection is taken and handed straight back; nothing is done with it here.
                IDatabase connection = _databaseConnectionPool.GetObject();
                _databaseConnectionPool.PutObject(connection);
            }
            else
            {
                Console.WriteLine("Crawl of page failed {0}", pageUrl);
            }

            // Checked for every page, whether the crawl succeeded or not.
            if (string.IsNullOrEmpty(page.RawContent))
            {
                Console.WriteLine("Page had no content {0}", pageUrl);
            }
        }