/// <summary>
/// Invokes the <c>PageCrawled</c> handler, when one is attached, with the given crawl result.
/// </summary>
/// <param name="result">The crawl result handed to the handler.</param>
private void RaisePageCrawled(PageCrawlResult result)
{
    // Take a local copy so the null check and the invocation use the same reference.
    var subscribers = PageCrawled;
    if (subscribers == null)
    {
        return;
    }

    subscribers.Invoke(result);
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and packages the outcome as a <see cref="PageCrawlResult"/>.
/// </summary>
/// <param name="url">Address handed to the page downloader and to the link extractor.</param>
/// <returns>
/// A result carrying the download status code and the time the crawl ended. Contents and
/// links are populated only when the download reports success.
/// </returns>
public PageCrawlResult Crawl(Uri url)
{
    var result = new PageCrawlResult();
    var response = pageDownloader.Download(url);

    result.StatusCode = response.StatusCode;

    if (response.IsSuccessful)
    {
        // Links are extracted from the same contents that are stored on the result.
        var body = response.Contents;
        result.Contents = body;
        result.Links = htmlParser.ExtractLinks(url, body);
    }

    // Stamped after download and parsing, regardless of success.
    result.CrawlEndedAt = DateTimeOffset.Now;
    return result;
}
/// <summary>
/// Stores the outcome of a crawl as a <see cref="Page"/> document: the first page found by
/// the crawl URL's hash is updated, or a new <see cref="Page"/> is created when none is found.
/// </summary>
/// <param name="pageCrawlResult">
/// Crawl result whose <c>CrawlUrl</c>, <c>Contents</c>, <c>StatusCode</c> and
/// <c>CrawlEndedAt</c> values are copied onto the page.
/// </param>
public void Save(PageCrawlResult pageCrawlResult)
{
    using (var session = documentStore.OpenSession())
    {
        var pagesByUrl = session.Advanced.LuceneQuery<Page, PagesToCrawlByUrl>();
        var crawlUrl = pageCrawlResult.CrawlUrl;

        // Look the page up by hash; fall back to a fresh document when nothing matches.
        var page = pagesByUrl
            .Where(string.Format("Hash:\"{0}\"", crawlUrl.Hash))
            .FirstOrDefault()
            ?? new Page();

        // Identity fields come from the crawl URL.
        page.WebsiteUrl = crawlUrl.WebsiteDefinition.Website.RootUrl;
        page.Url = crawlUrl.Url;
        page.Hash = crawlUrl.Hash;

        // Crawl outcome fields come from the result itself.
        page.Contents = pageCrawlResult.Contents;
        page.StatusCode = pageCrawlResult.StatusCode;
        page.CrawledAt = pageCrawlResult.CrawlEndedAt;

        session.Store(page);
        session.SaveChanges();
    }
}
/// <summary>
/// Fetches <paramref name="url"/> through the page downloader and builds a
/// <see cref="PageCrawlResult"/> describing what happened.
/// </summary>
/// <param name="url">The page address to download; also passed to link extraction.</param>
/// <returns>
/// The crawl result: the status code and crawl end time are always set, while contents
/// and links are set only for a successful download.
/// </returns>
public PageCrawlResult Crawl(Uri url)
{
    var outcome = new PageCrawlResult();
    var download = pageDownloader.Download(url);

    outcome.StatusCode = download.StatusCode;

    // Only a successful download yields contents and links.
    if (download.IsSuccessful)
    {
        outcome.Contents = download.Contents;
        outcome.Links = htmlParser.ExtractLinks(url, download.Contents);
    }

    outcome.CrawlEndedAt = DateTimeOffset.Now;

    return outcome;
}
/// <summary>
/// Passes <paramref name="result"/> to the <c>PageCrawled</c> handler; does nothing when no handler is set.
/// </summary>
/// <param name="result">The crawl result to pass to the handler.</param>
private void RaisePageCrawled(PageCrawlResult result)
{
    // Read the handler once so the null test and the call see the same value.
    var pageCrawled = PageCrawled;
    if (pageCrawled == null)
    {
        return;
    }

    pageCrawled.Invoke(result);
}