/// <summary>
/// Handles the crawler's page-crawl-completed event. Logs whether the crawl
/// of the page succeeded, then, when the page has text content, extracts the
/// video information from that text and adds it to <c>Videos</c> unless an
/// entry with an equal Id is already present.
/// </summary>
/// <param name="sender">The object that raised the event; not used here.</param>
/// <param name="e">Event data carrying the crawled page.</param>
private static void Crawler_PageCrawlCompletedAsync(object sender, Abot.Crawler.PageCrawlCompletedArgs e)
{
    Abot.Poco.CrawledPage page = e.CrawledPage;
    string pageUrl = page.Uri.AbsoluteUri;

    // A missing response is treated as a failure. Without the explicit null
    // check, a page that has neither a WebException nor an HttpWebResponse
    // would throw a NullReferenceException on the StatusCode access below.
    bool crawlFailed = page.WebException != null
        || page.HttpWebResponse == null
        || page.HttpWebResponse.StatusCode != System.Net.HttpStatusCode.OK;

    if (crawlFailed)
    {
        Console.WriteLine("Crawl of page failed {0}", pageUrl);
    }
    else
    {
        Console.WriteLine("Crawl of page succeeded {0}", pageUrl);
    }

    if (string.IsNullOrEmpty(page.Content.Text))
    {
        Console.WriteLine("Page had no content {0}", pageUrl);
        return;
    }

    // Extraction happens outside the lock; only the check-then-add on the
    // shared Videos collection is serialized.
    var infomationVideo = GetInfomationVideo(page.Content.Text);
    lock (objlock)
    {
        if (!Videos.Any(a => a.Id.Equals(infomationVideo.Id)))
        {
            Videos.Add(infomationVideo);
        }
    }
}
/// <summary>
/// Processes a crawled page and returns a ProcessedPage object
/// which can be stored.
/// </summary>
/// <param name="page">The results of a crawled url</param>
/// <returns>
/// A new ProcessedPage carrying the session id, crawler id, page url and,
/// when the page has an HTTP response, its status code. When there is no
/// response, StatusCode is left at its default value.
/// </returns>
public ProcessedPage ProcessPage(Abot.Poco.CrawledPage page)
{
    //TODO extract data
    var processed = new ProcessedPage();
    processed.SessionId = page.PageBag.SessionId;
    processed.CrawlerId = page.PageBag.CrawlerId;
    processed.PageUrl = page.Uri.AbsoluteUri;

    // Guard the response: dereferencing a null HttpWebResponse here would
    // throw a NullReferenceException instead of returning a result.
    if (page.HttpWebResponse != null)
    {
        processed.StatusCode = page.HttpWebResponse.StatusCode;
    }

    //TODO store cookies (available from page.HttpWebResponse.Cookies)
    return processed;
}
/// <summary>
/// Placeholder for processing a crawled page within a crawl context.
/// Not implemented: every call throws.
/// </summary>
/// <param name="crawlContext">The crawl context for the page; currently unused.</param>
/// <param name="crawledPage">The crawled page; currently unused.</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public void ProcessCrawledPage(
    Abot.Poco.CrawlContext crawlContext,
    Abot.Poco.CrawledPage crawledPage)
{
    throw new System.NotImplementedException();
}