/// <summary>
/// Builds a <see cref="ProcessedPage"/> populated with fixed placeholder
/// values for the given url.
/// </summary>
/// <param name="url">Value assigned to <c>PageUrl</c> on the returned object.</param>
/// <returns>A new ProcessedPage carrying the hard-coded sample data.</returns>
public static ProcessedPage GetProcessedPage(string url)
{
    // Every value except the url is a constant; assignment order matches
    // the sequence the properties were originally set in.
    return new ProcessedPage
    {
        SessionId = 77,
        CrawlerId = 34,
        PageUrl = url,
        Title = "Blah 1",
        Description = "Blah blah and blah",
        KeyWords = "blue, red",
        StatusCode = System.Net.HttpStatusCode.Ambiguous
    };
}
/// <summary>
/// Processes a crawled page and returns a ProcessedPage object
/// which can be stored.
/// </summary>
/// <param name="page">The results of a crawled url</param>
/// <returns>
/// A ProcessedPage carrying the session id, crawler id, absolute url and
/// HTTP status code of <paramref name="page"/>; or null when
/// <paramref name="page"/> is null or has no HTTP response attached.
/// </returns>
public ProcessedPage ProcessPage(Abot.Poco.CrawledPage page)
{
    // Without a page or a response there is no status code to record, so
    // honour the documented "or null" contract instead of throwing a
    // NullReferenceException.
    // NOTE(review): presumably HttpWebResponse is null when the request
    // itself failed - confirm against the crawler's behaviour.
    if (page == null || page.HttpWebResponse == null)
    {
        return null;
    }

    //TODO extract data
    var processed = new ProcessedPage();
    processed.SessionId = page.PageBag.SessionId;
    processed.CrawlerId = page.PageBag.CrawlerId;
    processed.PageUrl = page.Uri.AbsoluteUri;
    processed.StatusCode = page.HttpWebResponse.StatusCode;

    //TODO store cookies (available from page.HttpWebResponse.Cookies)
    return processed;
}
/// <summary>
/// Updates an existing ProcessedPage through a session opened from
/// <c>_sessionFactory</c>, inside a transaction that is committed
/// once the update call returns.
/// </summary>
/// <param name="result">The page whose stored state should be updated.</param>
public void UpdateProcessedPage(ProcessedPage result)
{
    // Stacked usings: the transaction is disposed first, then the session.
    using (var dbSession = _sessionFactory.OpenSession())
    using (var tx = dbSession.BeginTransaction())
    {
        dbSession.Update(result);
        tx.Commit();
    }
}
/// <summary>
/// Replaces the entry stored in <c>ProcessedPages</c> under
/// <c>result.Id</c>. Does nothing when no entry with that id exists.
/// </summary>
/// <param name="result">The page to store in place of the existing entry.</param>
public void UpdateProcessedPage(ProcessedPage result)
{
    // Only existing entries are overwritten; unknown ids are ignored
    // rather than added.
    if (!ProcessedPages.ContainsKey(result.Id))
    {
        return;
    }

    ProcessedPages[result.Id] = result;
}
/// <summary>
/// Assigns the value of <c>NextId</c> to the page's <c>Id</c> and adds the
/// page to <c>ProcessedPages</c> under that id.
/// </summary>
/// <param name="result">The page to register; its <c>Id</c> is overwritten.</param>
public void AddProcessedPage(ProcessedPage result)
{
    // NextId is read exactly once so the key and the stored Id agree.
    var assignedId = NextId;
    result.Id = assignedId;

    ProcessedPages.Add(result.Id, result);
}