public void Constructor_ValidReason_SetsPublicProperty()
{
    string reason = "aaa";

    PageLinksCrawlDisallowedArgs args = new PageLinksCrawlDisallowedArgs(_context, _page, reason);

    Assert.AreSame(reason, args.DisallowedReason);
}
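The `_context` and `_page` fields used by this test are not shown in the snippet. A minimal sketch of what the fixture setup might look like, assuming Abot's public CrawlContext and CrawledPage types and an NUnit-style test class (the class name and URL below are placeholders):

using System;
using Abot.Crawler;
using Abot.Poco;
using NUnit.Framework;

[TestFixture]
public class PageLinksCrawlDisallowedArgsTest
{
    // Hypothetical fixture fields; the original test's setup is not shown.
    CrawlContext _context;
    CrawledPage _page;

    [SetUp]
    public void SetUp()
    {
        _context = new CrawlContext();
        _page = new CrawledPage(new Uri("http://a.com/"));
    }
}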
private void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    var crawledPage = e.CrawledPage;
    Console.WriteLine("Did not crawl the links on page {0} due to {1}", crawledPage.Uri.AbsoluteUri, e.DisallowedReason);
}
private void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    var mssg = string.Format("Did not crawl the links on page {0} due to {1}", crawledPage.Uri.AbsoluteUri, e.DisallowedReason);
    _logger.Debug(mssg);
}
private void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    var result = string.Format("Did not crawl the links on page {0} due to {1}", crawledPage.Uri.AbsoluteUri, e.DisallowedReason);
    log.Info(result);
}
//private CrawlDecision ShouldCrawlPage(PageToCrawl page, CrawlContext context)
//{
//    if (site.Pages.FirstOrDefault(item => item.Address == page.Uri.ToString()) != null)
//        return new CrawlDecision { Allow = false, Reason = "Already Crawled" };

//    return new CrawlDecision { Allow = true };
//}

private void Agent_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    log.Warn($"Page {e.CrawledPage.Uri} is Disallowed cause {e.DisallowedReason}");

    using (var dbContext = new ApplicationDbContext())
    {
        var sendNotification = false;
        var site = dbContext.Sites.FirstOrDefault(m => m.Id == siteId);
        var page = site.Pages.FirstOrDefault(item => item.Address == e.CrawledPage.Uri.ToString());

        if (e.DisallowedReason == "Already Crawled")
        {
            sendNotification = true;
        }

        if (page == null)
        {
            log.Debug("Page not found in DB. Creating new page");
            site.Pages.Add(new Page
            {
                Address = e.CrawledPage.Uri.ToString(),
                IsSuccess = false,
                SeeTime = DateTime.Now,
                Text = ""
            });
        }
        else
        {
            page.IsSuccess = false;
            page.SeeTime = DateTime.Now;
            dbContext.Entry(page).State = EntityState.Modified;
        }

        dbContext.SaveChanges();

        if (sendNotification)
        {
            CallPageCrawledEvent(site.Id, page.Id, e.CrawledPage);
        }
    }
}
public static void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    try
    {
        CrawledPage crawledPage = e.CrawledPage;
        Console.WriteLine("Did not crawl the links on page {0} due to {1}", crawledPage.Uri.AbsoluteUri, e.DisallowedReason);
    }
    catch (Exception)
    {
        // Swallow any logging failure so the crawl continues.
    }
}
void crawler_PageLinksCrawlDisallowedAsync(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;

    m_linksSkipped++;
    updateCrawlingProgress(-1, -1, m_linksSkipped);

    string msg = "Did not crawl the links on page " + crawledPage.Uri.AbsoluteUri + " due to " + e.DisallowedReason;
    log.Warn(msg);
}
void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    Console.WriteLine("Did not crawl the links on page {0} due to {1}", crawledPage.Uri.AbsoluteUri, e.DisallowedReason);

    // Append the failure to a local log file, separating URI and reason.
    FileStream failLog = new FileStream("FailLog.txt", FileMode.Append, FileAccess.Write, FileShare.ReadWrite);
    StreamWriter sw = new StreamWriter(failLog);
    sw.WriteLine(crawledPage.Uri.AbsoluteUri + " " + e.DisallowedReason);
    sw.Close();
    failLog.Close();
}
void SaveWebsiteUriRequestResult(object o, CrawlArgs e)
{
    double requestTime;
    string absoluteUri;

    PageCrawlDisallowedArgs disallowedArgs;
    if ((disallowedArgs = e as PageCrawlDisallowedArgs) != null)
    {
        requestTime = 0;
        absoluteUri = disallowedArgs.PageToCrawl.Uri.AbsoluteUri;
    }
    else
    {
        PageLinksCrawlDisallowedArgs linksDisallowedArgs = e as PageLinksCrawlDisallowedArgs;
        if (linksDisallowedArgs != null)
        {
            absoluteUri = linksDisallowedArgs.CrawledPage.Uri.AbsoluteUri;
            requestTime = linksDisallowedArgs.CrawledPage.Elapsed;
        }
        else
        {
            PageCrawlCompletedArgs crawlCompletedArgs = e as PageCrawlCompletedArgs;
            requestTime = crawlCompletedArgs.CrawledPage.Elapsed;
            absoluteUri = crawlCompletedArgs.CrawledPage.Uri.AbsoluteUri;
        }
    }

    _sitePages.Add(new WebPage
    {
        WebsiteId = _website.Id,
        RequestUri = absoluteUri,
        RequestTime = requestTime,
        Timestamp = DateTime.Now
    });
}
private static void sub_crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e) { }
private void _crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e) { }
void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    OnMessageReceived($"Did not crawl the links on page {crawledPage.Uri.AbsoluteUri} due to {e.DisallowedReason}");
}
private void PageLinksCrawlDisallowedEvent(object sender, PageLinksCrawlDisallowedArgs e) { }
static void crawler_PageLinksCrawlDissallowed(object sender, PageLinksCrawlDisallowedArgs e) { }
private void crawler_CrawlerLinkDisalowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    PageToCrawl page = e.CrawledPage;
    Console.WriteLine("Disallowed: {0}", page.Uri.ToString());
}
/// <summary>
/// Logs the reason the links on this page were not crawled.
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    log.Info("Did not crawl the links on page " + crawledPage.Uri.AbsoluteUri + " due to " + e.DisallowedReason);
}
// Link crawl disallowed
public static void Disallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    Outputer.Output(string.Format("Did not crawl the links on page {0} due to {1}", crawledPage.Uri.AbsoluteUri, e.DisallowedReason));
}
private static void WebCrawler_PageLinksCrawlDisallowedAsync(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    Console.WriteLine("Abot-Did not crawl the links on page {0} due to {1}", crawledPage.Uri.AbsoluteUri, e.DisallowedReason);
}
private void Crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    Console.WriteLine($"Page Crawl Disallowed, Page: {e.CrawledPage.Uri}, Reason: {e.DisallowedReason}");
}
static void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e) { //Process data }
void PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    Log.Logger.Error($"Did not crawl the links on page {crawledPage.Uri.AbsoluteUri} due to {e.DisallowedReason}");
}
static void Crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;
    _log.Info($"Did not crawl the links on page {crawledPage.Uri.AbsoluteUri} due to {e.DisallowedReason}");
}
static void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    Log("crawler_PageLinksCrawlDisallowed", e.CrawledPage.Uri.AbsoluteUri);
    var e1 = e;
    //Process data
}
void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    Log($"Did not crawl the links on page {e.CrawledPage.Uri.AbsoluteUri} due to {e.DisallowedReason}");
}
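All of the handlers above share the same signature because they are subscribed to the crawler's PageLinksCrawlDisallowed event. A minimal wiring sketch, assuming Abot 1.x's synchronous PoliteWebCrawler API (the target URL is a placeholder, and Abot 2.x uses CrawlAsync instead of Crawl):

using System;
using Abot.Crawler;
using Abot.Poco;

class Program
{
    static void Main()
    {
        var crawler = new PoliteWebCrawler();

        // Attach a handler with the same shape as the examples above.
        crawler.PageLinksCrawlDisallowed += crawler_PageLinksCrawlDisallowed;

        // Placeholder URL for illustration.
        CrawlResult result = crawler.Crawl(new Uri("http://example.com/"));
        Console.WriteLine("Crawl completed without error: {0}", !result.ErrorOccurred);
    }

    static void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
    {
        Console.WriteLine("Did not crawl the links on page {0} due to {1}", e.CrawledPage.Uri.AbsoluteUri, e.DisallowedReason);
    }
}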