public void Constructor_ValidReason_SetsPublicProperty()
        {
            // Arrange: any non-empty reason string will do.
            string expectedReason = "aaa";

            // Act
            var args = new PageLinksCrawlDisallowedArgs(_context, _page, expectedReason);

            // Assert: the constructor must store the exact same string instance.
            Assert.AreSame(expectedReason, args.DisallowedReason);
        }
示例#2
0
        private void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            // Report the page whose outbound links were skipped, and why.
            Console.WriteLine(
                "Did not crawl the links on page {0} due to {1}",
                e.CrawledPage.Uri.AbsoluteUri,
                e.DisallowedReason);
        }
示例#3
0
        private void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            // Log at debug level that this page's links were not followed.
            CrawledPage skipped = e.CrawledPage;
            _logger.Debug($"Did not crawl the links on page {skipped.Uri.AbsoluteUri} due to {e.DisallowedReason}");
        }
示例#4
0
        private void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            // Record (info level) which page's links were skipped and the reason.
            CrawledPage skipped = e.CrawledPage;
            log.Info($"Did not crawl the links on page {skipped.Uri.AbsoluteUri} due to {e.DisallowedReason}");
        }
        //private CrawlDecision ShouldCrawlPage(PageToCrawl page, CrawlContext context)
        //{
        //    if (site.Pages.FirstOrDefault(item => item.Address == page.Uri.ToString()) != null)
        //        return new CrawlDecision { Allow = false, Reason = "Already Crawled" };
        //    return new CrawlDecision { Allow = true };
        //}
        private void Agent_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            log.Warn($"Page {e.CrawledPage.Uri} is Disallowed cause {e.DisallowedReason}");
            using (var dbContext = new ApplicationDbContext())
            {
                var address = e.CrawledPage.Uri.ToString();
                var site    = dbContext.Sites.FirstOrDefault(m => m.Id == siteId);

                // Guard: the original dereferenced site.Pages unconditionally and
                // crashed with an NRE when the site row was missing.
                if (site == null)
                {
                    log.Error($"Site {siteId} not found in DB; skipping disallowed-page bookkeeping");
                    return;
                }

                var page             = site.Pages.FirstOrDefault(item => item.Address == address);
                var sendNotification = e.DisallowedReason == "Already Crawled";

                if (page == null)
                {
                    log.Debug("Page not found in DB. Creating new page");
                    // Keep a reference to the new entity: the original left 'page' null
                    // here, so CallPageCrawledEvent below threw an NRE on page.Id
                    // whenever sendNotification was true for an unseen page.
                    page = new Page {
                        Address = address, IsSuccess = false, SeeTime = DateTime.Now, Text = ""
                    };
                    site.Pages.Add(page);
                }
                else
                {
                    page.IsSuccess = false;
                    page.SeeTime   = DateTime.Now;
                    dbContext.Entry(page).State = EntityState.Modified;
                }
                dbContext.SaveChanges();
                if (sendNotification)
                {
                    // Safe now: 'page' is never null, and a freshly added entity has
                    // its Id populated by SaveChanges.
                    CallPageCrawledEvent(site.Id, page.Id, e.CrawledPage);
                }
            }
        }
示例#6
0
 public static void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
 {
     try
     {
         CrawledPage crawledPage = e.CrawledPage;
         Console.WriteLine("Did not crawl the links on page {0} due to {1}", crawledPage.Uri.AbsoluteUri, e.DisallowedReason);
     }
     catch (Exception ex)
     {
         // Never let a reporting failure kill the crawl, but don't swallow the
         // error silently either (the original empty catch hid all failures).
         Console.Error.WriteLine("Failed to report disallowed links: " + ex.Message);
     }
 }
        void crawler_PageLinksCrawlDisallowedAsync(object sender, PageLinksCrawlDisallowedArgs e)
        {
            CrawledPage crawledPage = e.CrawledPage;

            // Track how many link sets were skipped and surface it in the progress UI.
            m_linksSkipped++;
            updateCrawlingProgress(-1, -1, m_linksSkipped);

            // AbsoluteUri and DisallowedReason are already strings; the original's
            // .ToString() calls were redundant. Message text is unchanged.
            string msg = "Did not crawl the links on page " + crawledPage.Uri.AbsoluteUri + " due to " + e.DisallowedReason;
            log.Warn(msg);
        }
示例#8
0
        void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            CrawledPage crawledPage = e.CrawledPage;

            Console.WriteLine("Did not crawl the links on page {0} due to {1}", crawledPage.Uri.AbsoluteUri, e.DisallowedReason);

            // 'using' guarantees the stream and writer are closed even if WriteLine
            // throws — the original leaked both handles on an exception.
            using (var failLog = new FileStream("FailLog.txt", FileMode.Append, FileAccess.Write, FileShare.ReadWrite))
            using (var sw = new StreamWriter(failLog))
            {
                sw.WriteLine(crawledPage.Uri.AbsoluteUri + e.DisallowedReason);
            }
        }
        void SaveWebsiteUriRequestResult(object o, CrawlArgs e)
        {
            double requestTime;
            string absoluteUri;

            // Classify the event with explicit type tests. The original's final
            // branch dereferenced an unchecked 'as' cast, so any unexpected
            // CrawlArgs subtype produced a NullReferenceException.
            if (e is PageCrawlDisallowedArgs disallowedArgs)
            {
                // Disallowed pages were never requested, so there is no timing.
                requestTime = 0;
                absoluteUri = disallowedArgs.PageToCrawl.Uri.AbsoluteUri;
            }
            else if (e is PageLinksCrawlDisallowedArgs linksDisallowedArgs)
            {
                absoluteUri = linksDisallowedArgs.CrawledPage.Uri.AbsoluteUri;
                requestTime = linksDisallowedArgs.CrawledPage.Elapsed;
            }
            else if (e is PageCrawlCompletedArgs crawlCompletedArgs)
            {
                requestTime = crawlCompletedArgs.CrawledPage.Elapsed;
                absoluteUri = crawlCompletedArgs.CrawledPage.Uri.AbsoluteUri;
            }
            else
            {
                // Fail loudly instead of with an NRE so the unsupported type is named.
                throw new ArgumentException($"Unsupported crawl event type: {e?.GetType().Name ?? "null"}", nameof(e));
            }

            _sitePages.Add(new WebPage
            {
                WebsiteId   = _website.Id,
                RequestUri  = absoluteUri,
                RequestTime = requestTime,
                Timestamp   = DateTime.Now
            });
        }
示例#10
0
 // Intentionally empty: this subscriber ignores links-disallowed events.
 private static void sub_crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
 {
 }
 // Intentionally empty: the event is subscribed but no action is taken on it.
 private void _crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
 {
 }
示例#12
0
        void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            // Forward a human-readable notice to whoever listens for crawler messages.
            var page = e.CrawledPage;
            OnMessageReceived($"Did not crawl the links on page {page.Uri.AbsoluteUri} due to {e.DisallowedReason}");
        }
 // Intentionally empty: placeholder handler, no processing of disallowed links.
 private void PageLinksCrawlDisallowedEvent(object sender, PageLinksCrawlDisallowedArgs e)
 {
 }
示例#14
0
 // Intentionally empty handler. NOTE(review): the name misspells "Disallowed"
 // ("Dissallowed") — kept as-is because renaming would break the subscription site.
 static void crawler_PageLinksCrawlDissallowed(object sender, PageLinksCrawlDisallowedArgs e)
 {
 }
示例#15
0
        private void crawler_CrawlerLinkDisalowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            // Print the URI of the page whose outbound links were not followed.
            PageToCrawl disallowedPage = e.CrawledPage;

            Console.WriteLine("Disallowed: {0}", disallowedPage.Uri.ToString());
        }
        /// <summary>
        /// Logs why this page's outbound links were not crawled.
        /// (Original comment: 不爬取这个链接的原因 — "the reason this link is not crawled".)
        /// </summary>
        /// <param name="sender">The crawler raising the event.</param>
        /// <param name="e">Details of the page and the disallow reason.</param>
        void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            CrawledPage skipped = e.CrawledPage;

            // Runtime log text intentionally kept in Chinese, byte-for-byte.
            log.Info("不爬取此链接 " + skipped.Uri.AbsoluteUri + " 其原因为 " + e.DisallowedReason);
        }
示例#17
0
        // Handler for link-crawl failures (original note: 抓取连接失败).
        public static void Disallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            var page = e.CrawledPage;

            // Output text preserved byte-for-byte (Chinese user-facing message).
            Outputer.Output(string.Format("没有抓取 {0} 页上的链接 {1} ", page.Uri.AbsoluteUri, e.DisallowedReason));
        }
示例#18
0
        private static void WebCrawler_PageLinksCrawlDisallowedAsync(object sender, PageLinksCrawlDisallowedArgs e)
        {
            // Console trace of pages whose links were skipped (Abot-prefixed).
            var page = e.CrawledPage;

            Console.WriteLine("Abot-Did not crawl the links on page {0} due to {1}", page.Uri.AbsoluteUri, e.DisallowedReason);
        }
 private void Crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
 {
     // Trace the disallowed page and the reason to the console.
     var page = e.CrawledPage;
     Console.WriteLine($"Page Crawl Disallowed, Page: {page.Uri}, Reason: {e.DisallowedReason}");
 }
示例#20
0
 // Intentionally empty: hook point for processing disallowed-link events.
 static void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
 {
     //Process data
 }
示例#21
0
        void PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            // Record at error level that this page's links were skipped.
            var page = e.CrawledPage;

            Log.Logger.Error($"Did not crawl the links on page {page.Uri.AbsoluteUri} due to {e.DisallowedReason}");
        }
示例#22
0
        static void Crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
        {
            // Info-level record of the page whose links were not followed.
            var page = e.CrawledPage;

            _log.Info($"Did not crawl the links on page {page.Uri.AbsoluteUri} due to {e.DisallowedReason}");
        }
示例#23
0
 static void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
 {
     // Record which page had its link crawl disallowed.
     Log("crawler_PageLinksCrawlDisallowed", e.CrawledPage.Uri.AbsoluteUri);
     // Removed dead local 'var e1 = e;' — it was assigned but never read.
     //Process data
 }
示例#24
0
 void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
 {
     // Note the skipped link set via the shared Log helper (same message text).
     string uri = e.CrawledPage.Uri.AbsoluteUri;
     Log("Did not crawl the links on page " + uri + " due to " + e.DisallowedReason);
 }