/// <summary>
/// Writes a console line naming the link that is about to be crawled and the
/// page on which that link was found.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    Console.WriteLine("About to crawl link {0} which was found on page {1}", linkUrl, foundOnUrl);
}
/// <summary>
/// Forwards an "about to crawl" message, naming the link and the page it was
/// found on, to <c>OnMessageReceived</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    OnMessageReceived($"About to crawl link {linkUrl} which was found on page {foundOnUrl}");
}
/// <summary>
/// Verifies that the <c>PageCrawlStartingArgs</c> constructor exposes the exact
/// page instance it was given through its <c>PageToCrawl</c> property.
/// </summary>
public void Constructor_ValidArg_SetsPublicProperty()
{
    // Arrange
    PageToCrawl expectedPage = new CrawledPage(new Uri("http://aaa.com/"));

    // Act
    var unitUnderTest = new PageCrawlStartingArgs(new CrawlContext(), expectedPage);

    // Assert: reference equality, not just value equality.
    Assert.AreSame(expectedPage, unitUnderTest.PageToCrawl);
}
/// <summary>
/// Builds a <c>HelloResponse</c> whose <c>Result</c> names the link about to be
/// crawled and the page it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl pageToCrawl = e.PageToCrawl;

    // The message previously contained a stray "{1}, " left over from a
    // composite-format string; inside an interpolated string that evaluates
    // to the literal 1, so it has been removed.
    // NOTE(review): the response object is created and then discarded, exactly
    // as before - confirm where it is supposed to be delivered.
    new HelloResponse
    {
        Result = $"About to crawl link {pageToCrawl.Uri.AbsoluteUri} which was found on page {pageToCrawl.ParentUri.AbsoluteUri}!"
    };
}
/// <summary>
/// Atomically increments <c>PageCount</c> and writes the link URL and the URL of
/// the page it was found on to the console.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    // Count first, so the counter is bumped even if the URL access below throws.
    Interlocked.Increment(ref PageCount);

    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    Console.WriteLine("{0} found on {1}", linkUrl, foundOnUrl);
}
/// <summary>
/// Records a parent-to-child link relation in the <c>CrawledLinks</c> object held
/// in the crawl context's <c>CrawlBag</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page to crawl and the crawl context.</param>
static void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    // The typed local converts whatever CrawlBag holds to CrawledLinks.
    CrawledLinks relations = e.CrawlContext.CrawlBag.CrawledLinks;
    relations.AddRelation(foundOnUrl, linkUrl);
}
/// <summary>
/// Attaches a new <c>Bar</c> to the page's <c>PageBag</c> and writes a console line
/// naming the link about to be crawled and the page it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page to crawl and the crawl context.</param>
void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    var context = e.CrawlContext;
    var upcoming = e.PageToCrawl;

    //context.CrawlBag.MyFoo1.Bar();

    // NOTE(review): the source line was collapsed, so it is unclear whether this
    // assignment was part of the comment above - it is kept as live code; confirm.
    upcoming.PageBag.Bar = new Bar();

    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;
    Console.WriteLine("About to crawl link {0} which was found on page {1}", linkUrl, foundOnUrl);
}
/// <summary>
/// Increments the found-links counter, reports it through
/// <c>updateCrawlingProgress</c>, and debug-logs the link and its parent page.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
void crawler_PageCrawlStartingAsync(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl candidate = e.PageToCrawl;

    m_linksFound++;
    updateCrawlingProgress(m_linksFound, -1, -1);

    string linkUrl = candidate.Uri.AbsoluteUri;
    string parentUrl = candidate.ParentUri.AbsoluteUri;
    string msg = "checking: " + linkUrl + " (parent: " + parentUrl + ")";
    log.Debug(msg);
}
/// <summary>
/// Stamps the page about to be crawled with this instance's <c>SessionId</c> and
/// <c>CrawlerId</c> (via its <c>PageBag</c>) and debug-logs the link and its parent page.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page to crawl and the crawl context.</param>
private void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    CrawlContext context = e.CrawlContext;
    PageToCrawl pending = e.PageToCrawl;

    // Add id info to the page to crawl (per the original note, this is expected
    // to be passed on to the crawled page).
    pending.PageBag.SessionId = SessionId;
    pending.PageBag.CrawlerId = CrawlerId;

    string linkUrl = pending.Uri.AbsoluteUri;
    string foundOnUrl = pending.ParentUri.AbsoluteUri;
    _logger.DebugFormat("Page Crawl Starting {0} which was found on page {1}", linkUrl, foundOnUrl);
}
/// <summary>
/// Logs, at info level, the absolute path of the page that is about to be crawled.
/// Does nothing when <paramref name="arguments"/> is null.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="arguments">Event data carrying the page to crawl; may be null.</param>
public void ProcessPageCrawlStarted(object sender, PageCrawlStartingArgs arguments)
{
    if (arguments == null)
    {
        return;
    }

    // The message is built eagerly, before the logger is invoked.
    string path = arguments.PageToCrawl.Uri.AbsolutePath;
    string message = string.Format(CultureInfo.InvariantCulture, "Checking: {0}", path);

    Log.Info(CultureInfo.InvariantCulture, m => m(message));
}
//static void Main(string[] args)
//{
//    FileStream fs = new FileStream("gsm.txt", FileMode.Open);
//    StreamReader sr = new StreamReader(fs);
//    string str = "";
//    while ((str = sr.ReadLine()) != null)
//    {
//        StartCrawl(str);
//    }
//    //StartCrawl("http://www.gsmarena.com/huawei-phones-58.php");
//    //DisplayDetails();
//    /*This is the code with which Abot Crawl Links/HyperLinks from some Specific Website*/
//    /*Abot Crawler does Depth Crawling that is it jumps from one hyper link to another*/
//    /*I have Crawled links of iphone6 and stored them in a file*/
//    /*This code is commented because once Abot start Crawling it won't stop. It may take several hours*/
//    /*Can be uncommented to verify*/
//}

/// <summary>
/// Appends the page's string form to the GSM URL file when it mentions one of the
/// listed brands and passes the path filter, then writes a console line naming
/// the link and the page it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
static void crawler_ProcessPageCrawlStartingGSM(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl pageToCrawl = e.PageToCrawl;
    string url = pageToCrawl.ToString();

    bool mentionsBrand =
        url.Contains("samsung") || url.Contains("apple") || url.Contains("microsoft") ||
        url.Contains("nokia") || url.Contains("sony") || url.Contains("lg") ||
        url.Contains("htc") || url.Contains("motorola") || url.Contains("huawei") ||
        url.Contains("blackberry") || url.Contains("lenovo") || url.Contains("oppo") ||
        url.Contains("lava");

    // Must contain an underscore and must not be a pictures/review page.
    bool passesPathFilter =
        url.Contains("_") &&
        !url.Contains("pictures") &&
        !url.Contains("reviews") &&
        !url.Contains("review");

    if (mentionsBrand && passesPathFilter)
    {
        // using guarantees both streams are released even if the write throws;
        // previously a failing WriteLine left the file handle open.
        using (FileStream fs = new FileStream("../../../Users/Hp Mobile Workstatio/Documents/Visual Studio 2013/Projects/Working/FYPAdam/AdamDal/bin/Debug/UrlGSM.txt", FileMode.Append))
        using (StreamWriter sw = new StreamWriter(fs))
        {
            sw.WriteLine(url);
        }
    }

    Console.WriteLine("About to crawl link {0} which was found on page {1}", pageToCrawl.Uri.AbsoluteUri, pageToCrawl.ParentUri.AbsoluteUri);
}
//static void Main(string[] args)
//{
//    FileStream fs = new FileStream("ebuyer.txt", FileMode.Open);
//    StreamReader sr = new StreamReader(fs);
//    string str = "";
//    while ((str = sr.ReadLine()) != null)
//    {
//        StartCrawl(str);
//    }
//    //DisplayDetails();
//}

/// <summary>
/// Appends the page's string form to the Ebuyer URL file when it mentions one of
/// the listed brands, contains a hyphen, and names one of the listed device
/// categories; then writes a console line naming the link and the page it was
/// found on. Never throws: any failure is reported to standard error.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
public static void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    try
    {
        PageToCrawl pageToCrawl = e.PageToCrawl;
        string url = pageToCrawl.ToString();

        bool mentionsBrand =
            url.Contains("apple") || url.Contains("hp") || url.Contains("lenovo") ||
            url.Contains("asus") || url.Contains("dell") || url.Contains("acer");

        bool mentionsCategory =
            url.Contains("laptop") || url.Contains("chromebook") || url.Contains("ultrabook") ||
            url.Contains("transformer") || url.Contains("tablet") || url.Contains("macbook") ||
            url.Contains("laptops");

        if (mentionsBrand && url.Contains("-") && mentionsCategory)
        {
            // FileStream fs = new FileStream("UrlEbuyer.txt", FileMode.Append);
            //FileStream fs = new FileStream(@"~\AdamDal\bin\Debug\url.txt", FileMode.Append);

            // using guarantees both streams are released even if the write throws.
            using (FileStream fs = new FileStream("../../../Users/Hp Mobile Workstatio/Documents/Visual Studio 2013/Projects/FYP DB Fix UP/FYPAdam/AdamDal/bin/Debug/UrlEbuyer.txt", FileMode.Append))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                sw.WriteLine(url);
            }
        }

        Console.WriteLine("About to crawl link {0} which was found on page {1}", pageToCrawl.Uri.AbsoluteUri, pageToCrawl.ParentUri.AbsoluteUri);
    }
    catch (Exception ex)
    {
        // Still best-effort (the crawl must not be interrupted), but the failure
        // is no longer swallowed silently.
        Console.Error.WriteLine("crawler_ProcessPageCrawlStarting failed: " + ex);
    }
}
/// <summary>
/// Invoked when the crawler is about to start crawling a page; logs, at info
/// level, the link to be crawled and the page it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    log.Info("要爬取的链接 " + linkUrl + " 在页面 " + foundOnUrl);
}
/// <summary>
/// Handler with the page-crawl-starting signature; currently does nothing.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void PageCrawlStartingEvent(object sender, PageCrawlStartingArgs e)
{
    // Intentionally empty.
}
/// <summary>
/// Logs, at info level, the link about to be crawled and the page it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
static void Crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    _log.Info($"About to crawl link {linkUrl} which was found on page {foundOnUrl}");
}
/// <summary>
/// Calls <c>_rateLimiter.WaitToProceed()</c> each time a page crawl is about to start.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void WebCrawler_PageCrawlStartingAsync(object sender, PageCrawlStartingArgs e)
    => _rateLimiter.WaitToProceed();
/// <summary>
/// Placeholder handler with the page-crawl-starting signature; it currently has
/// no executable statements.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
static void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    // Process data
}
/// <summary>
/// Reads the page that is about to be crawled from the event data; the value is
/// not used further.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
private void _crawler_PageCrawlStartingAsync(object sender, PageCrawlStartingArgs e)
{
    // NOTE(review): the local is unused - presumably a stub awaiting real handling.
    PageToCrawl upcoming = e.PageToCrawl;
}
// -----------------------
// DEBUG output statements
// -----------------------

/// <summary>
/// Writes a console line, prefixed with "Abot-", naming the link about to be
/// crawled and the page it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
private static void WebCrawler_PageCrawlStartingAsync(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    Console.WriteLine("Abot-About to crawl link {0} which was found on page {1}", linkUrl, foundOnUrl);
}
/// <summary>
/// Debug-logs the link about to be crawled and the page it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
void ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    Log.Logger.Debug($"About to crawl link {linkUrl} which was found on page {foundOnUrl}");
}
/// <summary>
/// Writes the URI of the page that is about to be crawled to the console.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
static void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;

    // The URI object itself is passed, so the console performs the string conversion.
    Console.WriteLine(upcoming.Uri);
}
/// <summary>
/// Writes a "Starting with ..." console line containing the string form of the
/// URI of the page that is about to be crawled.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
private void crawler_CrawlerStart(object sender, PageCrawlStartingArgs e)
{
    string address = e.PageToCrawl.Uri.ToString();

    Console.WriteLine("Starting with {0}", address);
}
/// <summary>
/// Writes the string form of the page that is about to be crawled to the console.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
private void Crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;

    Console.WriteLine($"Page to Crawl: {upcoming}");
}
/// <summary>
/// Handler with the page-crawl-starting signature; currently does nothing.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private static void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    // Intentionally empty.
}
/// <summary>
/// Handler with the page-crawl-starting signature; currently does nothing.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
void crawler_PageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    // Intentionally empty.
}
/// <summary>
/// Passes this handler's name and the absolute URI of the page about to be
/// crawled to <c>Log</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
static void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    string crawlUrl = e.PageToCrawl.Uri.AbsoluteUri;

    Log("crawler_ProcessPageCrawlStarting", crawlUrl);
}
/// <summary>
/// Formats an "about to crawl" message naming the link and the page it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
private void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    // NOTE(review): the formatted message is never used - confirm the intended sink.
    string result = string.Format("About to crawl link {0} which was found on page {1}", linkUrl, foundOnUrl);
}
/// <summary>
/// Passes to <c>Log</c> a message naming the link about to be crawled and the page
/// it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
void crawler_ProcessPageCrawlStarting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl upcoming = e.PageToCrawl;
    string linkUrl = upcoming.Uri.AbsoluteUri;
    string foundOnUrl = upcoming.ParentUri.AbsoluteUri;

    Log($"About to crawl link {linkUrl} which was found on page {foundOnUrl}");
}
// Crawl starting
/// <summary>
/// Sends to <c>Outputer.Output</c> a message naming the link about to be crawled
/// and the page it was found on.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data carrying the page that is about to be crawled.</param>
public static void Starting(object sender, PageCrawlStartingArgs e)
{
    PageToCrawl pageToCrawl = e.PageToCrawl;
    string linkUrl = pageToCrawl.Uri.AbsoluteUri;
    string parentPageUrl = pageToCrawl.ParentUri.AbsoluteUri;

    // The message text reads "link {1} found on page {0}", so the parent page
    // must fill {0} and the link {1}. The arguments used to be passed the other
    // way round, which reported the two URLs in swapped roles.
    Outputer.Output(string.Format("关于抓取页面 {0} 上找到的链接 {1}", parentPageUrl, linkUrl));
}