/// <summary>
/// Seeds the link queue with the robots.txt URLs of the sites to crawl and then
/// posts a "startcrawling" command on the command queue.
/// </summary>
/// <returns>The literal status string "Initated".</returns>
public string StartCrawling()
{
    // Seed URLs, enqueued in this order.
    // (A single sitemap such as https://www.cnn.com/sitemaps/sitemap-profile-2018-02.xml
    // was previously enqueued here as well; it is currently disabled.)
    string[] seedUrls =
    {
        "http://www.cnn.com/robots.txt",
        "http://www.bleacherreport.com/robots.txt"
    };

    foreach (string seedUrl in seedUrls)
    {
        CloudQueueMessage seedMessage = new CloudQueueMessage(seedUrl);
        StorageManager.LinkQueue().AddMessage(seedMessage);
    }

    CloudQueueMessage startCommand = new CloudQueueMessage("startcrawling");
    StorageManager.CommandQueue().AddMessage(startCommand);

    return "Initated";
}
/// <summary>
/// Main worker loop. On every iteration it polls the command queue for a single
/// command ("startcrawling", "stopcrawling" or "clear"), kicks off a performance
/// counter sample, and then performs one unit of crawl work depending on the
/// crawler state ("Loading" calls <c>CrawlUrl</c>, "Crawling" calls <c>GetHTMLData</c>).
/// This method never returns.
/// </summary>
public override void Run()
{
    Trace.TraceInformation("WorkerRole1 is running");

    // Pause used when an iteration found neither a command nor crawl work, so the
    // loop does not spin against the queue service while idle.
    const int idleDelayMilliseconds = 1000;

    while (true)
    {
        CloudQueueMessage commandMessage = StorageManager.CommandQueue().GetMessage();
        if (commandMessage != null)
        {
            switch (commandMessage.AsString)
            {
                case "startcrawling":
                    crawler.Start();
                    StorageManager.CommandQueue().DeleteMessage(commandMessage);
                    break;

                case "stopcrawling":
                    crawler.Stop();
                    StorageManager.CommandQueue().DeleteMessage(commandMessage);
                    break;

                case "clear":
                    // Clearing the command queue also removes the "clear" message
                    // itself, so no explicit DeleteMessage is needed here.
                    StorageManager.LinkQueue().Clear();
                    StorageManager.CommandQueue().Clear();
                    StorageManager.HTMLQueue().Clear();
                    StorageManager.GetTable().DeleteIfExists();
                    StorageManager.PerformanceCounterTable().DeleteIfExists();
                    StorageManager.ErrorTable().DeleteIfExists();
                    break;

                default:
                    // An unrecognised command must still be deleted; otherwise it
                    // becomes visible again after the visibility timeout and is
                    // dequeued over and over.
                    Trace.TraceWarning("Ignoring unknown command: {0}", commandMessage.AsString);
                    StorageManager.CommandQueue().DeleteMessage(commandMessage);
                    break;
            }
        }

        // One performance sample per iteration (previously a second, redundant
        // task was started inside each crawl branch below).
        new Task(crawler.GetPerfCounters).Start();

        if (Crawler.state.Equals("Loading"))
        {
            crawler.CrawlUrl();
        }
        else if (crawler.GetState().Equals("Crawling"))
        {
            crawler.GetHTMLData();
        }
        else if (commandMessage == null)
        {
            // Nothing to do this round: back off instead of busy-waiting.
            System.Threading.Thread.Sleep(idleDelayMilliseconds);
        }
    }
}
/// <summary>
/// Posts a "clear" command on the command queue.
/// </summary>
/// <returns>The literal status string "Cleared".</returns>
public string ClearIndex()
{
    CloudQueueMessage clearCommand = new CloudQueueMessage("clear");
    StorageManager.CommandQueue().AddMessage(clearCommand);

    return "Cleared";
}
/// <summary>
/// Posts a "stopcrawling" command on the command queue.
/// </summary>
/// <returns>The literal status string "Stop Crawling".</returns>
public string StopCrawling()
{
    CloudQueueMessage stopCommand = new CloudQueueMessage("stopcrawling");
    StorageManager.CommandQueue().AddMessage(stopCommand);

    return "Stop Crawling";
}