Ejemplo n.º 1
0
 /// <summary>
 /// Tears down the crawler's storage by calling <c>DeleteIfExists</c> on
 /// <c>Table</c>, <c>LoadQueue</c>, <c>CrawlQueue</c> and <c>StopQueue</c>, in that order.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the members are declared outside this method; presumably they are
 /// Azure Storage table/queue references that were initialized earlier - confirm.
 /// </remarks>
 public void ClearEverything()
 {
     Table.DeleteIfExists();
     LoadQueue.DeleteIfExists();
     CrawlQueue.DeleteIfExists();
     StopQueue.DeleteIfExists();
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Posts a "stop" message on the stop queue obtained from
        /// <c>CloudConfiguration.GetStopQueue()</c>.
        /// </summary>
        /// <returns>
        /// The stop queue's name followed by the text of the posted message,
        /// separated by a single space.
        /// </returns>
        public string StopCrawler()
        {
            StopQueue = CloudConfiguration.GetStopQueue();

            // Build the signal first so its text can be echoed back to the caller.
            var signal = new CloudQueueMessage("stop");
            StopQueue.AddMessage(signal);

            return string.Format("{0} {1}", StopQueue.Name, signal.AsString);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Tears down the crawler's storage by calling <c>DeleteIfExists</c> on
 /// <c>SiteDataTable</c>, <c>LoadQueue</c>, <c>CrawlQueue</c>, <c>StopQueue</c>,
 /// <c>AdminStatusTable</c> and <c>ErrorQueue</c>, in that order.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the members are declared outside this method; presumably they are
 /// Azure Storage table/queue references that were initialized earlier - confirm.
 /// </remarks>
 public void ClearEverything()
 {
     SiteDataTable.DeleteIfExists();
     LoadQueue.DeleteIfExists();
     CrawlQueue.DeleteIfExists();
     StopQueue.DeleteIfExists();
     AdminStatusTable.DeleteIfExists();
     ErrorQueue.DeleteIfExists();
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Requests that the crawler stop: posts a "stop" message on the stop queue
 /// (unless one is already visible there) and replaces the current state
 /// message with "Idle".
 /// </summary>
 /// <returns>Always the literal string "stopped crawling".</returns>
 public string StopCrawler()
 {
     StopQueue  = CloudConfiguration.GetStopQueue();
     StateQueue = CloudConfiguration.GetStateQueue();

     // A stop signal is already pending, so there is nothing more to post.
     if (StopQueue.PeekMessage() != null)
     {
         return("stopped crawling");
     }

     StopQueue.AddMessage(new CloudQueueMessage("stop"));

     // Dequeue the previous state once and null-check the result. Peeking and
     // then calling GetMessage separately can hand null to DeleteMessage when
     // another consumer takes or hides the message in between.
     CloudQueueMessage previousState = StateQueue.GetMessage();
     if (previousState != null)
     {
         StateQueue.DeleteMessage(previousState);
     }
     StateQueue.AddMessage(new CloudQueueMessage("Idle"));

     return("stopped crawling");
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Starts (or resumes) the crawler.
 /// If no stop signal is pending, the two seed robots.txt URLs are posted as a
 /// single whitespace-separated message on the loading queue and the state
 /// message is replaced with "Loading". If a stop signal is pending, it is
 /// removed instead and nothing is posted.
 /// </summary>
 /// <returns>Always the literal string "start crawler method executed".</returns>
 public string StartCrawler()
 {
     StopQueue  = CloudConfiguration.GetStopQueue();
     StateQueue = CloudConfiguration.GetStateQueue();

     // Dequeue the stop signal once and branch on the result. Peeking first and
     // calling GetMessage afterwards can pass null to DeleteMessage if the
     // message is taken or hidden between the two calls.
     CloudQueueMessage pendingStop = StopQueue.GetMessage();
     if (pendingStop != null)
     {
         StopQueue.DeleteMessage(pendingStop);
         return("start crawler method executed");
     }

     LoadQueue = CloudConfiguration.GetLoadingQueue();
     CloudQueueMessage startMessage =
         new CloudQueueMessage("http://www.cnn.com/robots.txt http://www.bleacherreport.com/robots.txt");
     LoadQueue.AddMessage(startMessage);

     // Publish the new state. Previously the "Loading" message was constructed
     // but never enqueued, so the state queue was left unchanged.
     CloudQueueMessage previousState = StateQueue.GetMessage();
     if (previousState != null)
     {
         StateQueue.DeleteMessage(previousState);
     }
     StateQueue.AddMessage(new CloudQueueMessage("Loading"));

     return("start crawler method executed");
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Clears every retrievable message from the stop queue, then seeds the
        /// loading queue with the CNN and Bleacher Report robots.txt URLs
        /// (one message each, CNN first).
        /// </summary>
        /// <returns>
        /// The loading queue's name followed by the text of both seed messages,
        /// separated by single spaces.
        /// </returns>
        public string StartCrawler()
        {
            StopQueue = CloudConfiguration.GetStopQueue();

            // Drain the stop queue: keep dequeuing and deleting until nothing comes back.
            for (CloudQueueMessage pending = StopQueue.GetMessage();
                 pending != null;
                 pending = StopQueue.GetMessage())
            {
                StopQueue.DeleteMessage(pending);
            }

            LoadQueue = CloudConfiguration.GetLoadingQueue();

            // Seed the crawl with one message per robots.txt.
            var cnnSeed = new CloudQueueMessage("http://www.cnn.com/robots.txt");
            LoadQueue.AddMessage(cnnSeed);

            var bleacherSeed = new CloudQueueMessage("http://www.bleacherreport.com/robots.txt");
            LoadQueue.AddMessage(bleacherSeed);

            return string.Join(" ", LoadQueue.Name, cnnSeed.AsString, bleacherSeed.AsString);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Worker entry point. Initializes storage handles, performance counters
        /// and the crawler, then loops forever: while no stop signal is present it
        /// processes one loading-queue message per iteration, or one crawl-queue
        /// message when the loading queue is empty, and refreshes the dashboard.
        /// While a stop signal is present the worker reports "Idle" and waits.
        /// </summary>
        /// <remarks>
        /// This method never returns.
        /// </remarks>
        public override void Run()
        {
            // Delay between stop-queue polls while the crawler is stopped.
            const int stopPollIntervalMs = 1000;

            Storage = new AzureStorage();

            LoadQueue        = CloudConfiguration.GetLoadingQueue();
            CrawlQueue       = CloudConfiguration.GetCrawlingQueue();
            StopQueue        = CloudConfiguration.GetStopQueue();
            SiteDataTable    = CloudConfiguration.GetSiteDataTable();
            AdminStatusTable = CloudConfiguration.GetAdminStatusTable();
            StateQueue       = CloudConfiguration.GetStateQueue();

            State = "Idle";

            CPUCount = new PerformanceCounter("Processor", "% Processor Time", "_Total");
            MemCount = new PerformanceCounter("Memory", "Available MBytes");

            Status = new AdminStatus(State, (int)CPUCount.NextValue(), (int)MemCount.NextValue());

            string[] robots = { "http://www.cnn.com/robots.txt", "http://www.bleacherreport.com/robots.txt" };
            Crawler = new WebCrawler(robots, Storage);

            Thread.Sleep(10000);

            string url = "";

            while (true)
            {
                // Peek rather than Get: GetMessage only hides the stop message for
                // its visibility timeout, so the very next poll would see an empty
                // queue and crawling would resume although nobody cleared the stop
                // signal. Peeking leaves the signal in place until it is removed.
                while (StopQueue.PeekMessage() == null)
                {
                    // Loading work takes priority over crawling work.
                    CloudQueueMessage loadMessage = LoadQueue.GetMessage();
                    State = "Loading";
                    if (loadMessage != null)
                    {
                        url = loadMessage.AsString;
                        if (url.Contains("robots.txt"))
                        {
                            // A single message may carry several whitespace-separated
                            // robots.txt URLs. The explicit char[] overload avoids
                            // overload ambiguity, and empty entries are dropped so
                            // repeated whitespace never yields an empty URL.
                            string[] robotLinks = url.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
                            foreach (string link in robotLinks)
                            {
                                Crawler.ProcessURL(link);
                            }
                        }
                        else
                        {
                            Crawler.ProcessURL(url);
                        }

                        // Delete in both cases; otherwise a non-robots message would
                        // become visible again and be processed repeatedly.
                        LoadQueue.DeleteMessage(loadMessage);
                    }
                    else
                    {
                        // State was set to "Loading" above, so the former
                        // Loading/Crawling check was always true here.
                        CloudQueueMessage crawlMessage = CrawlQueue.GetMessage();
                        if (crawlMessage != null)
                        {
                            State = "Crawling";
                            url   = crawlMessage.AsString;
                            Crawler.ProcessURL(url);
                            CrawlQueue.DeleteMessage(crawlMessage);
                        }
                    }

                    UpdateDashboard(url);
                }

                State = "Idle";

                // Stopped: wait before polling again instead of spinning on storage.
                Thread.Sleep(stopPollIntervalMs);
            }
        }