Пример #1
0
        /// <summary>
        /// Worker loop: builds a <c>WebCrawler</c> from the two robots.txt rule sets, then polls
        /// the stop, load and crawl queues forever.
        /// </summary>
        /// <remarks>
        /// State transitions driven by this method:
        /// "Idle" -> "Loading" when a load message is processed,
        /// "Loading"/"Crawling" -> "Crawling" when a crawl message is processed,
        /// any state -> "Idle" when a stop message is received.
        /// The crawl queue is only read while the state is "Loading" or "Crawling", so after a
        /// stop the worker stays idle until a new load message arrives.
        /// This method never returns.
        /// </remarks>
        public override void Run()
        {
            // Pause between polls when no message was handled, so empty queues are not hammered.
            const int idlePollDelayMs = 1000;

            CloudQueue loadQueue  = CloudConfiguration.GetLoadingQueue();
            CloudQueue crawlQueue = CloudConfiguration.GetCrawlingQueue();
            CloudQueue stopQueue  = CloudConfiguration.GetStopQueue();

            // The table reference is not used in this method; the call is kept in case it has
            // side effects (e.g. creating the table) - TODO confirm and remove if it does not.
            CloudConfiguration.GetTable();

            List<string> cnnRules            = ProcessRobots("http://www.cnn.com/robots.txt");
            List<string> bleacherReportRules = ProcessRobots("http://www.bleacherreport.com/robots.txt");
            WebCrawler   crawler             = new WebCrawler(cnnRules, bleacherReportRules);

            State = "Idle";
            Thread.Sleep(10000);

            CPUCount = new PerformanceCounter("Processor", "% Processor Time", "_Total");
            MemCount = new PerformanceCounter("Memory", "Available MBytes");

            while (true)
            {
                bool handledMessage = false;

                // Re-read the stop queue on every pass. Previously the stop message was fetched
                // only while running, so once a stop arrived the method busy-spun forever and
                // could never resume.
                CloudQueueMessage stopMessage = stopQueue.GetMessage();

                if (stopMessage != null)
                {
                    // Consume the request so it is not redelivered later and does not stop the
                    // worker a second time.
                    // NOTE(review): if several workers share this queue, only the one that
                    // dequeues the message stops - confirm that is the intended protocol.
                    stopQueue.DeleteMessage(stopMessage);
                    State = "Idle";
                    handledMessage = true;
                }
                else
                {
                    // Load messages take priority over crawl messages.
                    CloudQueueMessage loadMessage = loadQueue.GetMessage();

                    if (loadMessage != null)
                    {
                        State = "Loading";
                        crawler.ProcessURL(loadMessage.AsString);
                        loadQueue.DeleteMessage(loadMessage);
                        handledMessage = true;
                    }
                    else if (State.Equals("Loading") || State.Equals("Crawling"))
                    {
                        // Only crawl while active; an idle worker ignores the crawl queue.
                        CloudQueueMessage crawlMessage = crawlQueue.GetMessage();

                        if (crawlMessage != null)
                        {
                            State = "Crawling";
                            crawler.ProcessURL(crawlMessage.AsString);
                            crawlQueue.DeleteMessage(crawlMessage);
                            handledMessage = true;
                        }
                    }
                }

                if (!handledMessage)
                {
                    Thread.Sleep(idlePollDelayMs);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Worker loop: initialises storage, queues, tables, performance counters and the
        /// crawler, then polls the stop, load and crawl queues forever, refreshing the
        /// dashboard after every pass.
        /// </summary>
        /// <remarks>
        /// State transitions driven by this method:
        /// "Idle" -> "Loading" when a load message is processed,
        /// "Loading"/"Crawling" -> "Crawling" when a crawl message is processed,
        /// any state -> "Idle" when a stop message is received.
        /// The crawl queue is only read while the state is "Loading" or "Crawling", so after a
        /// stop the worker stays idle until a new load message arrives.
        /// This method never returns.
        /// </remarks>
        public override void Run()
        {
            // Pause between polls when no message was handled, so empty queues are not hammered.
            const int idlePollDelayMs = 1000;

            Storage = new AzureStorage();

            LoadQueue        = CloudConfiguration.GetLoadingQueue();
            CrawlQueue       = CloudConfiguration.GetCrawlingQueue();
            StopQueue        = CloudConfiguration.GetStopQueue();
            SiteDataTable    = CloudConfiguration.GetSiteDataTable();
            AdminStatusTable = CloudConfiguration.GetAdminStatusTable();
            StateQueue       = CloudConfiguration.GetStateQueue();

            State = "Idle";

            CPUCount = new PerformanceCounter("Processor", "% Processor Time", "_Total");
            MemCount = new PerformanceCounter("Memory", "Available MBytes");

            Status = new AdminStatus(State, (int)CPUCount.NextValue(), (int)MemCount.NextValue());

            string[] robots = { "http://www.cnn.com/robots.txt", "http://www.bleacherreport.com/robots.txt" };
            Crawler = new WebCrawler(robots, Storage);

            Thread.Sleep(10000);

            // Last URL handed to the crawler; reported to the dashboard on every pass.
            string url = "";

            while (true)
            {
                bool handledMessage = false;
                CloudQueueMessage stopMessage = StopQueue.GetMessage();

                if (stopMessage != null)
                {
                    // Consume the request so it is not redelivered later and does not stop the
                    // worker a second time.
                    // NOTE(review): if several workers share this queue, only the one that
                    // dequeues the message stops - confirm that is the intended protocol.
                    StopQueue.DeleteMessage(stopMessage);
                    State = "Idle";
                    handledMessage = true;
                }
                else
                {
                    // Load messages take priority over crawl messages. The state is switched to
                    // "Loading" only when a load message actually exists; setting it on every
                    // pass (as before) re-activated crawling right after a stop.
                    CloudQueueMessage loadMessage = LoadQueue.GetMessage();

                    if (loadMessage != null)
                    {
                        State = "Loading";
                        url   = loadMessage.AsString;

                        if (url.Contains("robots.txt"))
                        {
                            // A robots message may carry several whitespace-separated links;
                            // drop empty entries produced by repeated whitespace.
                            string[] robotLinks = url.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
                            foreach (string link in robotLinks)
                            {
                                Crawler.ProcessURL(link);
                            }
                        }
                        else
                        {
                            Crawler.ProcessURL(url);
                        }

                        // Delete in both branches; previously a plain-URL load message was
                        // never removed and would be redelivered and processed again.
                        LoadQueue.DeleteMessage(loadMessage);
                        handledMessage = true;
                    }
                    else if (State.Equals("Loading") || State.Equals("Crawling"))
                    {
                        // Only crawl while active; an idle worker ignores the crawl queue.
                        CloudQueueMessage crawlMessage = CrawlQueue.GetMessage();

                        if (crawlMessage != null)
                        {
                            State = "Crawling";
                            url   = crawlMessage.AsString;
                            Crawler.ProcessURL(url);
                            CrawlQueue.DeleteMessage(crawlMessage);
                            handledMessage = true;
                        }
                    }
                }

                // Refresh on every pass so the dashboard also reflects the "Idle" state.
                UpdateDashboard(url);

                if (!handledMessage)
                {
                    Thread.Sleep(idlePollDelayMs);
                }
            }
        }