/// <summary>
/// Worker-role main loop. Connects to the storage account named by the
/// "StorageConnectionString" app setting, binds the "htmlURLs" table and the
/// "urls", "status" and "visited" queues, then polls the status queue forever:
/// a "Start" status seeds the URL queue once, and afterwards any status other
/// than "Stop" causes one queued URL to be crawled per iteration.
/// </summary>
public override void Run()
{
    Trace.TraceInformation("WorkerRole1 is running");

    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(
        ConfigurationManager.AppSettings["StorageConnectionString"]);
    CloudTableClient tableClient = storageAccount.CreateCloudTableClient();
    CloudQueueClient queueClient = storageAccount.CreateCloudQueueClient();

    table = tableClient.GetTableReference("htmlURLs");
    HTMLs = queueClient.GetQueueReference("urls");
    messages = queueClient.GetQueueReference("status");
    visitedURLs = queueClient.GetQueueReference("visited");

    bool robotsparsed = false;
    CloudQueueMessage status = null;
    lastTen = new List<String>();

    while (true)
    {
        Thread.Sleep(10);

        // Note: when the status queue does not exist, the previously peeked
        // status is kept and keeps driving the loop.
        if (messages.Exists())
        {
            status = messages.PeekMessage();
        }

        if (status == null || HTMLs == null)
        {
            continue;
        }

        string command = status.AsString;
        if (!robotsparsed)
        {
            // Nothing is crawled until a "Start" status has seeded the queue.
            if (command == "Start")
            {
                SeedUrlQueueFromRobots();
                robotsparsed = true;
            }
        }
        else if (command != "Stop")
        {
            CrawlNextQueuedUrl();
        }
    }
}

/// <summary>
/// Creates a fresh crawler, resets the counters, enqueues every URL returned by
/// the two robots crawls, and replaces the pending status message with "Crawling".
/// </summary>
private void SeedUrlQueueFromRobots()
{
    spider = new Crawler();
    List<string> urls = spider.crawlRobots();
    List<string> urls2 = spider.cnnRobotsCrawl();
    index = 0;
    urlsCrawled = 0;

    foreach (string url in urls)
    {
        HTMLs.AddMessage(new CloudQueueMessage(url));
    }

    foreach (string url in urls2)
    {
        HTMLs.AddMessage(new CloudQueueMessage(url));
    }

    // GetMessage returns null when no message is currently visible (for example
    // when another consumer already dequeued it); DeleteMessage rejects null.
    CloudQueueMessage startMessage = messages.GetMessage(TimeSpan.FromMinutes(5));
    if (startMessage != null)
    {
        messages.DeleteMessage(startMessage);
    }

    messages.AddMessage(new CloudQueueMessage("Crawling"));
}

/// <summary>
/// Dequeues one URL, crawls it when the crawler allows it, stores the result in
/// the table, enqueues the discovered links, and rewrites the two counter rows.
/// Does nothing when the URL queue has no visible message.
/// </summary>
private void CrawlNextQueuedUrl()
{
    CloudQueueMessage message = HTMLs.GetMessage(TimeSpan.FromMinutes(5));
    if (message == null)
    {
        return;
    }

    // The message is removed before processing, so a failure below does not
    // put the URL back on the queue.
    HTMLs.DeleteMessage(message);
    string messageString = message.AsString;

    if (spider.isAllowed(messageString))
    {
        List<string> newLinks = spider.crawlLink(messageString);

        // A null or empty result is recorded the same way as an explicit
        // "Error" result instead of throwing on newLinks[0].
        bool hasResult = newLinks != null && newLinks.Count > 0;
        string head = hasResult ? newLinks[0] : "Error";
        string partition = head == "Error" ? "Error" : "Partition";
        URL entry = new URL(messageString, head, partition);

        // Keep only the ten most recently processed URLs.
        if (lastTen.Count >= 10)
        {
            lastTen.RemoveAt(0);
        }

        lastTen.Add(messageString);

        if (spider.visited().Contains(messageString))
        {
            index++;
        }

        urlsCrawled++;

        // Still fire-and-forget, but a failed write is now traced instead of
        // disappearing as an unobserved task exception.
        TableOperation insertOperation = TableOperation.InsertOrReplace(entry);
        LogIfFaulted(table.ExecuteAsync(insertOperation), "Table insert for " + messageString);

        if (hasResult)
        {
            // NOTE(review): enqueueing starts at index 2 and stops before the
            // last element; presumably those slots are not links — confirm
            // against Crawler.crawlLink before changing the bounds.
            for (int i = 2; i < newLinks.Count - 1; i++)
            {
                LogIfFaulted(
                    HTMLs.AddMessageAsync(new CloudQueueMessage(newLinks[i])),
                    "Enqueue of " + newLinks[i]);
            }
        }
    }

    // The counter rows are rewritten for every dequeued message, allowed or not.
    URL tableEntry = new URL("Index", index.ToString(), "IndexCount");
    TableOperation indexOperation = TableOperation.InsertOrReplace(tableEntry);
    table.Execute(indexOperation);

    URL tableEntry2 = new URL("urls", urlsCrawled.ToString(), "URLs");
    TableOperation urlCountOperation = TableOperation.InsertOrReplace(tableEntry2);
    table.Execute(urlCountOperation);
}

/// <summary>
/// Attaches a fault-only continuation that writes the task's exception to the
/// trace log. The caller is not blocked.
/// </summary>
/// <param name="task">The started storage operation.</param>
/// <param name="operation">Short description used in the trace message.</param>
private static void LogIfFaulted(System.Threading.Tasks.Task task, string operation)
{
    task.ContinueWith(
        t => Trace.TraceError("{0} failed: {1}", operation, t.Exception),
        System.Threading.Tasks.TaskContinuationOptions.OnlyOnFaulted);
}