public override void Run()
{
    // Worker loop: polls the admin (start/stop/clear) queue and the URL queue,
    // crawls each not-yet-visited URL, and records results and errors in
    // Azure tables. Runs forever; never returns.
    startingCode(); // initialize state (presumably including the visited-URL set — confirm)

    while (true)
    {
        Trace.TraceInformation("Working");
        Thread.Sleep(50); // throttle the polling loop

        // Check and handle admin messages.
        CloudQueueMessage startStopMessage = startStopQueue.GetMessage();
        if (startStopMessage != null)
        {
            state = startStopMessage.AsString;
            Update("info", "state", state);
            startStopQueue.DeleteMessage(startStopMessage);

            if (string.Equals(state, "started", StringComparison.Ordinal))
            {
                crawl = new Crawler();
                crawl.StartLoader();
            }
        }

        // string.Equals is null-safe: before the first admin message arrives,
        // `state` may still be null and instance Equals would throw.
        if (string.Equals(state, "clearing", StringComparison.Ordinal))
        {
            // Drop all pending work and reset to the initial state.
            queue.Clear();
            startingCode();
        }
        else if (string.Equals(state, "started", StringComparison.Ordinal))
        {
            // Get the next URL to crawl; nothing to do if the queue is empty.
            CloudQueueMessage message = queue.GetMessage();
            if (message == null)
            {
                continue;
            }

            urlsCrawled++;
            Update("info", "total", urlsCrawled.ToString());

            queue.FetchAttributes();
            // ApproximateMessageCount is Nullable<int>; treat "not yet known"
            // as 0 instead of throwing on the cast.
            queueSize = queue.ApproximateMessageCount ?? 0;
            Update("info", "queue", queueSize.ToString());

            string url = message.AsString;
            if (!acceptedURLs.Contains(url))
            {
                try
                {
                    List<WebCrawlerEntity> entities = crawl.startCrawler(url);
                    if (entities != null)
                    {
                        numTitles++;
                        Update("info", "numTitles", numTitles.ToString());

                        string temp = "";
                        foreach (WebCrawlerEntity w in entities)
                        {
                            temp += " " + w.PartitionKey;
                            urlsAccepted++;
                            w.num = urlsAccepted;
                            Update("info", "accepted", urlsAccepted.ToString());

                            TableOperation insertOperation = TableOperation.InsertOrReplace(w);
                            // BUG FIX: the original called ExecuteAsync without
                            // awaiting the returned Task, so insert failures were
                            // silently dropped. Execute synchronously, consistent
                            // with errorTable.Execute in the catch below.
                            table.Execute(insertOperation);
                        }

                        // Mark the URL visited once (the original re-added it
                        // inside the loop, once per entity). Guarded so an empty
                        // result list still leaves the URL unmarked, as before.
                        if (entities.Count > 0)
                        {
                            acceptedURLs.Add(url);
                        }

                        TextInfo myTI = new CultureInfo("en-US", false).TextInfo;
                        Update("info", "lastTitle", myTI.ToTitleCase(temp));
                    }
                }
                catch (Exception e)
                {
                    // Record the failure in the error table and keep crawling.
                    numErrors++;
                    infoEntity newError = new infoEntity(
                        numErrors.ToString(),
                        "url: " + url + " Error: " + e.Message);
                    TableOperation insertErrorOperation = TableOperation.InsertOrReplace(newError);
                    errorTable.Execute(insertErrorOperation);
                }
            }

            queue.DeleteMessage(message);
        }
    }
}
public override void Run()
{
    // Worker loop: polls the admin (start/stop/clear) queue and the URL queue,
    // crawls each not-yet-visited URL, and records results and errors in
    // Azure tables. Runs forever; never returns.
    startingCode(); // initialize state (presumably including the visited-URL set — confirm)

    while (true)
    {
        Trace.TraceInformation("Working");
        Thread.Sleep(50); // throttle the polling loop

        // Check and handle admin messages.
        CloudQueueMessage startStopMessage = startStopQueue.GetMessage();
        if (startStopMessage != null)
        {
            state = startStopMessage.AsString;
            Update("info", "state", state);
            startStopQueue.DeleteMessage(startStopMessage);

            if (string.Equals(state, "started", StringComparison.Ordinal))
            {
                crawl = new Crawler();
                crawl.StartLoader();
            }
        }

        // string.Equals is null-safe: before the first admin message arrives,
        // `state` may still be null and instance Equals would throw.
        if (string.Equals(state, "clearing", StringComparison.Ordinal))
        {
            // Drop all pending work and reset to the initial state.
            queue.Clear();
            startingCode();
        }
        else if (string.Equals(state, "started", StringComparison.Ordinal))
        {
            // Get the next URL to crawl; nothing to do if the queue is empty.
            CloudQueueMessage message = queue.GetMessage();
            if (message == null)
            {
                continue;
            }

            urlsCrawled++;
            Update("info", "total", urlsCrawled.ToString());

            queue.FetchAttributes();
            // ApproximateMessageCount is Nullable<int>; treat "not yet known"
            // as 0 instead of throwing on the cast.
            queueSize = queue.ApproximateMessageCount ?? 0;
            Update("info", "queue", queueSize.ToString());

            string url = message.AsString;
            if (!acceptedURLs.Contains(url))
            {
                try
                {
                    List<WebCrawlerEntity> entities = crawl.startCrawler(url);
                    if (entities != null)
                    {
                        numTitles++;
                        Update("info", "numTitles", numTitles.ToString());

                        string temp = "";
                        foreach (WebCrawlerEntity w in entities)
                        {
                            temp += " " + w.PartitionKey;
                            urlsAccepted++;
                            w.num = urlsAccepted;
                            Update("info", "accepted", urlsAccepted.ToString());

                            TableOperation insertOperation = TableOperation.InsertOrReplace(w);
                            // BUG FIX: the original called ExecuteAsync without
                            // awaiting the returned Task, so insert failures were
                            // silently dropped. Execute synchronously, consistent
                            // with errorTable.Execute in the catch below.
                            table.Execute(insertOperation);
                        }

                        // Mark the URL visited once (the original re-added it
                        // inside the loop, once per entity). Guarded so an empty
                        // result list still leaves the URL unmarked, as before.
                        if (entities.Count > 0)
                        {
                            acceptedURLs.Add(url);
                        }

                        TextInfo myTI = new CultureInfo("en-US", false).TextInfo;
                        Update("info", "lastTitle", myTI.ToTitleCase(temp));
                    }
                }
                catch (Exception e)
                {
                    // Record the failure in the error table and keep crawling.
                    numErrors++;
                    infoEntity newError = new infoEntity(
                        numErrors.ToString(),
                        "url: " + url + " Error: " + e.Message);
                    TableOperation insertErrorOperation = TableOperation.InsertOrReplace(newError);
                    errorTable.Execute(insertErrorOperation);
                }
            }

            queue.DeleteMessage(message);
        }
    }
}