/// <summary>
/// Runs one crawl step, driven by the message peeked from the queue selected by <c>true</c>.
/// </summary>
/// <remarks>
/// On "getStart": takes a URL from the queue selected by <c>false</c>, records it in
/// <c>visitedUrl</c>, inserts an entity into the "hw3table" table, and enqueues every link
/// returned by the parser that matches no disallow rule and has not been visited. Each
/// rejected link is added to <c>errorList</c> once.
/// On "done": removes the message from the queue selected by <c>true</c> and increments
/// <c>errorcount</c>.
/// NOTE(review): the boolean flag presumably selects the command queue (true) versus the
/// URL queue (false) - confirm against the storage helper.
/// </remarks>
public void StartCrawling()
{
    string commandmessage = myStorage.PeekFromQueue(true);

    // The static string.Equals is null-safe, so a null peek result simply matches
    // neither command instead of throwing a NullReferenceException.
    if (string.Equals(commandmessage, "getStart"))
    {
        string url = myStorage.getMsgFromQueue(false);
        if (string.IsNullOrEmpty(url))
        {
            // Nothing was dequeued, so there is no page to fetch or record.
            return;
        }

        visitedUrl.Add(url);
        getHTML parser = new getHTML(url);

        // NOTE(review): the entity is built from `myurl`, not from the `url` that was just
        // dequeued - confirm this is intended and not a typo.
        Entity urlentity = new Entity(myurl, "PageUrl");
        CloudTable table = tableClient.GetTableReference("hw3table");
        TableOperation insertOperation = TableOperation.Insert(urlentity);
        table.Execute(insertOperation);

        foreach (string rightUrl in parser.GetAllLink())
        {
            // A link is disallowed when it contains ANY rule. Deciding this before
            // enqueueing ensures each link is queued (and counted) at most once, and is
            // never queued merely because it happens not to match one particular rule.
            bool disallowed = false;
            foreach (string rule in disAllow.ToList()) // iterate over a copy of the rule list
            {
                if (rightUrl.Contains(rule))
                {
                    disallowed = true;
                    break;
                }
            }

            if (!disallowed && !visitedUrl.Contains(rightUrl))
            {
                myStorage.AddQueue(rightUrl, false);
                count++;
            }
            else
            {
                // Disallowed or already-visited links are recorded exactly once.
                errorList.Add(rightUrl);
            }
        }
    }
    else if (string.Equals(commandmessage, "done"))
    {
        myStorage.getMsgFromQueue(true);
        errorcount++;
    }
}
/// <summary>
/// Runs one crawl step, driven by the message peeked from the queue selected by <c>true</c>.
/// </summary>
/// <remarks>
/// On "getStart": takes a URL from the queue selected by <c>false</c>, records it in
/// <c>visitedUrl</c>, inserts an entity into the "hw3table" table, and enqueues every link
/// returned by the parser that matches no disallow rule and has not been visited. Each
/// rejected link is added to <c>errorList</c> once.
/// On "done": removes the message from the queue selected by <c>true</c> and increments
/// <c>errorcount</c>.
/// NOTE(review): the boolean flag presumably selects the command queue (true) versus the
/// URL queue (false) - confirm against the storage helper.
/// </remarks>
public void StartCrawling()
{
    string commandmessage = myStorage.PeekFromQueue(true);

    // The static string.Equals is null-safe, so a null peek result simply matches
    // neither command instead of throwing a NullReferenceException.
    if (string.Equals(commandmessage, "getStart"))
    {
        string url = myStorage.getMsgFromQueue(false);
        if (string.IsNullOrEmpty(url))
        {
            // Nothing was dequeued, so there is no page to fetch or record.
            return;
        }

        visitedUrl.Add(url);
        getHTML parser = new getHTML(url);

        // NOTE(review): the entity is built from `myurl`, not from the `url` that was just
        // dequeued - confirm this is intended and not a typo.
        Entity urlentity = new Entity(myurl, "PageUrl");
        CloudTable table = tableClient.GetTableReference("hw3table");
        TableOperation insertOperation = TableOperation.Insert(urlentity);
        table.Execute(insertOperation);

        foreach (string rightUrl in parser.GetAllLink())
        {
            // A link is disallowed when it contains ANY rule. Deciding this before
            // enqueueing ensures each link is queued (and counted) at most once, and is
            // never queued merely because it happens not to match one particular rule.
            bool disallowed = false;
            foreach (string rule in disAllow.ToList()) // iterate over a copy of the rule list
            {
                if (rightUrl.Contains(rule))
                {
                    disallowed = true;
                    break;
                }
            }

            if (!disallowed && !visitedUrl.Contains(rightUrl))
            {
                myStorage.AddQueue(rightUrl, false);
                count++;
            }
            else
            {
                // Disallowed or already-visited links are recorded exactly once.
                errorList.Add(rightUrl);
            }
        }
    }
    else if (string.Equals(commandmessage, "done"))
    {
        myStorage.getMsgFromQueue(true);
        errorcount++;
    }
}