/// <summary>
/// Looks up pages matching the words of <paramref name="query"/> and returns them as a
/// JSON-serialized list of strings.
/// </summary>
/// <remarks>
/// The query is trimmed and lower-cased, then split on whitespace. Each word is
/// Base64-encoded and used as a <c>PartitionKey</c> equality filter against the site data
/// table. Matching rows are grouped by URL and ordered by how many rows each URL collected
/// (descending). Each entry of the result has the form <c>URL$Title</c>, where the title is
/// taken from the first row of the group. Results are stored in <c>cache</c> under the
/// normalized query text and served from there on later calls.
/// </remarks>
/// <param name="query">Free-text search string; <c>null</c> is treated as an empty query.</param>
/// <returns>The serialized list of <c>URL$Title</c> entries (an empty list when nothing matches).</returns>
public string GetSearchResults(string query)
{
    SiteDataTable = CloudConfiguration.GetSiteDataTable();

    // A null query is handled like an empty one instead of failing on Trim().
    query = (query ?? string.Empty).Trim().ToLower();

    if (cache.ContainsKey(query))
    {
        return new JavaScriptSerializer().Serialize(cache[query]);
    }

    // A null separator splits on whitespace. Empty tokens (empty query, or several
    // consecutive spaces) are dropped so no table query is issued for the encoding of "".
    var keywords = query.Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
                        .Select(x => Base64.Base64Encode(x));

    var results = new List<URLEntity>();
    foreach (string keyword in keywords)
    {
        TableQuery<URLEntity> rangeQuery = new TableQuery<URLEntity>()
            .Where(TableQuery.GenerateFilterCondition("PartitionKey", QueryComparisons.Equal, keyword));
        var data = SiteDataTable.ExecuteQuery(rangeQuery);
        results.AddRange(data);
    }

    // Item1 = URL, Item2 = number of rows found for that URL, Item3 = title of the first row.
    var siteMatches = results.GroupBy(x => x.URL)
                             .Select(group => new Tuple<string, int, string>(group.Key, group.Count(), group.First().Title))
                             .OrderByDescending(tuple => tuple.Item2);

    var links = siteMatches.Select(x => x.Item1 + "$" + x.Item3).ToList<string>();

    // Indexer assignment rather than Add: Add throws ArgumentException if the same key was
    // inserted after the ContainsKey check above (e.g. by an overlapping request).
    cache[query] = links;

    return new JavaScriptSerializer().Serialize(links);
}
/// <summary>
/// Worker loop: initializes storage, queues, tables and performance counters, then
/// repeatedly pulls URLs from the load and crawl queues and hands them to the crawler
/// until a message appears on the stop queue.
/// </summary>
/// <remarks>
/// The method never returns. While a stop message is being returned by the stop queue the
/// inner processing loop is skipped and <c>State</c> is set to "Idle".
/// </remarks>
public override void Run()
{
    Storage = new AzureStorage();
    LoadQueue = CloudConfiguration.GetLoadingQueue();
    CrawlQueue = CloudConfiguration.GetCrawlingQueue();
    StopQueue = CloudConfiguration.GetStopQueue();
    SiteDataTable = CloudConfiguration.GetSiteDataTable();
    AdminStatusTable = CloudConfiguration.GetAdminStatusTable();
    StateQueue = CloudConfiguration.GetStateQueue();

    State = "Idle";
    CPUCount = new PerformanceCounter("Processor", "% Processor Time", "_Total");
    MemCount = new PerformanceCounter("Memory", "Available MBytes");
    Status = new AdminStatus(State, (int)CPUCount.NextValue(), (int)MemCount.NextValue());

    string[] robots = { "http://www.cnn.com/robots.txt", "http://www.bleacherreport.com/robots.txt" };
    Crawler = new WebCrawler(robots, Storage);

    Thread.Sleep(10000);

    string url = "";
    while (true)
    {
        CloudQueueMessage stopMessage = StopQueue.GetMessage();
        while (stopMessage == null)
        {
            // Get the next message
            CloudQueueMessage loadMessage = LoadQueue.GetMessage();

            // State is set to "Loading" on every pass, before it is known whether a load
            // message exists, so the state check on the crawl branch below always passes.
            State = "Loading";
            if (loadMessage != null)
            {
                url = loadMessage.AsString;
                if (url.Contains("robots.txt"))
                {
                    // A robots message may carry several whitespace-separated links.
                    string[] robotLinks = url.Split(null);
                    foreach (string link in robotLinks)
                    {
                        Crawler.ProcessURL(link);
                    }
                }
                else
                {
                    Crawler.ProcessURL(url);
                }

                // Delete the message in both cases. Previously only robots.txt messages
                // were deleted, so a plain-URL message became visible again after its
                // visibility timeout and was processed repeatedly.
                LoadQueue.DeleteMessage(loadMessage);
            }
            else if (State.Equals("Loading") || State.Equals("Crawling"))
            {
                // dequeue crawl message
                CloudQueueMessage crawlMessage = CrawlQueue.GetMessage();
                if (crawlMessage != null)
                {
                    State = "Crawling";
                    url = crawlMessage.AsString;
                    Crawler.ProcessURL(url);
                    CrawlQueue.DeleteMessage(crawlMessage);
                }
            }

            stopMessage = StopQueue.GetMessage();
            UpdateDashboard(url);
        }

        State = "Idle";
    }
}