static async Task CollectResults(int experimentId, AzureExperimentStorage storage) { Console.WriteLine("Started collection."); var queue = storage.GetResultsQueueReference(experimentId); List <AzureBenchmarkResult> results = new List <AzureBenchmarkResult>(); // (await storage.GetAzureExperimentResults(experimentId)).ToList(); int processedBenchmarks = 0; // goodResults.Count + badResults.Count;// results.Count; var formatter = new BinaryFormatter(); bool completed = false; do { completed = totalBenchmarksToProcess != -1 && completedTasksCount >= totalBenchmarksToProcess; var messages = queue.GetMessages(32, TimeSpan.FromMinutes(5)); int messageCount = messages.Count(); completed = completed && messageCount == 0; foreach (CloudQueueMessage message in messages) { using (var ms = new MemoryStream(message.AsBytes)) { goodResults.Add((AzureBenchmarkResult)formatter.Deserialize(ms)); } } int oldCount = results.Count; results = goodResults.Concat(badResults).ToList(); var tuple = SortCountUniqueNamesAndRemoveExactDuplicates(results); processedBenchmarks = tuple.Item1; results = tuple.Item2; await storage.PutAzureExperimentResults(experimentId, results.ToArray(), AzureExperimentStorage.UploadBlobMode.CreateOrReplace); int completedBenchmarks = totalBenchmarks == -1 ? processedBenchmarks : totalBenchmarks - totalBenchmarksToProcess + completedTasksCount; await storage.SetCompletedBenchmarks(experimentId, completedBenchmarks); Console.WriteLine("Setting completed benchmarks to {0}.\nTotal benchmarks: {1}\nProcessed benchmarks: {2}\nTotal to process: {3}\nCompleted tasks: {4}\nMessage count: {5}", completedBenchmarks, totalBenchmarks, processedBenchmarks, totalBenchmarksToProcess, completedTasksCount, messageCount); foreach (CloudQueueMessage message in messages) { queue.DeleteMessage(message); } if (oldCount == results.Count) { Thread.Sleep(500); } }while (!completed); await storage.DeleteResultsQueue(experimentId); var totalRuntime = results.Sum(r => r.NormalizedRuntime); await storage.SetTotalRuntime(experimentId, totalRuntime); Console.WriteLine("Collected all results."); }
static async Task ManageTasks(string[] args) { int experimentId = int.Parse(args[0], CultureInfo.InvariantCulture); string summaryName = null; if (args.Length > 1) { summaryName = args[1]; } //Console.WriteLine(String.Format("Params are:\n id: {0}\ncontainer: {8}\ndirectory:{9}\ncategory: {1}\nextensions: {10}\ndomain: {11}\nexec: {2}\nargs: {3}\ntimeout: {4}\nmemlimit: {5}\noutlimit: {6}\nerrlimit: {7}", experimentId, benchmarkCategory, executable, arguments, timeout, memoryLimit, outputLimit, errorLimit, benchmarkContainerUri, benchmarkDirectory, extensionsString, domainString)); #if DEBUG string jobId = "cz3_exp8535"; #else string jobId = Environment.GetEnvironmentVariable(JobIdEnvVariableName); #endif Console.WriteLine("Retrieving credentials..."); var secretStorage = new SecretStorage(Settings.Default.AADApplicationId, Settings.Default.AADApplicationCertThumbprint, Settings.Default.KeyVaultUrl); BatchConnectionString credentials = new BatchConnectionString(await secretStorage.GetSecret(Settings.Default.ConnectionStringSecretId)); var batchCred = new BatchSharedKeyCredentials(credentials.BatchURL, credentials.BatchAccountName, credentials.BatchAccessKey); var storage = new AzureExperimentStorage(credentials.WithoutBatchData().ToString()); var expInfo = await storage.GetExperiment(experimentId); string benchmarkContainerUri = expInfo.BenchmarkContainerUri; // args[1]; string benchmarkDirectory = expInfo.BenchmarkDirectory; // args[2]; string benchmarkCategory = expInfo.Category; // args[3]; string extensionsString = expInfo.BenchmarkFileExtension; //args[4]; string domainString = expInfo.DomainName; // args[5]; string executable = expInfo.Executable; // args[6]; string arguments = expInfo.Parameters; // args[7]; double timeout = expInfo.BenchmarkTimeout; // TimeSpan.FromSeconds(double.Parse(args[8])); double memoryLimit = expInfo.MemoryLimitMB; // 0; // no limit int maxRepetitions = expInfo.AdaptiveRunMaxRepetitions; double maxTime = expInfo.AdaptiveRunMaxTimeInSeconds; long?outputLimit = 1 * (1024 * 1024); // 1 MB long?errorLimit = 256 * 1024; // 256 KB AzureBenchmarkStorage benchmarkStorage = CreateBenchmarkStorage(benchmarkContainerUri, storage); var queue = await storage.CreateResultsQueue(experimentId); DateTime before = DateTime.Now; Console.Write("Fetching existing results..."); await FetchSavedResults(experimentId, storage); Domain domain = ResolveDomain(domainString); HashSet <string> extensions; if (string.IsNullOrEmpty(extensionsString)) { extensions = new HashSet <string>(domain.BenchmarkExtensions.Distinct()); } else { extensions = new HashSet <string>(extensionsString.Split('|').Select(s => s.Trim().TrimStart('.')).Distinct()); } using (BatchClient batchClient = BatchClient.Open(batchCred)) { // Exclude benchmarks that finished correctly var processedBlobs = new HashSet <string>(); string prefix = (benchmarkDirectory.Trim('/') + "/" + benchmarkCategory.Trim('/')).Trim('/'); foreach (var r in goodResults.Select(g => prefix + "/" + g.BenchmarkFileName)) { processedBlobs.Add(r.Trim()); } Console.WriteLine(" took {0}.", (DateTime.Now - before)); // Exclude those that are still in progress ODATADetailLevel detailLevel = new ODATADetailLevel(); detailLevel.FilterClause = "(state eq 'active') or (state eq 'running') or (state eq 'preparing')"; detailLevel.SelectClause = "id,displayName"; CloudJob old_job = null; try { old_job = batchClient.JobOperations.GetJob(jobId); } catch { /* OK */ } if (old_job != null) { before = DateTime.Now; Console.Write("Listing existing tasks..."); var ts = batchClient.JobOperations.ListTasks(jobId, detailLevel); foreach (CloudTask t in ts) { int id; if (int.TryParse(t.Id, out id)) { string n = t.DisplayName.Trim(); if (!processedBlobs.Contains(n)) { processedBlobs.Add(n); } } } ; Console.WriteLine(" took {0}.", (DateTime.Now - before)); // Create new job if the old one is already sealed off switch (old_job.State) { case Microsoft.Azure.Batch.Common.JobState.Completed: case Microsoft.Azure.Batch.Common.JobState.Deleting: case Microsoft.Azure.Batch.Common.JobState.Disabled: case Microsoft.Azure.Batch.Common.JobState.Disabling: case Microsoft.Azure.Batch.Common.JobState.Terminating: { before = DateTime.Now; Console.Write("Creating fresh job..."); PoolInformation pool_info = old_job.PoolInformation; string new_jid; int cnt = 1; bool have_jid = false; do { new_jid = String.Format("{0}-{1}", jobId, cnt++); try { CloudJob new_job = batchClient.JobOperations.CreateJob(new_jid, pool_info); new_job.OnAllTasksComplete = Microsoft.Azure.Batch.Common.OnAllTasksComplete.NoAction; new_job.OnTaskFailure = old_job.OnTaskFailure; new_job.Constraints = old_job.Constraints; new_job.DisplayName = old_job.DisplayName; new_job.Commit(); have_jid = true; } catch (Microsoft.Azure.Batch.Common.BatchException) { Console.Write("."); } }while (!have_jid); jobId = new_jid; Console.WriteLine(" took {0}.", (DateTime.Now - before)); break; } } } BlobContinuationToken continuationToken = null; BlobResultSegment resultSegment = null; before = DateTime.Now; Console.Write("Adding tasks..."); List <Task> starterTasks = new List <Task>(); int benchmarksTotal = processedBlobs.Count(); string benchmarksPath = CombineBlobPath(benchmarkDirectory, benchmarkCategory); string outputQueueUri = storage.GetOutputQueueSASUri(experimentId, TimeSpan.FromHours(48)); string outputContainerUri = storage.GetOutputContainerSASUri(TimeSpan.FromHours(48)); do { resultSegment = await benchmarkStorage.ListBlobsSegmentedAsync(benchmarksPath, continuationToken); string[] blobNamesToProcess = resultSegment.Results.SelectMany(item => { var blob = item as CloudBlockBlob; if (blob == null || processedBlobs.Contains(blob.Name)) { return new string[] { } } ; var nameParts = blob.Name.Split('/'); var shortnameParts = nameParts[nameParts.Length - 1].Split('.'); if (shortnameParts.Length == 1 && !extensions.Contains("")) { return new string[] { } } ; var ext = shortnameParts[shortnameParts.Length - 1]; if (!extensions.Contains(ext)) { return new string[] { } } ; return(new string[] { blob.Name }); }).ToArray(); starterTasks.Add(StartTasksForSegment(timeout.ToString(), experimentId, executable, arguments, memoryLimit, domainString, outputQueueUri, outputContainerUri, outputLimit, errorLimit, jobId, batchClient, blobNamesToProcess, benchmarksPath, benchmarksTotal, benchmarkStorage, maxRepetitions, maxTime)); continuationToken = resultSegment.ContinuationToken; benchmarksTotal += blobNamesToProcess.Length; }while (continuationToken != null); await storage.SetBenchmarksTotal(experimentId, benchmarksTotal); Program.benchmarksTotal = benchmarksTotal; benchmarksToProcess = benchmarksTotal - goodResults.Count; Console.WriteLine(" took {0}.", (DateTime.Now - before)); before = DateTime.Now; Console.Write("Waiting for tasks to start..."); await Task.WhenAll(starterTasks.ToArray()); Console.WriteLine(" took {0}.", (DateTime.Now - before)); CloudJob j = batchClient.JobOperations.GetJob(jobId); j.OnAllTasksComplete = Microsoft.Azure.Batch.Common.OnAllTasksComplete.TerminateJob; j.CommitChanges(); before = DateTime.Now; Console.Write("Waiting for results..."); var collectionTask = CollectResults(experimentId, storage); Console.WriteLine(" took {0}.", (DateTime.Now - before)); MonitorTasksUntilCompletion(experimentId, jobId, collectionTask, batchClient, domain); if (summaryName != null && expInfo.Creator == "Nightly") { Trace.WriteLine(string.Format("Building summary for experiment {0} and summary name {1}...", experimentId, summaryName)); AzureSummaryManager manager = new AzureSummaryManager(credentials.WithoutBatchData().ToString(), MEFDomainResolver.Instance); await AppendSummaryAndSendReport(summaryName, experimentId, domain, manager); } else { Trace.WriteLine("No summary requested."); } try { int?amc = storage.GetResultsQueueReference(experimentId).ApproximateMessageCount; if (amc.HasValue && amc.Value == 0) { switch (batchClient.JobOperations.GetJob(jobId).State) { case Microsoft.Azure.Batch.Common.JobState.Completed: case Microsoft.Azure.Batch.Common.JobState.Disabled: Console.WriteLine("Deleting Batch job and results queue."); await batchClient.JobOperations.DeleteJobAsync(jobId); await storage.DeleteResultsQueue(experimentId); break; } } } catch { /* OK */ } Console.WriteLine("Closing."); } }