/// <summary>
/// Job-manager entry point: loads the experiment definition, schedules one Batch task per
/// benchmark blob that matches the experiment's extension filter (unless the experiment already
/// has a recorded benchmark total), monitors the job until completion and optionally appends
/// the results to a summary.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the experiment id (parsed as an invariant-culture integer);
/// the optional <c>args[1]</c> is the name of the summary to append to.
/// </param>
static async Task ManageTasks(string[] args)
{
    int experimentId = int.Parse(args[0], CultureInfo.InvariantCulture);
    string summaryName = args.Length > 1 ? args[1] : null;

    string jobId = Environment.GetEnvironmentVariable(JobIdEnvVariableName);

    var secretStorage = new SecretStorage(
        Settings.Default.AADApplicationId,
        Settings.Default.AADApplicationCertThumbprint,
        Settings.Default.KeyVaultUrl);
    BatchConnectionString credentials = new BatchConnectionString(
        await secretStorage.GetSecret(Settings.Default.ConnectionStringSecretId));
    Console.WriteLine("Retrieved credentials.");

    var batchCred = new BatchSharedKeyCredentials(
        credentials.BatchURL, credentials.BatchAccountName, credentials.BatchAccessKey);
    var storage = new AzureExperimentStorage(credentials.WithoutBatchData().ToString());

    // All run parameters come from the stored experiment definition.
    var expInfo = await storage.GetExperiment(experimentId);
    string benchmarkContainerUri = expInfo.BenchmarkContainerUri;
    string benchmarkDirectory = expInfo.BenchmarkDirectory;
    string benchmarkCategory = expInfo.Category;
    string extensionsString = expInfo.BenchmarkFileExtension;
    string domainString = expInfo.DomainName;
    string executable = expInfo.Executable;
    string arguments = expInfo.Parameters;
    double timeout = expInfo.BenchmarkTimeout;
    double memoryLimit = expInfo.MemoryLimitMB;
    int maxRepetitions = expInfo.AdaptiveRunMaxRepetitions;
    double maxTime = expInfo.AdaptiveRunMaxTimeInSeconds;

    AzureBenchmarkStorage benchmarkStorage = CreateBenchmarkStorage(benchmarkContainerUri, storage);

    await storage.CreateResultsQueue(experimentId);
    Console.WriteLine("Created queue");

    await FetchSavedResults(experimentId, storage);
    Console.WriteLine("Fetched existing results");

    var collectionTask = CollectResults(experimentId, storage);
    Console.WriteLine("Started collection thread.");

    Domain domain = ResolveDomain(domainString);

    // Accepted extensions: the domain's defaults, or the '|'-separated list from the
    // experiment with surrounding whitespace and leading dots removed.
    SortedSet<string> extensions;
    if (string.IsNullOrEmpty(extensionsString))
    {
        extensions = new SortedSet<string>(domain.BenchmarkExtensions.Distinct());
    }
    else
    {
        extensions = new SortedSet<string>(
            extensionsString.Split('|').Select(s => s.Trim().TrimStart('.')).Distinct());
    }

    // A blob is accepted when the text after the last '.' of its file name is in the set.
    // A file name without any '.' has the empty extension, so it is accepted exactly when
    // "" is one of the allowed extensions.
    Func<string, bool> hasAcceptedExtension = blobName =>
    {
        string fileName = blobName.Substring(blobName.LastIndexOf('/') + 1);
        int dotIndex = fileName.LastIndexOf('.');
        string extension = dotIndex < 0 ? "" : fileName.Substring(dotIndex + 1);
        return extensions.Contains(extension);
    };

    using (BatchClient batchClient = BatchClient.Open(batchCred))
    {
        if (expInfo.TotalBenchmarks <= 0)
        {
            // Not all tasks of the experiment were started yet.
            ODATADetailLevel detailLevel = new ODATADetailLevel();
            detailLevel.SelectClause = "id,displayName";

            Console.WriteLine("Listing existing tasks.");
            // Tasks with an integer id carry the benchmark blob name as display name;
            // those blobs must not be scheduled again.
            var processedBlobs = new SortedSet<string>(
                batchClient.JobOperations.ListTasks(jobId, detailLevel)
                    .Where(t =>
                    {
                        int ignoredId;
                        return int.TryParse(t.Id, out ignoredId);
                    })
                    .Select(t => t.DisplayName));
            Console.WriteLine("Done!");

            BlobContinuationToken continuationToken = null;
            List<Task> starterTasks = new List<Task>();
            int totalBenchmarks = 0;
            string benchmarksPath = CombineBlobPath(benchmarkDirectory, benchmarkCategory);
            string outputQueueUri = storage.GetOutputQueueSASUri(experimentId, TimeSpan.FromHours(48));
            string outputContainerUri = storage.GetOutputContainerSASUri(TimeSpan.FromHours(48));

            do
            {
                BlobResultSegment resultSegment =
                    await benchmarkStorage.ListBlobsSegmentedAsync(benchmarksPath, continuationToken);
                Console.WriteLine("Got some blobs");

                string[] blobNamesToProcess = resultSegment.Results
                    .OfType<CloudBlockBlob>()
                    .Select(blob => blob.Name)
                    .Where(name => !processedBlobs.Contains(name) && hasAcceptedExtension(name))
                    .ToArray();

                // NOTE(review): timeout is formatted with the current culture; confirm the
                // consumer of this string parses it with the same culture.
                // The running total is passed before it is incremented for this segment.
                starterTasks.Add(StartTasksForSegment(
                    timeout.ToString(), experimentId, executable, arguments, memoryLimit, domainString,
                    outputQueueUri, outputContainerUri, null, null, jobId, batchClient,
                    blobNamesToProcess, benchmarksPath, totalBenchmarks, benchmarkStorage,
                    maxRepetitions, maxTime));

                continuationToken = resultSegment.ContinuationToken;
                totalBenchmarks += blobNamesToProcess.Length;
            }
            while (continuationToken != null);

            await storage.SetTotalBenchmarks(experimentId, totalBenchmarks);
            Program.totalBenchmarks = totalBenchmarks;
            totalBenchmarksToProcess = totalBenchmarks;

            await Task.WhenAll(starterTasks.ToArray());
            Console.WriteLine("Finished starting tasks");
        }
        else
        {
            // The total was recorded by an earlier run; reuse it instead of listing blobs again.
            Program.totalBenchmarks = expInfo.TotalBenchmarks;
            totalBenchmarksToProcess = expInfo.TotalBenchmarks;
        }

        // NOTE(review): the call is not awaited — confirm MonitorTasksUntilCompletion is synchronous.
        MonitorTasksUntilCompletion(experimentId, jobId, collectionTask, batchClient);

        if (summaryName != null)
        {
            Trace.WriteLine(string.Format("Building summary for experiment {0} and summary name {1}...", experimentId, summaryName));
            AzureSummaryManager manager = new AzureSummaryManager(
                credentials.WithoutBatchData().ToString(), MEFDomainResolver.Instance);
            await AppendSummary(summaryName, experimentId, domain, manager);
        }
        else
        {
            Trace.WriteLine("No summary requested.");
        }

        Console.WriteLine("Closing.");
    }
}
/// <summary>
/// Job-manager entry point for re-running selected benchmarks of an existing experiment.
/// Reads the list of benchmarks from a blob in the temporary container, starts tasks for those
/// that are not already present in the Batch job, monitors the job until completion and
/// finally deletes the list blob.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the experiment id (parsed as an invariant-culture integer);
/// <c>args[1]</c> is the id of the blob holding the newline-separated benchmark list;
/// the optional <c>args[2]</c> is the benchmark container URI.
/// </param>
/// <exception cref="ArgumentException">
/// No container URI was given and the experiment does not use the default container.
/// </exception>
static async Task ManageRetry(string[] args)
{
    int experimentId = int.Parse(args[0], CultureInfo.InvariantCulture);
    string benchmarkListBlobId = args[1];
    string benchmarkContainerUri = args.Length > 2 ? args[2] : null;

    string jobId = Environment.GetEnvironmentVariable(JobIdEnvVariableName);

    var secretStorage = new SecretStorage(
        Settings.Default.AADApplicationId,
        Settings.Default.AADApplicationCertThumbprint,
        Settings.Default.KeyVaultUrl);
    BatchConnectionString credentials = new BatchConnectionString(
        await secretStorage.GetSecret(Settings.Default.ConnectionStringSecretId));
    Console.WriteLine("Retrieved credentials.");

    var batchCred = new BatchSharedKeyCredentials(
        credentials.BatchURL, credentials.BatchAccountName, credentials.BatchAccessKey);
    var storage = new AzureExperimentStorage(credentials.WithoutBatchData().ToString());

    var expInfo = await storage.GetExperiment(experimentId);

    if (benchmarkContainerUri == null)
    {
        // Without an explicit URI only the default container can be used.
        if (expInfo.BenchmarkContainerUri != ExperimentDefinition.DefaultContainerUri)
        {
            throw new ArgumentException("New URI for non-default benchmark container was not provided.");
        }

        benchmarkContainerUri = ExperimentDefinition.DefaultContainerUri;
    }

    AzureBenchmarkStorage benchmarkStorage = CreateBenchmarkStorage(benchmarkContainerUri, storage);

    await storage.CreateResultsQueue(experimentId);
    Console.WriteLine("Created queue");

    // We can't tell bad results we got during previous runs on the same experiment from bad results
    // we got during this run when job manager crashed, so we put them all into 'good' list.
    // 'Fresh' (and, therefore, duplicate) bad results will be removed during deduplication.
    goodResults = (await storage.GetAzureExperimentResults(experimentId)).Item1.ToList();
    Console.WriteLine("Fetched existing results");

    // The resolved domain is not used in this method; the call is kept at its original
    // position. NOTE(review): presumably it validates the domain name early — confirm.
    ResolveDomain(expInfo.DomainName);

    string benchmarksPath = CombineBlobPath(expInfo.BenchmarkDirectory, expInfo.Category);

    // One benchmark per line; blank lines are skipped and each entry is prefixed with the
    // benchmarks path.
    var benchmarkListBlob = storage.TempBlobContainer.GetBlockBlobReference(benchmarkListBlobId);
    string[] benchmarkList = (await benchmarkListBlob.DownloadTextAsync())
        .Split('\n')
        .Select(line => line.Trim())
        .Where(line => !string.IsNullOrEmpty(line))
        .Select(line => benchmarksPath + line)
        .ToArray();

    totalBenchmarksToProcess = benchmarkList.Length;
    totalBenchmarks = expInfo.TotalBenchmarks;
    Console.WriteLine("Retrieved list of benchmarks to re-process. Total: {0}.", totalBenchmarksToProcess);

    var collectionTask = CollectResults(experimentId, storage);
    Console.WriteLine("Started collection thread.");

    using (BatchClient batchClient = BatchClient.Open(batchCred))
    {
        ODATADetailLevel detailLevel = new ODATADetailLevel();
        detailLevel.SelectClause = "id,displayName";

        Console.WriteLine("Listing existing tasks.");
        // Tasks with an integer id carry the benchmark blob name as display name;
        // those blobs are already scheduled in this job.
        var processedBlobs = new SortedSet<string>(
            batchClient.JobOperations.ListTasks(jobId, detailLevel)
                .Where(t =>
                {
                    int ignoredId;
                    return int.TryParse(t.Id, out ignoredId);
                })
                .Select(t => t.DisplayName));
        Console.WriteLine("Done!");

        string outputQueueUri = storage.GetOutputQueueSASUri(experimentId, TimeSpan.FromHours(48));
        string outputContainerUri = storage.GetOutputContainerSASUri(TimeSpan.FromHours(48));

        string[] blobsToProcess = benchmarkList.Where(b => !processedBlobs.Contains(b)).ToArray();
        if (blobsToProcess.Length > 0)
        {
            // NOTE(review): the timeout is formatted with the current culture; confirm the
            // consumer of this string parses it with the same culture.
            await StartTasksForSegment(
                expInfo.BenchmarkTimeout.ToString(), experimentId, expInfo.Executable, expInfo.Parameters,
                expInfo.MemoryLimitMB, expInfo.DomainName, outputQueueUri, outputContainerUri, null, null,
                jobId, batchClient, blobsToProcess, benchmarksPath, 0, benchmarkStorage,
                expInfo.AdaptiveRunMaxRepetitions, expInfo.AdaptiveRunMaxTimeInSeconds);
            Console.WriteLine("Finished starting tasks");
        }

        // NOTE(review): the call is not awaited — confirm MonitorTasksUntilCompletion is synchronous.
        MonitorTasksUntilCompletion(experimentId, jobId, collectionTask, batchClient);
    }

    Console.WriteLine("Deleting blob with benchmark list.");
    await benchmarkListBlob.DeleteIfExistsAsync();
    Console.WriteLine("Closing.");
}
/// <summary>
/// Job-manager entry point: loads the experiment definition, skips benchmarks that already have
/// good results or an active task, creates a fresh Batch job when the existing one is sealed,
/// schedules one task per remaining benchmark blob, monitors the job until completion,
/// optionally builds a summary, and finally tries to delete the job and the results queue.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the experiment id (parsed as an invariant-culture integer);
/// the optional <c>args[1]</c> is the summary name. A summary is only built when it is
/// given and the experiment's creator is "Nightly".
/// </param>
static async Task ManageTasks(string[] args)
{
    // Upper bound on the "-N" job-id suffixes tried when a fresh job has to be created.
    const int MaxJobCreationAttempts = 100;

    int experimentId = int.Parse(args[0], CultureInfo.InvariantCulture);
    string summaryName = args.Length > 1 ? args[1] : null;

    // Debug builds use a fixed job id instead of the one supplied through the environment.
#if DEBUG
    string jobId = "cz3_exp8535";
#else
    string jobId = Environment.GetEnvironmentVariable(JobIdEnvVariableName);
#endif

    Console.WriteLine("Retrieving credentials...");
    var secretStorage = new SecretStorage(
        Settings.Default.AADApplicationId,
        Settings.Default.AADApplicationCertThumbprint,
        Settings.Default.KeyVaultUrl);
    BatchConnectionString credentials = new BatchConnectionString(
        await secretStorage.GetSecret(Settings.Default.ConnectionStringSecretId));
    var batchCred = new BatchSharedKeyCredentials(
        credentials.BatchURL, credentials.BatchAccountName, credentials.BatchAccessKey);
    var storage = new AzureExperimentStorage(credentials.WithoutBatchData().ToString());

    // All run parameters come from the stored experiment definition.
    var expInfo = await storage.GetExperiment(experimentId);
    string benchmarkContainerUri = expInfo.BenchmarkContainerUri;
    string benchmarkDirectory = expInfo.BenchmarkDirectory;
    string benchmarkCategory = expInfo.Category;
    string extensionsString = expInfo.BenchmarkFileExtension;
    string domainString = expInfo.DomainName;
    string executable = expInfo.Executable;
    string arguments = expInfo.Parameters;
    double timeout = expInfo.BenchmarkTimeout;
    double memoryLimit = expInfo.MemoryLimitMB;
    int maxRepetitions = expInfo.AdaptiveRunMaxRepetitions;
    double maxTime = expInfo.AdaptiveRunMaxTimeInSeconds;
    long? outputLimit = 1 * (1024 * 1024); // 1 MB
    long? errorLimit = 256 * 1024; // 256 KB

    AzureBenchmarkStorage benchmarkStorage = CreateBenchmarkStorage(benchmarkContainerUri, storage);
    await storage.CreateResultsQueue(experimentId);

    // Stopwatch measures elapsed time independently of wall-clock adjustments.
    Stopwatch timer = Stopwatch.StartNew();
    Console.Write("Fetching existing results...");
    await FetchSavedResults(experimentId, storage);

    Domain domain = ResolveDomain(domainString);

    // Accepted extensions: the domain's defaults, or the '|'-separated list from the
    // experiment with surrounding whitespace and leading dots removed.
    HashSet<string> extensions;
    if (string.IsNullOrEmpty(extensionsString))
    {
        extensions = new HashSet<string>(domain.BenchmarkExtensions.Distinct());
    }
    else
    {
        extensions = new HashSet<string>(
            extensionsString.Split('|').Select(s => s.Trim().TrimStart('.')).Distinct());
    }

    // A blob is accepted when the text after the last '.' of its file name is in the set.
    // A file name without any '.' has the empty extension, so it is accepted exactly when
    // "" is one of the allowed extensions.
    Func<string, bool> hasAcceptedExtension = blobName =>
    {
        string fileName = blobName.Substring(blobName.LastIndexOf('/') + 1);
        int dotIndex = fileName.LastIndexOf('.');
        string extension = dotIndex < 0 ? "" : fileName.Substring(dotIndex + 1);
        return extensions.Contains(extension);
    };

    using (BatchClient batchClient = BatchClient.Open(batchCred))
    {
        // Exclude benchmarks that finished correctly.
        var processedBlobs = new HashSet<string>();
        string prefix = (benchmarkDirectory.Trim('/') + "/" + benchmarkCategory.Trim('/')).Trim('/');
        foreach (var goodResult in goodResults)
        {
            processedBlobs.Add((prefix + "/" + goodResult.BenchmarkFileName).Trim());
        }
        Console.WriteLine(" took {0}.", timer.Elapsed);

        // Exclude those that are still in progress.
        ODATADetailLevel detailLevel = new ODATADetailLevel();
        detailLevel.FilterClause = "(state eq 'active') or (state eq 'running') or (state eq 'preparing')";
        detailLevel.SelectClause = "id,displayName";

        CloudJob oldJob = null;
        try
        {
            oldJob = batchClient.JobOperations.GetJob(jobId);
        }
        catch (Exception ex)
        {
            // Best effort: without the job there are no in-progress tasks to exclude.
            Trace.WriteLine(string.Format("Could not retrieve job {0}: {1}", jobId, ex.Message));
        }

        if (oldJob != null)
        {
            timer.Restart();
            Console.Write("Listing existing tasks...");
            foreach (CloudTask task in batchClient.JobOperations.ListTasks(jobId, detailLevel))
            {
                int ignoredId;
                if (int.TryParse(task.Id, out ignoredId))
                {
                    // Tasks with an integer id carry the benchmark blob name as display name.
                    processedBlobs.Add(task.DisplayName.Trim());
                }
            }
            Console.WriteLine(" took {0}.", timer.Elapsed);

            // Create a new job if the old one is already sealed off.
            bool oldJobIsSealed;
            switch (oldJob.State)
            {
                case Microsoft.Azure.Batch.Common.JobState.Completed:
                case Microsoft.Azure.Batch.Common.JobState.Deleting:
                case Microsoft.Azure.Batch.Common.JobState.Disabled:
                case Microsoft.Azure.Batch.Common.JobState.Disabling:
                case Microsoft.Azure.Batch.Common.JobState.Terminating:
                    oldJobIsSealed = true;
                    break;
                default:
                    oldJobIsSealed = false;
                    break;
            }

            if (oldJobIsSealed)
            {
                timer.Restart();
                Console.Write("Creating fresh job...");
                PoolInformation poolInfo = oldJob.PoolInformation;
                string newJobId = null;

                // Try "<jobId>-1", "<jobId>-2", ... until a job can be committed. The number
                // of attempts is bounded so that a persistent failure surfaces as an
                // exception instead of looping forever.
                for (int suffix = 1; newJobId == null; suffix++)
                {
                    string candidateId = String.Format("{0}-{1}", jobId, suffix);
                    try
                    {
                        CloudJob newJob = batchClient.JobOperations.CreateJob(candidateId, poolInfo);
                        newJob.OnAllTasksComplete = Microsoft.Azure.Batch.Common.OnAllTasksComplete.NoAction;
                        newJob.OnTaskFailure = oldJob.OnTaskFailure;
                        newJob.Constraints = oldJob.Constraints;
                        newJob.DisplayName = oldJob.DisplayName;
                        newJob.Commit();
                        newJobId = candidateId;
                    }
                    catch (Microsoft.Azure.Batch.Common.BatchException)
                    {
                        if (suffix >= MaxJobCreationAttempts)
                        {
                            throw;
                        }
                        Console.Write(".");
                    }
                }

                jobId = newJobId;
                Console.WriteLine(" took {0}.", timer.Elapsed);
            }
        }

        BlobContinuationToken continuationToken = null;
        timer.Restart();
        Console.Write("Adding tasks...");
        List<Task> starterTasks = new List<Task>();
        int benchmarksTotal = processedBlobs.Count;
        string benchmarksPath = CombineBlobPath(benchmarkDirectory, benchmarkCategory);
        string outputQueueUri = storage.GetOutputQueueSASUri(experimentId, TimeSpan.FromHours(48));
        string outputContainerUri = storage.GetOutputContainerSASUri(TimeSpan.FromHours(48));

        do
        {
            BlobResultSegment resultSegment =
                await benchmarkStorage.ListBlobsSegmentedAsync(benchmarksPath, continuationToken);

            string[] blobNamesToProcess = resultSegment.Results
                .OfType<CloudBlockBlob>()
                .Select(blob => blob.Name)
                .Where(name => !processedBlobs.Contains(name) && hasAcceptedExtension(name))
                .ToArray();

            // NOTE(review): timeout is formatted with the current culture; confirm the
            // consumer of this string parses it with the same culture.
            // The running total is passed before it is incremented for this segment.
            starterTasks.Add(StartTasksForSegment(
                timeout.ToString(), experimentId, executable, arguments, memoryLimit, domainString,
                outputQueueUri, outputContainerUri, outputLimit, errorLimit, jobId, batchClient,
                blobNamesToProcess, benchmarksPath, benchmarksTotal, benchmarkStorage,
                maxRepetitions, maxTime));

            continuationToken = resultSegment.ContinuationToken;
            benchmarksTotal += blobNamesToProcess.Length;
        }
        while (continuationToken != null);

        await storage.SetBenchmarksTotal(experimentId, benchmarksTotal);
        Program.benchmarksTotal = benchmarksTotal;
        benchmarksToProcess = benchmarksTotal - goodResults.Count;
        Console.WriteLine(" took {0}.", timer.Elapsed);

        timer.Restart();
        Console.Write("Waiting for tasks to start...");
        await Task.WhenAll(starterTasks.ToArray());
        Console.WriteLine(" took {0}.", timer.Elapsed);

        // All tasks are submitted, so the job may now terminate once they are all complete.
        CloudJob currentJob = batchClient.JobOperations.GetJob(jobId);
        currentJob.OnAllTasksComplete = Microsoft.Azure.Batch.Common.OnAllTasksComplete.TerminateJob;
        currentJob.CommitChanges();

        timer.Restart();
        Console.Write("Waiting for results...");
        var collectionTask = CollectResults(experimentId, storage);
        Console.WriteLine(" took {0}.", timer.Elapsed);

        // NOTE(review): the call is not awaited — confirm MonitorTasksUntilCompletion is synchronous.
        MonitorTasksUntilCompletion(experimentId, jobId, collectionTask, batchClient, domain);

        if (summaryName != null && expInfo.Creator == "Nightly")
        {
            Trace.WriteLine(string.Format("Building summary for experiment {0} and summary name {1}...", experimentId, summaryName));
            AzureSummaryManager manager = new AzureSummaryManager(
                credentials.WithoutBatchData().ToString(), MEFDomainResolver.Instance);
            await AppendSummaryAndSendReport(summaryName, experimentId, domain, manager);
        }
        else
        {
            Trace.WriteLine("No summary requested.");
        }

        // Best-effort cleanup: delete the job and the queue only when the queue reports no
        // pending messages and the job is completed or disabled.
        try
        {
            // NOTE(review): in the Azure Storage client ApproximateMessageCount is only
            // populated after the queue attributes were fetched; confirm that
            // GetResultsQueueReference does so, otherwise this cleanup never runs.
            int? pendingMessages = storage.GetResultsQueueReference(experimentId).ApproximateMessageCount;
            if (pendingMessages.HasValue && pendingMessages.Value == 0)
            {
                switch (batchClient.JobOperations.GetJob(jobId).State)
                {
                    case Microsoft.Azure.Batch.Common.JobState.Completed:
                    case Microsoft.Azure.Batch.Common.JobState.Disabled:
                        Console.WriteLine("Deleting Batch job and results queue.");
                        await batchClient.JobOperations.DeleteJobAsync(jobId);
                        await storage.DeleteResultsQueue(experimentId);
                        break;
                }
            }
        }
        catch (Exception ex)
        {
            // Cleanup failures must not fail the run, but they should be visible in the trace.
            Trace.WriteLine(string.Format("Cleanup of job {0} failed: {1}", jobId, ex.Message));
        }

        Console.WriteLine("Closing.");
    }
}