示例#1
0
        /// <summary>
        /// Polls the results queue of an experiment, merges every received result into the
        /// stored result table and updates the experiment's progress until all expected
        /// benchmarks are accounted for; then deletes the queue and records the total runtime.
        /// </summary>
        /// <param name="experimentId">Id of the experiment whose results are collected.</param>
        /// <param name="storage">Storage used to read the queue and persist results and progress.</param>
        /// <remarks>
        /// Relies on <c>goodResults</c>, <c>badResults</c>, <c>totalBenchmarks</c>,
        /// <c>totalBenchmarksToProcess</c> and <c>completedTasksCount</c>, which are defined
        /// outside this method.
        /// </remarks>
        static async Task CollectResults(int experimentId, AzureExperimentStorage storage)
        {
            Console.WriteLine("Started collection.");
            var queue = storage.GetResultsQueueReference(experimentId);
            List<AzureBenchmarkResult> results = new List<AzureBenchmarkResult>();

            // NOTE(review): BinaryFormatter deserialization is unsafe on untrusted input and is
            // obsolete in current .NET; kept because the producers of these messages use the
            // same format. Consider migrating both sides to a safe serializer.
            var formatter = new BinaryFormatter();
            bool completed;

            do
            {
                // Snapshot the completion state BEFORE fetching, so that messages arriving
                // between the fetch and the check cause one more iteration rather than a loss.
                completed = totalBenchmarksToProcess != -1 && completedTasksCount >= totalBenchmarksToProcess;

                // Materialize once: the batch is counted, deserialized and later deleted, and
                // all three passes must see exactly the same messages.
                var messages     = queue.GetMessages(32, TimeSpan.FromMinutes(5)).ToList();
                int messageCount = messages.Count;
                completed = completed && messageCount == 0;

                foreach (CloudQueueMessage message in messages)
                {
                    using (var ms = new MemoryStream(message.AsBytes))
                    {
                        goodResults.Add((AzureBenchmarkResult)formatter.Deserialize(ms));
                    }
                }

                int oldCount = results.Count;
                var tuple = SortCountUniqueNamesAndRemoveExactDuplicates(goodResults.Concat(badResults).ToList());
                int processedBenchmarks = tuple.Item1;
                results = tuple.Item2;
                await storage.PutAzureExperimentResults(experimentId, results.ToArray(), AzureExperimentStorage.UploadBlobMode.CreateOrReplace);

                int completedBenchmarks = totalBenchmarks == -1 ? processedBenchmarks : totalBenchmarks - totalBenchmarksToProcess + completedTasksCount;
                await storage.SetCompletedBenchmarks(experimentId, completedBenchmarks);

                Console.WriteLine("Setting completed benchmarks to {0}.\nTotal benchmarks: {1}\nProcessed benchmarks: {2}\nTotal to process: {3}\nCompleted tasks: {4}\nMessage count: {5}", completedBenchmarks, totalBenchmarks, processedBenchmarks, totalBenchmarksToProcess, completedTasksCount, messageCount);

                // Messages are removed only after the merged results have been persisted above.
                foreach (CloudQueueMessage message in messages)
                {
                    queue.DeleteMessage(message);
                }

                if (oldCount == results.Count)
                {
                    // Nothing new this round: back off briefly without blocking a thread.
                    await Task.Delay(500);
                }
            } while (!completed);

            await storage.DeleteResultsQueue(experimentId);

            var totalRuntime = results.Sum(r => r.NormalizedRuntime);
            await storage.SetTotalRuntime(experimentId, totalRuntime);

            Console.WriteLine("Collected all results.");
        }
示例#2
0
        /// <summary>
        /// Schedules Azure Batch tasks for every benchmark of an experiment that has neither a
        /// saved good result nor a task still in flight, starts result collection, monitors the
        /// job, optionally builds a summary and finally attempts to clean up the job and queue.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the experiment id (parsed with the invariant culture);
        /// the optional <c>args[1]</c> is the summary name.
        /// </param>
        /// <remarks>
        /// Reads <c>goodResults</c> and writes <c>Program.benchmarksTotal</c> and
        /// <c>benchmarksToProcess</c>, which are defined outside this method.
        /// </remarks>
        static async Task ManageTasks(string[] args)
        {
            int    experimentId = int.Parse(args[0], CultureInfo.InvariantCulture);
            string summaryName  = null;

            if (args.Length > 1)
            {
                summaryName = args[1];
            }
#if DEBUG
            string jobId = "cz3_exp8535";
#else
            string jobId = Environment.GetEnvironmentVariable(JobIdEnvVariableName);
#endif
            Console.WriteLine("Retrieving credentials...");
            var secretStorage = new SecretStorage(Settings.Default.AADApplicationId, Settings.Default.AADApplicationCertThumbprint, Settings.Default.KeyVaultUrl);
            BatchConnectionString credentials = new BatchConnectionString(await secretStorage.GetSecret(Settings.Default.ConnectionStringSecretId));

            var batchCred = new BatchSharedKeyCredentials(credentials.BatchURL, credentials.BatchAccountName, credentials.BatchAccessKey);
            var storage   = new AzureExperimentStorage(credentials.WithoutBatchData().ToString());

            // All run parameters come from the stored experiment definition.
            var expInfo = await storage.GetExperiment(experimentId);

            string benchmarkContainerUri = expInfo.BenchmarkContainerUri;
            string benchmarkDirectory    = expInfo.BenchmarkDirectory;
            string benchmarkCategory     = expInfo.Category;
            string extensionsString      = expInfo.BenchmarkFileExtension;
            string domainString          = expInfo.DomainName;
            string executable            = expInfo.Executable;
            string arguments             = expInfo.Parameters;
            double timeout               = expInfo.BenchmarkTimeout;
            double memoryLimit           = expInfo.MemoryLimitMB;
            int    maxRepetitions        = expInfo.AdaptiveRunMaxRepetitions;
            double maxTime               = expInfo.AdaptiveRunMaxTimeInSeconds;

            long? outputLimit = 1 * (1024 * 1024); // 1 MB
            long? errorLimit  = 256 * 1024;        // 256 KB

            AzureBenchmarkStorage benchmarkStorage = CreateBenchmarkStorage(benchmarkContainerUri, storage);

            // Called for its side effect; the returned queue object is not needed here.
            await storage.CreateResultsQueue(experimentId);

            DateTime before = DateTime.Now;
            Console.Write("Fetching existing results...");
            await FetchSavedResults(experimentId, storage);

            Domain domain = ResolveDomain(domainString);

            // Accepted file extensions: an explicit '|'-separated list (entries trimmed, leading
            // dots removed) or, when none is given, the domain's own list.
            HashSet<string> extensions;
            if (string.IsNullOrEmpty(extensionsString))
            {
                extensions = new HashSet<string>(domain.BenchmarkExtensions);
            }
            else
            {
                extensions = new HashSet<string>(extensionsString.Split('|').Select(s => s.Trim().TrimStart('.')));
            }

            using (BatchClient batchClient = BatchClient.Open(batchCred))
            {
                // Exclude benchmarks that finished correctly
                var    processedBlobs = new HashSet<string>();
                string prefix         = (benchmarkDirectory.Trim('/') + "/" + benchmarkCategory.Trim('/')).Trim('/');
                foreach (var r in goodResults.Select(g => prefix + "/" + g.BenchmarkFileName))
                {
                    processedBlobs.Add(r.Trim());
                }
                Console.WriteLine(" took {0}.", (DateTime.Now - before));

                // Exclude those that are still in progress
                ODATADetailLevel detailLevel = new ODATADetailLevel();
                detailLevel.FilterClause = "(state eq 'active') or (state eq 'running') or (state eq 'preparing')";
                detailLevel.SelectClause = "id,displayName";

                // A failed lookup is deliberately treated as "there is no previous job".
                CloudJob old_job = null;
                try { old_job = batchClient.JobOperations.GetJob(jobId); } catch { /* OK */ }

                if (old_job != null)
                {
                    before = DateTime.Now;
                    Console.Write("Listing existing tasks...");
                    var ts = batchClient.JobOperations.ListTasks(jobId, detailLevel);
                    foreach (CloudTask t in ts)
                    {
                        int id;

                        // Only tasks with a numeric id are considered; their display name is
                        // added to the set of blob names to skip.
                        if (int.TryParse(t.Id, out id))
                        {
                            processedBlobs.Add(t.DisplayName.Trim());
                        }
                    }
                    Console.WriteLine(" took {0}.", (DateTime.Now - before));

                    // Create new job if the old one is already sealed off
                    switch (old_job.State)
                    {
                        case Microsoft.Azure.Batch.Common.JobState.Completed:
                        case Microsoft.Azure.Batch.Common.JobState.Deleting:
                        case Microsoft.Azure.Batch.Common.JobState.Disabled:
                        case Microsoft.Azure.Batch.Common.JobState.Disabling:
                        case Microsoft.Azure.Batch.Common.JobState.Terminating:
                        {
                            before = DateTime.Now;
                            Console.Write("Creating fresh job...");
                            PoolInformation pool_info = old_job.PoolInformation;
                            string          new_jid;
                            int             cnt      = 1;
                            bool            have_jid = false;

                            // Try "<jobId>-1", "<jobId>-2", ... until a commit succeeds.
                            // NOTE(review): every BatchException is retried with the next suffix,
                            // so a persistent failure would loop forever - confirm acceptable.
                            do
                            {
                                new_jid = String.Format("{0}-{1}", jobId, cnt++);
                                try
                                {
                                    CloudJob new_job = batchClient.JobOperations.CreateJob(new_jid, pool_info);
                                    new_job.OnAllTasksComplete = Microsoft.Azure.Batch.Common.OnAllTasksComplete.NoAction;
                                    new_job.OnTaskFailure      = old_job.OnTaskFailure;
                                    new_job.Constraints        = old_job.Constraints;
                                    new_job.DisplayName        = old_job.DisplayName;
                                    new_job.Commit();
                                    have_jid = true;
                                }
                                catch (Microsoft.Azure.Batch.Common.BatchException)
                                {
                                    Console.Write(".");
                                }
                            } while (!have_jid);
                            jobId = new_jid;
                            Console.WriteLine(" took {0}.", (DateTime.Now - before));
                            break;
                        }
                    }
                }

                BlobContinuationToken continuationToken = null;
                BlobResultSegment     resultSegment     = null;

                before = DateTime.Now;
                Console.Write("Adding tasks...");
                List<Task> starterTasks       = new List<Task>();
                int        benchmarksTotal    = processedBlobs.Count;
                string     benchmarksPath     = CombineBlobPath(benchmarkDirectory, benchmarkCategory);
                string     outputQueueUri     = storage.GetOutputQueueSASUri(experimentId, TimeSpan.FromHours(48));
                string     outputContainerUri = storage.GetOutputContainerSASUri(TimeSpan.FromHours(48));

                // The timeout is handed over as text; format it culture-independently, matching
                // how this method parses its own arguments.
                string timeoutText = timeout.ToString(CultureInfo.InvariantCulture);

                do
                {
                    resultSegment = await benchmarkStorage.ListBlobsSegmentedAsync(benchmarksPath, continuationToken);

                    // Keep block blobs that are not yet processed and whose extension is accepted.
                    string[] blobNamesToProcess = resultSegment.Results.SelectMany(item =>
                    {
                        var blob = item as CloudBlockBlob;
                        if (blob == null || processedBlobs.Contains(blob.Name))
                        {
                            return new string[] { };
                        }

                        var nameParts      = blob.Name.Split('/');
                        var shortnameParts = nameParts[nameParts.Length - 1].Split('.');
                        if (shortnameParts.Length == 1 && !extensions.Contains(""))
                        {
                            return new string[] { };
                        }

                        // NOTE(review): for a name without any '.', ext is the whole file name,
                        // so such a blob passes only if that name itself is in extensions -
                        // confirm this is intended.
                        var ext = shortnameParts[shortnameParts.Length - 1];
                        if (!extensions.Contains(ext))
                        {
                            return new string[] { };
                        }

                        return new string[] { blob.Name };
                    }).ToArray();

                    // benchmarksTotal is passed with its value BEFORE this segment is counted.
                    starterTasks.Add(StartTasksForSegment(timeoutText, experimentId, executable, arguments, memoryLimit, domainString, outputQueueUri, outputContainerUri, outputLimit, errorLimit, jobId, batchClient, blobNamesToProcess, benchmarksPath, benchmarksTotal, benchmarkStorage, maxRepetitions, maxTime));

                    continuationToken = resultSegment.ContinuationToken;
                    benchmarksTotal  += blobNamesToProcess.Length;
                } while (continuationToken != null);

                await storage.SetBenchmarksTotal(experimentId, benchmarksTotal);

                Program.benchmarksTotal = benchmarksTotal;
                benchmarksToProcess     = benchmarksTotal - goodResults.Count;
                Console.WriteLine(" took {0}.", (DateTime.Now - before));

                before = DateTime.Now;
                Console.Write("Waiting for tasks to start...");
                await Task.WhenAll(starterTasks.ToArray());

                Console.WriteLine(" took {0}.", (DateTime.Now - before));

                // Now that all tasks are submitted, let the job terminate once they complete.
                CloudJob j = batchClient.JobOperations.GetJob(jobId);
                j.OnAllTasksComplete = Microsoft.Azure.Batch.Common.OnAllTasksComplete.TerminateJob;
                j.CommitChanges();

                before = DateTime.Now;
                Console.Write("Waiting for results...");
                // Collection is started but not awaited here; the task is handed to the monitor.
                var collectionTask = CollectResults(experimentId, storage);
                Console.WriteLine(" took {0}.", (DateTime.Now - before));

                MonitorTasksUntilCompletion(experimentId, jobId, collectionTask, batchClient, domain);

                if (summaryName != null && expInfo.Creator == "Nightly")
                {
                    Trace.WriteLine(string.Format("Building summary for experiment {0} and summary name {1}...", experimentId, summaryName));
                    AzureSummaryManager manager = new AzureSummaryManager(credentials.WithoutBatchData().ToString(), MEFDomainResolver.Instance);
                    await AppendSummaryAndSendReport(summaryName, experimentId, domain, manager);
                }
                else
                {
                    Trace.WriteLine("No summary requested.");
                }

                // Best-effort cleanup: failures are logged and never propagate.
                try
                {
                    // NOTE(review): if this is a CloudQueue, ApproximateMessageCount is only
                    // populated after FetchAttributes(); confirm the helper does that, otherwise
                    // amc has no value and this cleanup never runs.
                    int? amc = storage.GetResultsQueueReference(experimentId).ApproximateMessageCount;

                    if (amc.HasValue && amc.Value == 0)
                    {
                        switch (batchClient.JobOperations.GetJob(jobId).State)
                        {
                            case Microsoft.Azure.Batch.Common.JobState.Completed:
                            case Microsoft.Azure.Batch.Common.JobState.Disabled:
                                Console.WriteLine("Deleting Batch job and results queue.");
                                await batchClient.JobOperations.DeleteJobAsync(jobId);

                                await storage.DeleteResultsQueue(experimentId);

                                break;
                        }
                    }
                }
                catch (Exception ex)
                {
                    Trace.WriteLine(string.Format("Cleanup of Batch job and results queue skipped: {0}", ex.Message));
                }

                Console.WriteLine("Closing.");
            }
        }