예제 #1
0
        static async Task ManageTasks(string[] args)
        {
            int    experimentId = int.Parse(args[0], CultureInfo.InvariantCulture);
            string summaryName  = null;

            if (args.Length > 1)
            {
                summaryName = args[1];
            }
            //Console.WriteLine(String.Format("Params are:\n id: {0}\ncontainer: {8}\ndirectory:{9}\ncategory: {1}\nextensions: {10}\ndomain: {11}\nexec: {2}\nargs: {3}\ntimeout: {4}\nmemlimit: {5}\noutlimit: {6}\nerrlimit: {7}", experimentId, benchmarkCategory, executable, arguments, timeout, memoryLimit, outputLimit, errorLimit, benchmarkContainerUri, benchmarkDirectory, extensionsString, domainString));

            string jobId = Environment.GetEnvironmentVariable(JobIdEnvVariableName);

            var secretStorage = new SecretStorage(Settings.Default.AADApplicationId, Settings.Default.AADApplicationCertThumbprint, Settings.Default.KeyVaultUrl);
            BatchConnectionString credentials = new BatchConnectionString(await secretStorage.GetSecret(Settings.Default.ConnectionStringSecretId));

            Console.WriteLine("Retrieved credentials.");


            var batchCred = new BatchSharedKeyCredentials(credentials.BatchURL, credentials.BatchAccountName, credentials.BatchAccessKey);

            var storage = new AzureExperimentStorage(credentials.WithoutBatchData().ToString());

            var expInfo = await storage.GetExperiment(experimentId);

            string benchmarkContainerUri = expInfo.BenchmarkContainerUri;  // args[1];
            string benchmarkDirectory    = expInfo.BenchmarkDirectory;     // args[2];
            string benchmarkCategory     = expInfo.Category;               // args[3];
            string extensionsString      = expInfo.BenchmarkFileExtension; //args[4];
            string domainString          = expInfo.DomainName;             // args[5];
            string executable            = expInfo.Executable;             // args[6];
            string arguments             = expInfo.Parameters;             // args[7];
            double timeout        = expInfo.BenchmarkTimeout;              // TimeSpan.FromSeconds(double.Parse(args[8]));
            double memoryLimit    = expInfo.MemoryLimitMB;                 // 0; // no limit
            int    maxRepetitions = expInfo.AdaptiveRunMaxRepetitions;
            double maxTime        = expInfo.AdaptiveRunMaxTimeInSeconds;

            //long? outputLimit = null;
            //long? errorLimit = null;
            //if (args.Length > 9)
            //{
            //    memoryLimit = double.Parse(args[9]);
            //    if (args.Length > 10)
            //    {
            //        outputLimit = args[10] == "null" ? null : (long?)long.Parse(args[10]);
            //        if (args.Length > 11)
            //        {
            //            errorLimit = args[11] == "null" ? null : (long?)long.Parse(args[11]);
            //        }
            //    }
            //}

            AzureBenchmarkStorage benchmarkStorage = CreateBenchmarkStorage(benchmarkContainerUri, storage);


            var queue = await storage.CreateResultsQueue(experimentId);

            Console.Write("Created queue");

            await FetchSavedResults(experimentId, storage);

            Console.WriteLine("Fetched existing results");
            var collectionTask = CollectResults(experimentId, storage);

            Console.WriteLine("Started collection thread.");
            Domain             domain = ResolveDomain(domainString);
            SortedSet <string> extensions;

            if (string.IsNullOrEmpty(extensionsString))
            {
                extensions = new SortedSet <string>(domain.BenchmarkExtensions.Distinct());
            }
            else
            {
                extensions = new SortedSet <string>(extensionsString.Split('|').Select(s => s.Trim().TrimStart('.')).Distinct());
            }

            using (BatchClient batchClient = BatchClient.Open(batchCred))
            {
                if (expInfo.TotalBenchmarks <= 0)
                {
                    //not all experiments started
                    ODATADetailLevel detailLevel = new ODATADetailLevel();
                    detailLevel.SelectClause = "id,displayName";

                    Console.WriteLine("Listing existing tasks.");
                    var processedBlobs = new SortedSet <string>(batchClient.JobOperations.ListTasks(jobId, detailLevel)
                                                                .SelectMany(t =>
                    {
                        int id;
                        if (int.TryParse(t.Id, out id))
                        {
                            // we put benchmark file first
                            return(new string[] { t.DisplayName });
                        }
                        return(new string[] { });
                    }));
                    Console.WriteLine("Done!");

                    BlobContinuationToken continuationToken = null;
                    BlobResultSegment     resultSegment     = null;

                    List <Task> starterTasks       = new List <Task>();
                    int         totalBenchmarks    = 0;
                    string      benchmarksPath     = CombineBlobPath(benchmarkDirectory, benchmarkCategory);
                    string      outputQueueUri     = storage.GetOutputQueueSASUri(experimentId, TimeSpan.FromHours(48));
                    string      outputContainerUri = storage.GetOutputContainerSASUri(TimeSpan.FromHours(48));
                    do
                    {
                        resultSegment = await benchmarkStorage.ListBlobsSegmentedAsync(benchmarksPath, continuationToken);

                        Console.WriteLine("Got some blobs");
                        string[] blobNamesToProcess = resultSegment.Results.SelectMany(item =>
                        {
                            var blob = item as CloudBlockBlob;
                            if (blob == null || processedBlobs.Contains(blob.Name))
                            {
                                return new string[] { }
                            }
                            ;

                            var nameParts      = blob.Name.Split('/');
                            var shortnameParts = nameParts[nameParts.Length - 1].Split('.');
                            if (shortnameParts.Length == 1 && !extensions.Contains(""))
                            {
                                return new string[] { }
                            }
                            ;
                            var ext = shortnameParts[shortnameParts.Length - 1];
                            if (!extensions.Contains(ext))
                            {
                                return new string[] { }
                            }
                            ;

                            return(new string[] { blob.Name });
                        }).ToArray();
                        starterTasks.Add(StartTasksForSegment(timeout.ToString(), experimentId, executable, arguments, memoryLimit, domainString, outputQueueUri, outputContainerUri, null, null, jobId, batchClient, blobNamesToProcess, benchmarksPath, totalBenchmarks, benchmarkStorage, maxRepetitions, maxTime));

                        continuationToken = resultSegment.ContinuationToken;
                        totalBenchmarks  += blobNamesToProcess.Length;
                    }while (continuationToken != null);

                    await storage.SetTotalBenchmarks(experimentId, totalBenchmarks);

                    Program.totalBenchmarks  = totalBenchmarks;
                    totalBenchmarksToProcess = totalBenchmarks;

                    await Task.WhenAll(starterTasks.ToArray());

                    Console.WriteLine("Finished starting tasks");
                }
                else
                {
                    Program.totalBenchmarks  = expInfo.TotalBenchmarks;
                    totalBenchmarksToProcess = expInfo.TotalBenchmarks;
                }

                MonitorTasksUntilCompletion(experimentId, jobId, collectionTask, batchClient);

                if (summaryName != null)
                {
                    Trace.WriteLine(string.Format("Building summary for experiment {0} and summary name {1}...", experimentId, summaryName));
                    AzureSummaryManager manager = new AzureSummaryManager(credentials.WithoutBatchData().ToString(), MEFDomainResolver.Instance);
                    await AppendSummary(summaryName, experimentId, domain, manager);
                }
                else
                {
                    Trace.WriteLine("No summary requested.");
                }
                Console.WriteLine("Closing.");
            }
        }
예제 #2
0
        static async Task ManageRetry(string[] args)
        {
            int    experimentId          = int.Parse(args[0], CultureInfo.InvariantCulture);
            string benchmarkListBlobId   = args[1];
            string benchmarkContainerUri = null;

            if (args.Length > 2)
            {
                benchmarkContainerUri = args[2];
            }

            string jobId = Environment.GetEnvironmentVariable(JobIdEnvVariableName);

            var secretStorage = new SecretStorage(Settings.Default.AADApplicationId, Settings.Default.AADApplicationCertThumbprint, Settings.Default.KeyVaultUrl);
            BatchConnectionString credentials = new BatchConnectionString(await secretStorage.GetSecret(Settings.Default.ConnectionStringSecretId));

            Console.WriteLine("Retrieved credentials.");


            var batchCred = new BatchSharedKeyCredentials(credentials.BatchURL, credentials.BatchAccountName, credentials.BatchAccessKey);

            var storage = new AzureExperimentStorage(credentials.WithoutBatchData().ToString());

            var expInfo = await storage.GetExperiment(experimentId);

            if (benchmarkContainerUri == null)
            {
                if (expInfo.BenchmarkContainerUri != ExperimentDefinition.DefaultContainerUri)
                {
                    throw new ArgumentException("New URI for non-default benchmark container was not provided.");
                }
                else
                {
                    benchmarkContainerUri = ExperimentDefinition.DefaultContainerUri;
                }
            }

            AzureBenchmarkStorage benchmarkStorage = CreateBenchmarkStorage(benchmarkContainerUri, storage);

            var queue = await storage.CreateResultsQueue(experimentId);

            Console.Write("Created queue");

            // We can't tell bad results we got during previous runs on the same experiment from bad results
            // we got during this run when job manager crashed, so we put them all into 'good' list.
            // 'Fresh' (and, therefore, duplicate) bad results will be removed during deduplication.
            goodResults = (await storage.GetAzureExperimentResults(experimentId)).Item1.ToList();
            Console.WriteLine("Fetched existing results");
            Domain domain = ResolveDomain(expInfo.DomainName);


            string benchmarksPath    = CombineBlobPath(expInfo.BenchmarkDirectory, expInfo.Category);
            var    benchmarkListBlob = storage.TempBlobContainer.GetBlockBlobReference(benchmarkListBlobId);

            string[] benchmarkList = (await benchmarkListBlob.DownloadTextAsync()).Split('\n')
                                     .SelectMany(s =>
            {
                s = s.Trim();
                if (string.IsNullOrEmpty(s))
                {
                    return new string[] { }
                }
                ;
                else
                {
                    return new string[] { benchmarksPath + s }
                };
            }).ToArray();
            totalBenchmarksToProcess = benchmarkList.Length;
            totalBenchmarks          = expInfo.TotalBenchmarks;
            Console.WriteLine("Retrieved list of benchmarks to re-process. Total: {0}.", totalBenchmarksToProcess);
            var collectionTask = CollectResults(experimentId, storage);

            Console.WriteLine("Started collection thread.");

            using (BatchClient batchClient = BatchClient.Open(batchCred))
            {
                //not all experiments started
                ODATADetailLevel detailLevel = new ODATADetailLevel();
                detailLevel.SelectClause = "id,displayName";

                Console.WriteLine("Listing existing tasks.");
                var processedBlobs = new SortedSet <string>(batchClient.JobOperations.ListTasks(jobId, detailLevel)
                                                            .SelectMany(t =>
                {
                    int id;
                    if (int.TryParse(t.Id, out id))
                    {
                        // we put benchmark file first
                        return(new string[] { t.DisplayName });
                    }
                    return(new string[] { });
                }));
                Console.WriteLine("Done!");

                string   outputQueueUri     = storage.GetOutputQueueSASUri(experimentId, TimeSpan.FromHours(48));
                string   outputContainerUri = storage.GetOutputContainerSASUri(TimeSpan.FromHours(48));
                string[] blobsToProcess     = benchmarkList.Where(b => !processedBlobs.Contains(b)).ToArray();

                if (blobsToProcess.Length > 0)
                {
                    var starterTask = StartTasksForSegment(expInfo.BenchmarkTimeout.ToString(), experimentId, expInfo.Executable, expInfo.Parameters, expInfo.MemoryLimitMB, expInfo.DomainName, outputQueueUri, outputContainerUri, null, null, jobId, batchClient, blobsToProcess, benchmarksPath, 0, benchmarkStorage, expInfo.AdaptiveRunMaxRepetitions, expInfo.AdaptiveRunMaxTimeInSeconds);

                    await starterTask;
                    Console.WriteLine("Finished starting tasks");
                }

                MonitorTasksUntilCompletion(experimentId, jobId, collectionTask, batchClient);
            }

            Console.WriteLine("Deleting blob with benchmark list.");
            await benchmarkListBlob.DeleteIfExistsAsync();

            Console.WriteLine("Closing.");
        }
예제 #3
0
        static async Task ManageTasks(string[] args)
        {
            int    experimentId = int.Parse(args[0], CultureInfo.InvariantCulture);
            string summaryName  = null;

            if (args.Length > 1)
            {
                summaryName = args[1];
            }
            //Console.WriteLine(String.Format("Params are:\n id: {0}\ncontainer: {8}\ndirectory:{9}\ncategory: {1}\nextensions: {10}\ndomain: {11}\nexec: {2}\nargs: {3}\ntimeout: {4}\nmemlimit: {5}\noutlimit: {6}\nerrlimit: {7}", experimentId, benchmarkCategory, executable, arguments, timeout, memoryLimit, outputLimit, errorLimit, benchmarkContainerUri, benchmarkDirectory, extensionsString, domainString));
#if DEBUG
            string jobId = "cz3_exp8535";
#else
            string jobId = Environment.GetEnvironmentVariable(JobIdEnvVariableName);
#endif
            Console.WriteLine("Retrieving credentials...");
            var secretStorage = new SecretStorage(Settings.Default.AADApplicationId, Settings.Default.AADApplicationCertThumbprint, Settings.Default.KeyVaultUrl);
            BatchConnectionString credentials = new BatchConnectionString(await secretStorage.GetSecret(Settings.Default.ConnectionStringSecretId));

            var batchCred = new BatchSharedKeyCredentials(credentials.BatchURL, credentials.BatchAccountName, credentials.BatchAccessKey);
            var storage   = new AzureExperimentStorage(credentials.WithoutBatchData().ToString());

            var expInfo = await storage.GetExperiment(experimentId);

            string benchmarkContainerUri = expInfo.BenchmarkContainerUri;  // args[1];
            string benchmarkDirectory    = expInfo.BenchmarkDirectory;     // args[2];
            string benchmarkCategory     = expInfo.Category;               // args[3];
            string extensionsString      = expInfo.BenchmarkFileExtension; //args[4];
            string domainString          = expInfo.DomainName;             // args[5];
            string executable            = expInfo.Executable;             // args[6];
            string arguments             = expInfo.Parameters;             // args[7];
            double timeout        = expInfo.BenchmarkTimeout;              // TimeSpan.FromSeconds(double.Parse(args[8]));
            double memoryLimit    = expInfo.MemoryLimitMB;                 // 0; // no limit
            int    maxRepetitions = expInfo.AdaptiveRunMaxRepetitions;
            double maxTime        = expInfo.AdaptiveRunMaxTimeInSeconds;

            long?outputLimit = 1 * (1024 * 1024); // 1 MB
            long?errorLimit  = 256 * 1024;        // 256 KB

            AzureBenchmarkStorage benchmarkStorage = CreateBenchmarkStorage(benchmarkContainerUri, storage);

            var queue = await storage.CreateResultsQueue(experimentId);

            DateTime before = DateTime.Now;
            Console.Write("Fetching existing results...");
            await FetchSavedResults(experimentId, storage);

            Domain           domain = ResolveDomain(domainString);
            HashSet <string> extensions;
            if (string.IsNullOrEmpty(extensionsString))
            {
                extensions = new HashSet <string>(domain.BenchmarkExtensions.Distinct());
            }
            else
            {
                extensions = new HashSet <string>(extensionsString.Split('|').Select(s => s.Trim().TrimStart('.')).Distinct());
            }

            using (BatchClient batchClient = BatchClient.Open(batchCred))
            {
                // Exclude benchmarks that finished correctly
                var    processedBlobs = new HashSet <string>();
                string prefix         = (benchmarkDirectory.Trim('/') + "/" + benchmarkCategory.Trim('/')).Trim('/');
                foreach (var r in goodResults.Select(g => prefix + "/" + g.BenchmarkFileName))
                {
                    processedBlobs.Add(r.Trim());
                }
                Console.WriteLine(" took {0}.", (DateTime.Now - before));

                // Exclude those that are still in progress
                ODATADetailLevel detailLevel = new ODATADetailLevel();
                detailLevel.FilterClause = "(state eq 'active') or (state eq 'running') or (state eq 'preparing')";
                detailLevel.SelectClause = "id,displayName";

                CloudJob old_job = null;
                try { old_job = batchClient.JobOperations.GetJob(jobId); } catch { /* OK */ }

                if (old_job != null)
                {
                    before = DateTime.Now;
                    Console.Write("Listing existing tasks...");
                    var ts = batchClient.JobOperations.ListTasks(jobId, detailLevel);
                    foreach (CloudTask t in ts)
                    {
                        int id;

                        if (int.TryParse(t.Id, out id))
                        {
                            string n = t.DisplayName.Trim();
                            if (!processedBlobs.Contains(n))
                            {
                                processedBlobs.Add(n);
                            }
                        }
                    }
                    ;
                    Console.WriteLine(" took {0}.", (DateTime.Now - before));

                    // Create new job if the old one is already sealed off
                    switch (old_job.State)
                    {
                    case Microsoft.Azure.Batch.Common.JobState.Completed:
                    case Microsoft.Azure.Batch.Common.JobState.Deleting:
                    case Microsoft.Azure.Batch.Common.JobState.Disabled:
                    case Microsoft.Azure.Batch.Common.JobState.Disabling:
                    case Microsoft.Azure.Batch.Common.JobState.Terminating:
                    {
                        before = DateTime.Now;
                        Console.Write("Creating fresh job...");
                        PoolInformation pool_info = old_job.PoolInformation;
                        string          new_jid;
                        int             cnt      = 1;
                        bool            have_jid = false;
                        do
                        {
                            new_jid = String.Format("{0}-{1}", jobId, cnt++);
                            try
                            {
                                CloudJob new_job = batchClient.JobOperations.CreateJob(new_jid, pool_info);
                                new_job.OnAllTasksComplete = Microsoft.Azure.Batch.Common.OnAllTasksComplete.NoAction;
                                new_job.OnTaskFailure      = old_job.OnTaskFailure;
                                new_job.Constraints        = old_job.Constraints;
                                new_job.DisplayName        = old_job.DisplayName;
                                new_job.Commit();
                                have_jid = true;
                            }
                            catch (Microsoft.Azure.Batch.Common.BatchException)
                            {
                                Console.Write(".");
                            }
                        }while (!have_jid);
                        jobId = new_jid;
                        Console.WriteLine(" took {0}.", (DateTime.Now - before));
                        break;
                    }
                    }
                }

                BlobContinuationToken continuationToken = null;
                BlobResultSegment     resultSegment     = null;

                before = DateTime.Now;
                Console.Write("Adding tasks...");
                List <Task> starterTasks       = new List <Task>();
                int         benchmarksTotal    = processedBlobs.Count();
                string      benchmarksPath     = CombineBlobPath(benchmarkDirectory, benchmarkCategory);
                string      outputQueueUri     = storage.GetOutputQueueSASUri(experimentId, TimeSpan.FromHours(48));
                string      outputContainerUri = storage.GetOutputContainerSASUri(TimeSpan.FromHours(48));
                do
                {
                    resultSegment = await benchmarkStorage.ListBlobsSegmentedAsync(benchmarksPath, continuationToken);

                    string[] blobNamesToProcess = resultSegment.Results.SelectMany(item =>
                    {
                        var blob = item as CloudBlockBlob;
                        if (blob == null || processedBlobs.Contains(blob.Name))
                        {
                            return new string[] { }
                        }
                        ;

                        var nameParts      = blob.Name.Split('/');
                        var shortnameParts = nameParts[nameParts.Length - 1].Split('.');
                        if (shortnameParts.Length == 1 && !extensions.Contains(""))
                        {
                            return new string[] { }
                        }
                        ;
                        var ext = shortnameParts[shortnameParts.Length - 1];
                        if (!extensions.Contains(ext))
                        {
                            return new string[] { }
                        }
                        ;

                        return(new string[] { blob.Name });
                    }).ToArray();
                    starterTasks.Add(StartTasksForSegment(timeout.ToString(), experimentId, executable, arguments, memoryLimit, domainString, outputQueueUri, outputContainerUri, outputLimit, errorLimit, jobId, batchClient, blobNamesToProcess, benchmarksPath, benchmarksTotal, benchmarkStorage, maxRepetitions, maxTime));

                    continuationToken = resultSegment.ContinuationToken;
                    benchmarksTotal  += blobNamesToProcess.Length;
                }while (continuationToken != null);

                await storage.SetBenchmarksTotal(experimentId, benchmarksTotal);

                Program.benchmarksTotal = benchmarksTotal;
                benchmarksToProcess     = benchmarksTotal - goodResults.Count;
                Console.WriteLine(" took {0}.", (DateTime.Now - before));

                before = DateTime.Now;
                Console.Write("Waiting for tasks to start...");
                await Task.WhenAll(starterTasks.ToArray());

                Console.WriteLine(" took {0}.", (DateTime.Now - before));

                CloudJob j = batchClient.JobOperations.GetJob(jobId);
                j.OnAllTasksComplete = Microsoft.Azure.Batch.Common.OnAllTasksComplete.TerminateJob;
                j.CommitChanges();

                before = DateTime.Now;
                Console.Write("Waiting for results...");
                var collectionTask = CollectResults(experimentId, storage);
                Console.WriteLine(" took {0}.", (DateTime.Now - before));

                MonitorTasksUntilCompletion(experimentId, jobId, collectionTask, batchClient, domain);

                if (summaryName != null && expInfo.Creator == "Nightly")
                {
                    Trace.WriteLine(string.Format("Building summary for experiment {0} and summary name {1}...", experimentId, summaryName));
                    AzureSummaryManager manager = new AzureSummaryManager(credentials.WithoutBatchData().ToString(), MEFDomainResolver.Instance);
                    await AppendSummaryAndSendReport(summaryName, experimentId, domain, manager);
                }
                else
                {
                    Trace.WriteLine("No summary requested.");
                }

                try
                {
                    int?amc = storage.GetResultsQueueReference(experimentId).ApproximateMessageCount;

                    if (amc.HasValue && amc.Value == 0)
                    {
                        switch (batchClient.JobOperations.GetJob(jobId).State)
                        {
                        case Microsoft.Azure.Batch.Common.JobState.Completed:
                        case Microsoft.Azure.Batch.Common.JobState.Disabled:
                            Console.WriteLine("Deleting Batch job and results queue.");
                            await batchClient.JobOperations.DeleteJobAsync(jobId);

                            await storage.DeleteResultsQueue(experimentId);

                            break;
                        }
                    }
                }
                catch { /* OK */ }

                Console.WriteLine("Closing.");
            }
        }