/// <summary>
/// Creates a new Azure Batch job with the given preparation task, then adds the main task to it.
/// If adding the task fails, the job is deleted so no orphaned job is left behind.
/// </summary>
/// <param name="jobId">Id of the new Batch job</param>
/// <param name="jobPreparationTask">Preparation task to attach to the job (may be null)</param>
/// <param name="cloudTask">The main task to add to the job</param>
/// <param name="poolInformation">Pool the job is scheduled on</param>
/// <returns>A task representing the asynchronous operation</returns>
public async Task CreateBatchJobAsync(string jobId, JobPreparationTask jobPreparationTask, CloudTask cloudTask, PoolInformation poolInformation)
{
    var job = batchClient.JobOperations.CreateJob(jobId, poolInformation);
    job.JobPreparationTask = jobPreparationTask;
    await job.CommitAsync();

    try
    {
        // Retrieve the "bound" version of the job
        job = await batchClient.JobOperations.GetJobAsync(job.Id);

        // Redoing this since the container registry password is not retrieved by GetJobAsync()
        job.PoolInformation = poolInformation;
        job.OnAllTasksComplete = OnAllTasksComplete.TerminateJob;

        await job.AddTaskAsync(cloudTask);
        await job.CommitAsync();
    }
    catch (Exception ex)
    {
        var batchError = JsonConvert.SerializeObject((ex as BatchException)?.RequestInformation?.BatchError);

        // Use a message template rather than string interpolation so the job id and
        // batch error remain queryable structured-logging properties (CA2254).
        logger.LogError(ex, "Deleting {JobId} because adding task to it failed. Batch error: {BatchError}", job.Id, batchError);

        try
        {
            await batchClient.JobOperations.DeleteJobAsync(job.Id);
        }
        catch (Exception deleteEx)
        {
            // Best-effort cleanup: do not let a delete failure mask the original error.
            logger.LogWarning(deleteEx, "Failed to delete job {JobId} during cleanup", job.Id);
        }

        throw;
    }
}
// Verifies that listing job prep/release task status for a job that has never run
// any tasks (a "naive" compute node) returns an empty result rather than failing:
// create a job carrying prep and release tasks, add zero tasks, then assert the
// prep/release status list is empty.
public void TestOMJobPrepReleaseRunOnNaiveComputeNode()
{
    string jobId = "TestOMJobPrepReleaseRunOnNaiveComputeNode-" + TestUtilities.GetMyName();

    Action test = () =>
    {
        using (BatchClient client = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment()))
        {
            try
            {
                // create job with prep/release
                {
                    CloudJob unboundJob = client.JobOperations.CreateJob(jobId, null);
                    unboundJob.PoolInformation = new PoolInformation();
                    unboundJob.PoolInformation.PoolId = this.poolFixture.PoolId;

                    // add the jobPrep task to the job
                    {
                        JobPreparationTask prep = new JobPreparationTask("cmd /c echo JobPrep!!");
                        unboundJob.JobPreparationTask = prep;
                        prep.WaitForSuccess = true; // be explicit even though this is the default. need JP/JP to not run
                    }

                    // add a jobRelease task to the job
                    {
                        JobReleaseTask relTask = new JobReleaseTask(JobReleaseTaskCommandLine);
                        unboundJob.JobReleaseTask = relTask;
                    }

                    // add the job to the service
                    unboundJob.Commit();
                }

                // the victim nodes. pool should have size 1.
                List<ComputeNode> computeNodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList();
                Assert.Single(computeNodes);

                // now we have a job with zero tasks... lets call get-status methods

                // call it
                List<JobPreparationAndReleaseTaskExecutionInformation> jrStatusList =
                    client.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId).ToList();

                JobPreparationAndReleaseTaskExecutionInformation prepAndReleaseStatus = jrStatusList.FirstOrDefault();

                // no tasks have run on any node, so there must be no prep/release status yet
                Assert.Null(prepAndReleaseStatus);
            }
            finally
            {
                // cleanup
                TestUtilities.DeleteJobIfExistsAsync(client, jobId).Wait();
            }
        }
    };

    SynchronizationContextHelper.RunTest(test, TestTimeout);
}
/// <summary>
/// Asserts that a job preparation task round-tripped from the service carries
/// exactly the values the test originally set on it.
/// </summary>
private static void AssertGoodJobPrepTaskOM(JobPreparationTask jobPrep)
{
    Assert.Equal(JobPrepCommandLine, jobPrep.CommandLine);

    // Exactly one environment setting, matching the one the test created.
    var envSettings = jobPrep.EnvironmentSettings;
    Assert.NotNull(envSettings);
    Assert.Equal(1, envSettings.Count);

    var firstEnvSetting = envSettings[0];
    Assert.Equal(JobPrepEnvSettingOM.Name, firstEnvSetting.Name);
    Assert.Equal(JobPrepEnvSettingOM.Value, firstEnvSetting.Value);

    Assert.Equal(JobPrepId, jobPrep.Id);
    Assert.Equal(JobPrepRerunOnComputeNodeRebootAfterSuccess, jobPrep.RerunOnComputeNodeRebootAfterSuccess);
    Assert.Equal(JobPrepRunElevated, jobPrep.RunElevated);

    // Task constraints round-trip field by field.
    var constraints = jobPrep.Constraints;
    Assert.Equal(JobPrepTaskConstraintsOM.MaxTaskRetryCount, constraints.MaxTaskRetryCount);
    Assert.Equal(JobPrepTaskConstraintsOM.MaxWallClockTime, constraints.MaxWallClockTime);
    Assert.Equal(JobPrepTaskConstraintsOM.RetentionTime, constraints.RetentionTime);

    Assert.Equal(JobPrepWaitForSuccessCreate, jobPrep.WaitForSuccess);

    // TODO: assert on resourcefiles and contents therein
}
/// <summary>
/// Gets the named job, creating it first (with optional prep/release tasks)
/// when it does not already exist.
/// </summary>
/// <param name="batchClient">Client used to talk to the Batch service</param>
/// <param name="poolId">Pool the job should be scheduled on</param>
/// <param name="jobId">Id of the job to get or create</param>
/// <param name="usesTaskDependencies">Whether the job uses task dependencies</param>
/// <param name="prepTask">Optional job preparation task</param>
/// <param name="releaseTask">Optional job release task</param>
/// <returns>The bound job with server-populated properties</returns>
public static async Task<CloudJob> CreateJobIfNotExistAsync(
    BatchClient batchClient,
    string poolId,
    string jobId,
    bool usesTaskDependencies = false,
    JobPreparationTask prepTask = null,
    JobReleaseTask releaseTask = null)
{
    CloudJob job = await GetJobIfExistAsync(batchClient, jobId).ConfigureAwait(continueOnCapturedContext: false);

    // Early return when the job already exists.
    if (job != null)
    {
        return job;
    }

    Console.WriteLine("Job {0} not found, creating...", jobId);

    PoolInformation poolInfo = new PoolInformation() { PoolId = poolId };
    CloudJob unboundJob = batchClient.JobOperations.CreateJob(jobId, poolInfo);
    unboundJob.UsesTaskDependencies = usesTaskDependencies;

    if (prepTask != null)
    {
        unboundJob.JobPreparationTask = prepTask;
    }

    if (releaseTask != null)
    {
        unboundJob.JobReleaseTask = releaseTask;
    }

    await unboundJob.CommitAsync().ConfigureAwait(continueOnCapturedContext: false);

    // Get the bound version of the job with all of its properties populated
    return await batchClient.JobOperations.GetJobAsync(jobId).ConfigureAwait(continueOnCapturedContext: false);
}
// End-to-end object-model test: creates a job schedule carrying a job prep task,
// a job release task and common environment settings; verifies the round-tripped
// values on the bound schedule and on the job it spawns; then updates the prep
// task's WaitForSuccess and confirms the change; finally exercises the
// prep/release get-status calls.
public void TestOMJobSpecAndRelease()
{
    Action test = () =>
    {
        StagingStorageAccount stagingCreds = TestUtilities.GetStorageCredentialsFromEnvironment();

        using (BatchClient client = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment()))
        {
            string jsId = "JobPrepAndRelease-" + /* "OM-static-c" */ "dynamic-" + CraftTimeString() + "-" + TestUtilities.GetMyName();

            try
            {
                // increase request timeout interceptor
                Protocol.RequestInterceptor increaseTimeoutInterceptor =
                    new Protocol.RequestInterceptor((x) =>
                    {
                        this.testOutputHelper.WriteLine("TestOMJobSpecAndRelease: setting request timeout. Request type: " + x.GetType().ToString() + ", ClientRequestID: " + x.Options.ClientRequestId);
                        var timeoutOptions = x.Options as Protocol.Models.ITimeoutOptions;
                        timeoutOptions.Timeout = 5 * 60;
                    });

                // lets use a timer too
                CallTimerViaInterceptors timerInterceptor = new CallTimerViaInterceptors();

                // seeing client side timeouts... so increase the durations on every call
                client.CustomBehaviors.Add(increaseTimeoutInterceptor);
                // add a call timer spammer/logger
                client.CustomBehaviors.Add(timerInterceptor.ReqInterceptor);

                // get some resource files to play with
                IList<ResourceFile> resFiles = UploadFilesMakeResFiles(stagingCreds);

                // create job schedule with prep/release
                {
                    CloudJobSchedule unboundJobSchedule = client.JobScheduleOperations.CreateJobSchedule(jsId, null, null);
                    unboundJobSchedule.JobSpecification = new JobSpecification(new PoolInformation());
                    unboundJobSchedule.JobSpecification.PoolInformation.PoolId = this.poolFixture.PoolId;
                    unboundJobSchedule.Schedule = new Schedule() { RecurrenceInterval = TimeSpan.FromMinutes(3) };

                    // add the jobPrep task to the job schedule
                    {
                        JobPreparationTask prep = new JobPreparationTask(JobPrepCommandLine);
                        unboundJobSchedule.JobSpecification.JobPreparationTask = prep;

                        List<EnvironmentSetting> prepEnvSettings = new List<EnvironmentSetting>();
                        prepEnvSettings.Add(JobPrepEnvSettingOM);
                        prep.EnvironmentSettings = prepEnvSettings;

                        prep.Id = JobPrepId;
                        prep.RerunOnComputeNodeRebootAfterSuccess = JobPrepRerunOnComputeNodeRebootAfterSuccess;
                        prep.ResourceFiles = resFiles; // bug: incorrect type this should be IList<>
                        /*
                         * prep.ResourceFiles = new List<ResourceFile>(); // this is actually read into our concurrent iList thing
                         *
                         * // why not, merge them in. exersize the concurent IList thing
                         * foreach (ResourceFile curRF in resFiles)
                         * {
                         *     prep.ResourceFiles.Add(curRF);
                         * }
                         */
                        prep.UserIdentity = new UserIdentity(JobPrepUserSpec);
                        prep.Constraints = JobPrepTaskConstraintsOM;
                        prep.WaitForSuccess = JobPrepWaitForSuccessCreate;
                    }

                    // add a jobRelease task to the job schedule
                    {
                        JobReleaseTask relTask = new JobReleaseTask(JobReleaseTaskCommandLine);
                        unboundJobSchedule.JobSpecification.JobReleaseTask = relTask;

                        List<EnvironmentSetting> relEnvSettings = new List<EnvironmentSetting>();
                        relEnvSettings.Add(JobRelEnvSettingOM);
                        relTask.EnvironmentSettings = relEnvSettings;

                        relTask.MaxWallClockTime = JobRelMaxWallClockTime;
                        relTask.Id = JobRelId;

                        relTask.ResourceFiles = null;
                        relTask.ResourceFiles = new List<ResourceFile>();

                        // why not, merge them in. work the concurrent IList thing
                        foreach (ResourceFile curRF in resFiles)
                        {
                            relTask.ResourceFiles.Add(curRF);
                        }

                        relTask.RetentionTime = JobRelRetentionTime;
                        relTask.UserIdentity = new UserIdentity(JobRelUserSpec);
                    }

                    // set JobCommonEnvSettings
                    {
                        List<EnvironmentSetting> jobCommonES = new List<EnvironmentSetting>();
                        jobCommonES.Add(JobCommonEnvSettingOM);
                        unboundJobSchedule.JobSpecification.CommonEnvironmentSettings = jobCommonES;
                    }

                    // add the job schedule to the service
                    unboundJobSchedule.Commit();
                }

                // now we have a jobschedule with jobprep/release...now test the values on the jobschedule
                {
                    CloudJobSchedule boundJobSchedule = client.JobScheduleOperations.GetJobSchedule(jsId);

                    Assert.NotNull(boundJobSchedule);
                    Assert.NotNull(boundJobSchedule.JobSpecification);
                    Assert.NotNull(boundJobSchedule.JobSpecification.JobPreparationTask);
                    Assert.NotNull(boundJobSchedule.JobSpecification.JobReleaseTask);
                    Assert.NotNull(boundJobSchedule.JobSpecification.CommonEnvironmentSettings);

                    AssertGoodCommonEnvSettingsOM(boundJobSchedule.JobSpecification.CommonEnvironmentSettings);
                    AssertGoodJobPrepTaskOM(boundJobSchedule.JobSpecification.JobPreparationTask);
                    AssertGoodJobReleaseTaskOM(boundJobSchedule.JobSpecification.JobReleaseTask);
                    AssertGoodResourceFiles(resFiles, boundJobSchedule.JobSpecification.JobPreparationTask.ResourceFiles);
                    AssertGoodResourceFiles(resFiles, boundJobSchedule.JobSpecification.JobReleaseTask.ResourceFiles);

                    //todo: test mutability
                }

                CloudJobSchedule boundJobScheduleWithJob; // set on job test

                // test the values on the job
                {
                    boundJobScheduleWithJob = TestUtilities.WaitForJobOnJobSchedule(client.JobScheduleOperations, jsId);
                    CloudJob bndJob = client.JobOperations.GetJob(boundJobScheduleWithJob.ExecutionInformation.RecentJob.Id);

                    Assert.NotNull(bndJob);
                    Assert.NotNull(bndJob.CommonEnvironmentSettings);
                    Assert.NotNull(bndJob.JobPreparationTask);
                    Assert.NotNull(bndJob.JobReleaseTask);

                    AssertGoodCommonEnvSettingsOM(bndJob.CommonEnvironmentSettings as IList<EnvironmentSetting> /* we know it is our internal IList */);
                    AssertGoodJobPrepTaskOM(bndJob.JobPreparationTask);
                    AssertGoodJobReleaseTaskOM(bndJob.JobReleaseTask);
                    AssertGoodResourceFiles(resFiles, bndJob.JobPreparationTask.ResourceFiles);
                    AssertGoodResourceFiles(resFiles, bndJob.JobReleaseTask.ResourceFiles);

                    //TODO: test immutability
                }

                // used for on get-status test
                CloudJobSchedule updatedJobSchedule;

                // test update on the WI jobprep/jobrelease
                {
                    // change props
                    boundJobScheduleWithJob.JobSpecification.JobPreparationTask.WaitForSuccess = JobPrepWaitForSuccessUpdate;

                    // commit changes
                    boundJobScheduleWithJob.Commit();

                    // get new values
                    updatedJobSchedule = client.JobScheduleOperations.GetJobSchedule(jsId);

                    // confirm values changed
                    Assert.Equal(JobPrepWaitForSuccessUpdate, updatedJobSchedule.JobSpecification.JobPreparationTask.WaitForSuccess);
                }

                TestGetPrepReleaseStatusCalls(client, updatedJobSchedule, this.poolFixture.PoolId, resFiles);
            }
            finally
            {
                // cleanup
                TestUtilities.DeleteJobScheduleIfExistsAsync(client, jsId).Wait();
            }
        }
    };

    SynchronizationContextHelper.RunTest(test, LongTestTimeout);
}
// Verifies that a job release task which cannot be scheduled (unreachable resource
// file URL) surfaces a failure: create a job whose release task references a bad
// resource file, force the prep to run with a trivial task, terminate the job to
// trigger the release, then poll until the release task completes and assert its
// FailureInformation and Failure result.
public void TestOMJobReleaseSchedulingError()
{
    string jobId = "TestOMJobReleaseSchedulingError-" + CraftTimeString() + "-" + TestUtilities.GetMyName();

    Action test = () =>
    {
        using (BatchClient client = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment()))
        {
            try
            {
                // create job schedule with prep that succeeds and release the triggers scheduling error
                {
                    PoolInformation poolInfo = new PoolInformation() { PoolId = this.poolFixture.PoolId };
                    CloudJob unboundJob = client.JobOperations.CreateJob(jobId, poolInfo);

                    // add the jobPrep task to the job
                    {
                        JobPreparationTask prep = new JobPreparationTask("cmd /c echo the quick job prep jumped over the...");
                        unboundJob.JobPreparationTask = prep;
                        prep.WaitForSuccess = false; // we don't really care but why not set this
                    }

                    // add a jobRelease task to the job
                    {
                        JobReleaseTask relTask = new JobReleaseTask("cmd /c echo Job Release Task");
                        unboundJob.JobReleaseTask = relTask;

                        // an unresolvable URL forces the release task to fail
                        ResourceFile[] badResFiles = { ResourceFile.FromUrl("https://not.a.domain.invalid/file", "bob.txt") };
                        relTask.ResourceFiles = badResFiles;
                        relTask.Id = "jobRelease";
                    }

                    // add the job to the service
                    unboundJob.Commit();
                }

                // add a trivial task to force the JP
                client.JobOperations.AddTask(jobId, new CloudTask("ForceJobPrep", "cmd /c echo TestOMJobReleaseSchedulingError"));

                // wait for the task to complete
                TaskStateMonitor tsm = client.Utilities.CreateTaskStateMonitor();
                tsm.WaitAll(
                    client.JobOperations.ListTasks(jobId),
                    TaskState.Completed,
                    TimeSpan.FromMinutes(10),
                    additionalBehaviors: new[]
                    {
                        // spam/logging interceptor
                        new Protocol.RequestInterceptor((x) =>
                        {
                            this.testOutputHelper.WriteLine("Issuing request type: " + x.GetType().ToString());

                            // print out the compute node states... we are actually waiting on the compute nodes
                            List<ComputeNode> allComputeNodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList();
                            this.testOutputHelper.WriteLine(" #compute nodes: " + allComputeNodes.Count);

                            allComputeNodes.ForEach((icn) => { this.testOutputHelper.WriteLine(" computeNode.id: " + icn.Id + ", state: " + icn.State); });

                            this.testOutputHelper.WriteLine("");
                        })
                    }
                );

                // ok terminate job to trigger job release
                client.JobOperations.TerminateJob(jobId, "BUG: Server will throw 500 if I don't provide reason");

                // the victim compute node. pool should have size 1.
                List<ComputeNode> computeNodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList();
                Assert.Single(computeNodes);

                // now we have a job that should be trying to run the JP
                // poll for the JP to have been run, and it must have a scheduling error
                bool releaseNotCompleted = true;

                // gotta poll to find out when the jp has been run
                while (releaseNotCompleted)
                {
                    List<JobPreparationAndReleaseTaskExecutionInformation> jrStatusList = client.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId).ToList();
                    JobPreparationAndReleaseTaskExecutionInformation prepAndReleaseStatus = jrStatusList.FirstOrDefault();

                    if (prepAndReleaseStatus != null && null != prepAndReleaseStatus.JobReleaseTaskExecutionInformation)
                    {
                        if (JobReleaseTaskState.Completed == prepAndReleaseStatus.JobReleaseTaskExecutionInformation.State)
                        {
                            releaseNotCompleted = false; // we see a JP has been run

                            // now assert the failure info
                            Assert.NotNull(prepAndReleaseStatus);
                            Assert.NotNull(prepAndReleaseStatus.JobReleaseTaskExecutionInformation.FailureInformation);
                            Assert.Equal(TaskExecutionResult.Failure, prepAndReleaseStatus.JobReleaseTaskExecutionInformation.Result);

                            // spew the failure info
                            this.OutputFailureInfo(prepAndReleaseStatus.JobReleaseTaskExecutionInformation.FailureInformation);
                        }
                    }

                    Thread.Sleep(2000);
                    this.testOutputHelper.WriteLine("Job Release tasks still running (waiting for blob dl to timeout).");
                }
            }
            finally
            {
                client.JobOperations.DeleteJob(jobId);
            }
        }
    };

    SynchronizationContextHelper.RunTest(test, LongTestTimeout);
}
// Verifies that a job preparation task which cannot be scheduled (unreachable
// resource file URL) reports a failure: create a job whose prep task references a
// bad resource file, add a trivial task to force the prep to run, then poll the
// prep/release status until the prep task completes and assert its
// FailureInformation and Failure result.
public void TestOMJobPrepSchedulingError()
{
    string jobId = "TestOMJobPrepSchedulingError-" + CraftTimeString() + "-" + TestUtilities.GetMyName();

    Action test = () =>
    {
        using (BatchClient client = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment()))
        {
            try
            {
                // create job with prep that triggers prep scheduling error
                {
                    CloudJob unboundJob = client.JobOperations.CreateJob(jobId, new PoolInformation() { PoolId = this.poolFixture.PoolId });

                    // add the jobPrep task to the job
                    {
                        JobPreparationTask prep = new JobPreparationTask("cmd /c JobPrep Task");
                        unboundJob.JobPreparationTask = prep;

                        // an unresolvable URL forces the prep task to fail
                        ResourceFile[] badResFiles = { ResourceFile.FromUrl("https://not.a.domain.invalid/file", "bob.txt") };
                        prep.ResourceFiles = badResFiles;
                        prep.WaitForSuccess = true; // be explicit even though this is the default. need JP/ to not run
                    }

                    // add the job to the service
                    unboundJob.Commit();
                }

                CloudJob boundJob = client.JobOperations.GetJob(jobId);

                // add a trivial task to force the JP
                client.JobOperations.AddTask(boundJob.Id, new CloudTask("ForceJobPrep", "cmd /c echo TestOMJobPrepSchedulingError"));

                // the victim compute node. pool should have size 1.
                List<ComputeNode> nodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList();
                Assert.Single(nodes);

                // now we have a job that should be trying to run the JP
                // poll for the JP to have been run, and it must have a scheduling error
                bool prepNotCompleted = true;

                // gotta poll to find out when the jp has been run
                while (prepNotCompleted)
                {
                    List<JobPreparationAndReleaseTaskExecutionInformation> jpStatsList = client.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId).ToList();
                    JobPreparationAndReleaseTaskExecutionInformation jpStatus = jpStatsList.FirstOrDefault();

                    if (jpStatus == null)
                    {
                        // no status yet; keep polling
                        Thread.Sleep(2000);
                    }
                    else
                    {
                        if (JobPreparationTaskState.Completed == jpStatus.JobPreparationTaskExecutionInformation.State)
                        {
                            prepNotCompleted = false; // we see a JP has completed

                            Assert.NotNull(jpStatus.JobPreparationTaskExecutionInformation.FailureInformation);
                            Assert.Equal(TaskExecutionResult.Failure, jpStatus.JobPreparationTaskExecutionInformation.Result);

                            // spew the failure
                            this.OutputFailureInfo(jpStatus.JobPreparationTaskExecutionInformation.FailureInformation);
                        }

                        this.testOutputHelper.WriteLine("Job Prep is running (waiting for blob dl to timeout)");
                    }
                }
            }
            finally
            {
                // cleanup
                client.JobOperations.DeleteJob(jobId);
            }
        }
    };

    SynchronizationContextHelper.RunTest(test, TestTimeout);
}
/// <summary>
/// Returns job preparation and main Batch tasks that represents the given <see cref="TesTask"/>.
/// The prep task downloads all input files to the node via blobxfer; the main task runs the
/// executor's command in its container and uploads the declared outputs on completion.
/// </summary>
/// <param name="task">The <see cref="TesTask"/></param>
/// <returns>Job preparation and main Batch tasks</returns>
private async Task<(JobPreparationTask, CloudTask)> ConvertTesTaskToBatchTaskAsync(TesTask task)
{
    JobPreparationTask jobPreparationTask = null;
    var cromwellPathPrefixWithoutEndSlash = CromwellPathPrefix.TrimEnd('/');
    var taskId = task.Id;

    // Query strings found in any input/output path; these are stripped from local
    // file paths later so they do not end up in on-disk file names.
    var queryStringsToRemoveFromLocalFilePaths = task.Inputs
        .Select(i => i.Path)
        .Concat(task.Outputs.Select(o => o.Path))
        .Where(p => p != null)
        .Select(p => queryStringRegex.Match(p).Groups[1].Value)
        .Where(qs => !string.IsNullOrEmpty(qs))
        .ToList();

    var inputFiles = task.Inputs.Distinct();
    var cromwellExecutionDirectoryPath = GetParentPath(task.Inputs.FirstOrDefault(IsCromwellCommandScript)?.Path);

    if (cromwellExecutionDirectoryPath == null)
    {
        throw new Exception($"Could not identify Cromwell execution directory path for task {task.Id}. This TES instance supports Cromwell tasks only.");
    }

    // TODO: Cromwell bug: Cromwell command write_tsv() generates a file in the execution directory, for example execution/write_tsv_3922310b441805fc43d52f293623efbc.tmp. These are not passed on to TES inputs.
    // WORKAROUND: Get the list of files in the execution directory and add them to task inputs.
    var executionDirectoryUri = new Uri(await MapLocalPathToSasUrlAsync(cromwellExecutionDirectoryPath, getContainerSas: true));
    var blobsInExecutionDirectory = (await azureProxy.ListBlobsAsync(executionDirectoryUri)).Where(b => !b.EndsWith($"/{CromwellScriptFileName}")).Where(b => !b.EndsWith($"/{DownloadFilesScriptFileName}"));
    var additionalInputFiles = blobsInExecutionDirectory.Select(b => $"{CromwellPathPrefix}{b}").Select(b => new TesInput { Content = null, Path = b, Url = b, Type = TesFileType.FILEEnum });
    var filesToDownload = await Task.WhenAll(inputFiles.Union(additionalInputFiles).Select(async f => await GetTesInputFileUrl(f, task.Id, queryStringsToRemoveFromLocalFilePaths)));

    // Every output must live under the Cromwell path prefix; reject anything else up front.
    foreach (var output in task.Outputs)
    {
        if (!output.Path.StartsWith(CromwellPathPrefix, StringComparison.OrdinalIgnoreCase))
        {
            throw new Exception($"Unsupported output path '{output.Path}' for task Id {task.Id}. Must start with {CromwellPathPrefix}");
        }
    }

    // Build a shell script that downloads every input via blobxfer, upload it to blob
    // storage, then reference it (via a read-only SAS URL) from the prep task below.
    var downloadFilesScriptContent = string.Join(" && ", filesToDownload.Select(f => $"blobxfer download --verbose --enable-azure-storage-logger --storage-url '{f.Url}' --local-path '{f.Path}' --rename --no-recursive"));
    var downloadFilesScriptPath = $"{cromwellExecutionDirectoryPath}/{DownloadFilesScriptFileName}";
    var writableDownloadFilesScriptUrl = new Uri(await MapLocalPathToSasUrlAsync(downloadFilesScriptPath, getContainerSas: true));
    var downloadFilesScriptUrl = await MapLocalPathToSasUrlAsync(downloadFilesScriptPath);
    await azureProxy.UploadBlobAsync(writableDownloadFilesScriptUrl, downloadFilesScriptContent);

    // The prep task runs the download script in a blobxfer container before the main task starts.
    jobPreparationTask = new JobPreparationTask()
    {
        UserIdentity = new UserIdentity(new AutoUserSpecification(elevationLevel: ElevationLevel.Admin, scope: AutoUserScope.Task)),
        ContainerSettings = new TaskContainerSettings(
            imageName: "mcr.microsoft.com/blobxfer",
            containerRunOptions: $"--rm -v /mnt{cromwellPathPrefixWithoutEndSlash}:{cromwellPathPrefixWithoutEndSlash} --entrypoint /bin/sh"),
        ResourceFiles = new List<ResourceFile> { ResourceFile.FromUrl(downloadFilesScriptUrl, $"/mnt{downloadFilesScriptPath}") },
        CommandLine = $" -c \"/bin/sh {downloadFilesScriptPath}; chmod -R o+rx {cromwellPathPrefixWithoutEndSlash} \"",
        WaitForSuccess = true
    };

    var executor = task.Executors.First();

    // The first command is sh or bash
    var command = executor.Command[0] + " -c \"" + string.Join(" && ", executor.Command.Skip(1)
        .Append($"chmod -R o+rx {cromwellPathPrefixWithoutEndSlash}")
        .Append($"cd {cromwellPathPrefixWithoutEndSlash}")
        .Append("for f in $(find -type l -xtype f);do cp --remove-destination $(readlink $f) $f;done;")) + "\"";

    var cromwellExecutionsContainerUri = await MapLocalPathToSasUrlAsync(CromwellPathPrefix);

    var cloudTask = new CloudTask(taskId, command)
    {
        // It seems root always downloads, tasks run as root or user depending on Admin/NonAdmin, and upload is always done by non-root user.
        UserIdentity = new UserIdentity(new AutoUserSpecification(elevationLevel: ElevationLevel.Admin, scope: AutoUserScope.Task)),
        ContainerSettings = new TaskContainerSettings(
            imageName: executor.Image,
            containerRunOptions: $"--rm -v /mnt{cromwellPathPrefixWithoutEndSlash}:{cromwellPathPrefixWithoutEndSlash} --entrypoint= --workdir /",
            registry: (await GetContainerRegistry(executor.Image))),
        OutputFiles = task.Outputs
            .Select(o => new OutputFile(
                "/mnt" + RemoveQueryStringsFromLocalFilePaths(o.Path, queryStringsToRemoveFromLocalFilePaths) + (o.Type == TesFileType.DIRECTORYEnum ? "/**/*" : string.Empty),
                new OutputFileDestination(new OutputFileBlobContainerDestination(cromwellExecutionsContainerUri, o.Path.Substring(CromwellPathPrefix.Length))),
                new OutputFileUploadOptions(OutputFileUploadCondition.TaskCompletion)))
            .ToList()
    };

    return (jobPreparationTask, cloudTask);
}
// Sample entry point: uploads input files to blob storage, creates a Windows
// container-enabled pool, creates a job with a prep task that removes a stale
// Docker image, runs one container task per input file, prints each task's
// stdout, and finally offers to delete the job and pool.
static void Main(string[] args)
{
    string storageConnectionString = $"DefaultEndpointsProtocol=https;AccountName={StorageAccountName};AccountKey={StorageAccountKey}";

    // Retrieve the storage account
    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(storageConnectionString);

    // Create the blob client
    CloudBlobClient blobClient = storageAccount.CreateCloudBlobClient();

    // Upload the input files to blob storage
    const string inputContainerName = "batchinput";
    List<string> inputFilePaths = new List<string>
    {
        "taskdata0.txt",
        "taskdata1.txt",
        "taskdata2.txt"
    };

    List<ResourceFile> inputFiles = new List<ResourceFile>();

    foreach (string filePath in inputFilePaths)
    {
        inputFiles.Add(UploadFileToContainer(blobClient, inputContainerName, filePath));
    }

    // Get a SAS Url for the output container
    const string outputContainerName = "batchoutput";
    string outputContainerSasUrl = GetOutputContainerSasUrl(blobClient, outputContainerName);

    // Specify a container registry
    ContainerRegistry containerRegistry = new ContainerRegistry(
        registryServer: RegistryServer,
        userName: RegistryUserName,
        password: RegistryPassword);

    // Create container configuration, prefetching Docker images from the container registry
    ContainerConfiguration containerConfig = new ContainerConfiguration()
    {
        ContainerImageNames = ContainerImageNames,
        ContainerRegistries = new List<ContainerRegistry> { containerRegistry }
    };

    // Create the virtual machine image reference - make sure to choose an image that supports containers
    ImageReference imageReference = new ImageReference(
        publisher: "MicrosoftWindowsServer",
        offer: "WindowsServer",
        sku: "2016-datacenter-with-containers",
        version: "latest");

    // Create the virtual machine configuration for the pool and set the container configuration
    VirtualMachineConfiguration virtualMachineConfiguration = new VirtualMachineConfiguration(
        imageReference: imageReference,
        nodeAgentSkuId: "batch.node.windows amd64");

    virtualMachineConfiguration.ContainerConfiguration = containerConfig;

    BatchSharedKeyCredentials cred = new BatchSharedKeyCredentials(BatchAccountUrl, BatchAccountName, BatchAccountKey);

    using (BatchClient batchClient = BatchClient.Open(cred))
    {
        Console.WriteLine("Creating pool [{0}]...", PoolId);

        try
        {
            CloudPool pool = batchClient.PoolOperations.CreatePool(
                poolId: PoolId,
                targetDedicatedComputeNodes: PoolNodeCount,
                virtualMachineSize: PoolVMSize,
                virtualMachineConfiguration: virtualMachineConfiguration);

            pool.Commit();
        }
        catch (BatchException be)
        {
            // Accept the specific error code PoolExists as that is expected if the pool already exists
            if (be.RequestInformation?.BatchError?.Code == BatchErrorCodeStrings.PoolExists)
            {
                Console.WriteLine("The pool {0} already existed when we tried to create it", PoolId);
            }
            else
            {
                throw; // Any other exception is unexpected
            }
        }

        Console.WriteLine("Creating job [{0}]...", JobId);

        CloudJob job = null;

        try
        {
            job = batchClient.JobOperations.CreateJob();
            job.Id = JobId;
            job.PoolInformation = new PoolInformation { PoolId = PoolId };

            // Add job preparation task to remove existing Docker image
            Console.WriteLine("Adding job preparation task to job [{0}]...", JobId);
            string jobPreparationCmdLine = $"cmd /c docker rmi -f {ContainerImageNames[0]}:latest";
            JobPreparationTask jobPreparationTask = new JobPreparationTask(jobPreparationCmdLine)
            {
                // Admin elevation is required to manage Docker images on the node
                UserIdentity = new UserIdentity(new AutoUserSpecification(elevationLevel: ElevationLevel.Admin, scope: AutoUserScope.Task))
            };
            job.JobPreparationTask = jobPreparationTask;

            job.Commit();
        }
        catch (BatchException be)
        {
            // Accept the specific error code JobExists as that is expected if the job already exists
            if (be.RequestInformation?.BatchError?.Code == BatchErrorCodeStrings.JobExists)
            {
                Console.WriteLine("The job {0} already existed when we tried to create it", JobId);
            }
            else
            {
                throw; // Any other exception is unexpected
            }
        }

        if (job != null)
        {
            // Create a collection to hold the tasks that we'll be adding to the job
            Console.WriteLine("Adding {0} tasks to job [{1}]...", inputFiles.Count, JobId);

            List<CloudTask> tasks = new List<CloudTask>();

            // Create each of the tasks to process one of the input files.
            for (int i = 0; i < inputFiles.Count; i++)
            {
                string taskId = String.Format("Task{0}", i);
                string inputFilename = inputFiles[i].FilePath;
                string outputFileName = string.Format("out{0}", inputFilename);

                // Override the default entrypoint of the container
                string taskCommandLine = string.Format("C:\\ReadWriteFile\\ReadWriteFile.exe {0} {1}", inputFilename, outputFileName);

                // Specify the container the task will run
                TaskContainerSettings cmdContainerSettings = new TaskContainerSettings(
                    imageName: "nimccollftacr.azurecr.io/batch/readwritefile"
                    );

                CloudTask task = new CloudTask(taskId, taskCommandLine);
                task.ContainerSettings = cmdContainerSettings;

                // Set the resource files and output files for the task
                task.ResourceFiles = new List<ResourceFile> { inputFiles[i] };
                task.OutputFiles = new List<OutputFile>
                {
                    new OutputFile(
                        filePattern: outputFileName,
                        destination: new OutputFileDestination(new OutputFileBlobContainerDestination(containerUrl: outputContainerSasUrl, path: outputFileName)),
                        uploadOptions: new OutputFileUploadOptions(OutputFileUploadCondition.TaskCompletion))
                };

                // You must elevate the identity of the task in order to run a container
                task.UserIdentity = new UserIdentity(new AutoUserSpecification(elevationLevel: ElevationLevel.Admin, scope: AutoUserScope.Task));
                tasks.Add(task);
            }

            // Add all tasks to the job.
            batchClient.JobOperations.AddTask(JobId, tasks);

            // Monitor task success/failure, specifying a maximum amount of time to wait for the tasks to complete.
            TimeSpan timeout = TimeSpan.FromMinutes(30);
            Console.WriteLine("Monitoring all tasks for 'Completed' state, timeout in {0}...", timeout);

            IEnumerable<CloudTask> addedTasks = batchClient.JobOperations.ListTasks(JobId);

            batchClient.Utilities.CreateTaskStateMonitor().WaitAll(addedTasks, TaskState.Completed, timeout);

            Console.WriteLine("All tasks reached state Completed.");

            // Print task output
            Console.WriteLine();
            Console.WriteLine("Printing task output...");

            IEnumerable<CloudTask> completedtasks = batchClient.JobOperations.ListTasks(JobId);

            foreach (CloudTask task in completedtasks)
            {
                string nodeId = String.Format(task.ComputeNodeInformation.ComputeNodeId);

                Console.WriteLine("Task: {0}", task.Id);
                Console.WriteLine("Node: {0}", nodeId);
                Console.WriteLine("Standard out:");
                Console.WriteLine(task.GetNodeFile(Constants.StandardOutFileName).ReadAsString());
            }
        }

        // Clean up Batch resources (if the user so chooses)
        Console.WriteLine();
        Console.Write("Delete job? [yes] no: ");
        string response = Console.ReadLine().ToLower();

        if (response != "n" && response != "no")
        {
            batchClient.JobOperations.DeleteJob(JobId);
        }

        Console.Write("Delete pool? [yes] no: ");
        response = Console.ReadLine().ToLower();

        if (response != "n" && response != "no")
        {
            batchClient.PoolOperations.DeletePool(PoolId);
        }
    }
}
// Demonstrates job preparation and release tasks: the prep task writes the node id
// to a shared file, each job task appends its own id, and the release task deletes
// the file. After the tasks finish, the sample prints the shared file contents,
// terminates the job to trigger the release task, and reports the prep/release
// exit codes per node.
private static async Task MainAsync()
{
    string poolId = "JobPrepReleaseSamplePool";
    string jobId = "JobPrepReleaseSampleJob";

    var settings = Config.LoadAccountSettings();

    // Location of the file that the job tasks will work with, a text file in the
    // node's "shared" directory.
    string taskOutputFile = "$AZ_BATCH_NODE_SHARED_DIR/job_prep_and_release.txt";

    // The job prep task will write the node ID to the text file in the shared directory
    string jobPrepCmdLine = $@"/bin/bash -c ""echo $AZ_BATCH_NODE_ID tasks: > {taskOutputFile}""";

    // Each task then echoes its ID to the same text file
    string taskCmdLine = $@"/bin/bash -c ""echo $AZ_BATCH_TASK_ID >> {taskOutputFile}""";

    // The job release task will then delete the text file from the shared directory
    string jobReleaseCmdLine = $@"/bin/bash -c ""rm {taskOutputFile}""";

    BatchSharedKeyCredentials cred = new BatchSharedKeyCredentials(settings.BatchServiceUrl, settings.BatchAccountName, settings.BatchAccountKey);

    using (BatchClient batchClient = BatchClient.Open(cred))
    {
        var pool = await BatchUtils.CreatePoolIfNotExistAsync(batchClient, poolId);

        var prepTask = new JobPreparationTask { CommandLine = jobPrepCmdLine };
        var releaseTask = new JobReleaseTask { CommandLine = jobReleaseCmdLine };

        var job = await BatchUtils.CreateJobIfNotExistAsync(batchClient, pool.Id, jobId, prepTask : prepTask, releaseTask : releaseTask);

        // Create the tasks that the job will execute
        List<CloudTask> tasks = new List<CloudTask>();
        for (int i = 1; i <= 8; i++)
        {
            string taskId = "task" + i.ToString().PadLeft(3, '0');
            string taskCommandLine = taskCmdLine;
            CloudTask task = new CloudTask(taskId, taskCommandLine);
            tasks.Add(task);
        }

        // Add the tasks in one API call as opposed to a separate AddTask call for each. Bulk task
        // submission helps to ensure efficient underlying API calls to the Batch service.
        Console.WriteLine("Submitting tasks and awaiting completion...");
        await batchClient.JobOperations.AddTaskAsync(job.Id, tasks);

        // Wait for the tasks to complete before proceeding. The long timeout here is to allow time
        // for the nodes within the pool to be created and started if the pool had not yet been created.
        await batchClient.Utilities.CreateTaskStateMonitor().WhenAll(
            job.ListTasks(),
            TaskState.Completed,
            TimeSpan.FromMinutes(30));

        Console.WriteLine("All tasks completed.");
        Console.WriteLine();

        // Print the contents of the shared text file modified by the job preparation and other tasks.
        ODATADetailLevel nodeDetail = new ODATADetailLevel(selectClause: "id, state");
        IPagedEnumerable<ComputeNode> nodes = batchClient.PoolOperations.ListComputeNodes(poolId, nodeDetail);

        await nodes.ForEachAsync(async (node) =>
        {
            // Check to ensure that the node is Idle before attempting to pull the text file.
            // If the pool was just created, there is a chance that another node completed all
            // of the tasks prior to the other node(s) completing their startup procedure.
            if (node.State == ComputeNodeState.Idle)
            {
                var files = await node.ListNodeFiles().ToListAsync();

                NodeFile sharedTextFile = await node.GetNodeFileAsync("shared/job_prep_and_release.txt");
                Console.WriteLine("Contents of {0} on {1}:", sharedTextFile.Path, node.Id);
                Console.WriteLine("-------------------------------------------");
                Console.WriteLine(await sharedTextFile.ReadAsStringAsync());
            }
        });

        // Terminate the job to mark it as Completed; this will initiate the Job Release Task on any node
        // that executed job tasks. Note that the Job Release Task is also executed when a job is deleted,
        // thus you need not call Terminate if you typically delete your jobs upon task completion.
        await batchClient.JobOperations.TerminateJobAsync(job.Id);

        // Wait for the job to reach state "Completed." Note that this wait is not typically necessary in
        // production code, but is done here to enable the checking of the release tasks exit code below.
        await BatchUtils.WaitForJobToReachStateAsync(batchClient, job.Id, JobState.Completed, TimeSpan.FromMinutes(2));

        // Print the exit codes of the prep and release tasks by obtaining their execution info
        List<JobPreparationAndReleaseTaskExecutionInformation> prepReleaseInfo = await batchClient.JobOperations.ListJobPreparationAndReleaseTaskStatus(job.Id).ToListAsync();

        foreach (JobPreparationAndReleaseTaskExecutionInformation info in prepReleaseInfo)
        {
            Console.WriteLine();
            Console.WriteLine("{0}: ", info.ComputeNodeId);

            // If no tasks were scheduled to run on the node, the JobPreparationTaskExecutionInformation will be null
            if (info.JobPreparationTaskExecutionInformation != null)
            {
                Console.WriteLine(" Prep task exit code: {0}", info.JobPreparationTaskExecutionInformation.ExitCode);
            }

            // If no tasks were scheduled to run on the node, the JobReleaseTaskExecutionInformation will be null
            if (info.JobReleaseTaskExecutionInformation != null)
            {
                Console.WriteLine(" Release task exit code: {0}", info.JobReleaseTaskExecutionInformation.ExitCode);
            }
        }

        // Clean up the resources we've created in the Batch account
        Console.WriteLine();
        Console.WriteLine("Delete job? [yes] no");
        string response = Console.ReadLine().ToLower();

        if (response != "n" && response != "no")
        {
            // Note that deleting the job will execute the job release task if the job was not previously terminated
            await batchClient.JobOperations.DeleteJobAsync(job.Id);
        }

        Console.WriteLine("Delete pool? [yes] no");
        response = Console.ReadLine();

        if (response != "n" && response != "no")
        {
            await batchClient.PoolOperations.DeletePoolAsync(poolId);
        }
    }
}
/// <summary>
/// Creates an Azure Batch job targeting the given pool and commits it to the service.
/// </summary>
/// <param name="batchClient">Authenticated Batch client used to issue the job operation.</param>
/// <param name="jobId">Identifier for the new job.</param>
/// <param name="poolId">Identifier of the pool the job's tasks will run on.</param>
/// <param name="jobPrepTask">Optional job preparation task attached to the job before commit.</param>
/// <returns>A task that completes once the job has been committed.</returns>
private static async Task CreateJob(BatchClient batchClient, string jobId, string poolId, JobPreparationTask jobPrepTask = null)
{
    Console.WriteLine($"Creating job { jobId }...");

    // Build the unbound job locally; nothing is sent to the service until CommitAsync.
    CloudJob newJob = batchClient.JobOperations.CreateJob();
    newJob.Id = jobId;
    newJob.PoolInformation = new PoolInformation { PoolId = poolId };

    // Attach the prep task only when the caller supplied one.
    if (jobPrepTask != null)
    {
        newJob.JobPreparationTask = jobPrepTask;
    }

    await newJob.CommitAsync();
}
/// <summary>
/// End-to-end Azure Batch demo: creates storage containers, uploads the .NET Core
/// install script and the application package, builds a pool with a start task and a
/// job with a job preparation task, submits tasks, monitors them, then interactively
/// offers to delete the job and pool.
/// </summary>
/// <returns>A task that completes when the demo run (including cleanup prompts) finishes.</returns>
static async Task Execute_2()
{
    // setup unique names for pool and job
    var poolId = "DemoPool2";
    var jobId = "DemoJob2";
    var ApplicationFile = "process-data.tar.gz";
    var SetupFile = "dotnetcoreinstall.sh";
    var settings = Config.LoadAccountSettings();

    // create a storage blob client to upload files
    var blobClient = CreateBlobClient(settings.StorageAccountName, settings.StorageAccountKey);

    Console.WriteLine("\n>>> Creating Storage Containers <<<\n");

    // Use the blob client to create the containers in Azure Blob Storage
    Console.WriteLine("Creating container to store Application files");
    CreateStorageContainersIfNotExist(blobClient, AppContainerName);

    // Use the blob client to create the containers in Azure Blob Storage.
    Console.WriteLine("Creating container to store csv files");
    CreateStorageContainersIfNotExist(blobClient, InputContainerName); // store input csv files

    // NOTE(review): "output files files" in the message below is a typo in a runtime
    // string; left untouched here — fix belongs in a behavioral change.
    Console.WriteLine("Creating container to store output files files");
    CreateStorageContainersIfNotExist(blobClient, OutputContainerName); // store output text files
    PromptContinue();

    Console.WriteLine("\n>>> Setting up storage references <<<\n");

    // get reference to application SAS url
    Console.WriteLine("Uploading .NET Core install script to Azure Storage");
    ResourceFile setupFile = await UploadFileOrGetReference(blobClient, AppContainerName, SetupFile, false);
    Console.WriteLine($"[INFO] {SetupFile} SAS: {setupFile.HttpUrl}");

    // get reference to application SAS url
    Console.WriteLine("Uploading application to Azure Storage");
    ResourceFile application = await UploadFileOrGetReference(blobClient, AppContainerName, ApplicationFile, false);
    Console.WriteLine($"[INFO] {ApplicationFile} SAS: {application.HttpUrl}");

    // get reference to output container SAS uri
    Console.WriteLine("Getting reference to container for storing task output");
    var outputContainerSasUrl = GetContainerSasUrl(blobClient, OutputContainerName);
    Console.WriteLine($"[INFO] {OutputContainerName} SAS: {outputContainerSasUrl}");
    PromptContinue();

    // up until here, we haven't used the Batch API at all (except for ResourceFile type)
    BatchSharedKeyCredentials cred = new BatchSharedKeyCredentials(settings.BatchServiceUrl, settings.BatchAccountName, settings.BatchAccountKey);

    using (var batchClient = BatchClient.Open(cred))
    {
        Console.WriteLine("\n>>> Creating Pool <<<\n");

        // Start task runs once per node as it joins the pool; elevated so the
        // install script can use sudo. WaitForSuccess blocks task scheduling on
        // the node until the install completes.
        var startTask = new StartTask
        {
            CommandLine = $@"/bin/bash -c ""sudo sh dotnetcoreinstall.sh""",
            WaitForSuccess = true,
            UserIdentity = new UserIdentity(
                new AutoUserSpecification(
                    scope: AutoUserScope.Pool,
                    elevationLevel: ElevationLevel.Admin)
            ),
            ResourceFiles = new[] { setupFile }
        };
        Console.WriteLine("Creating start task:");
        Console.WriteLine($" CommandLine: {startTask.CommandLine}");
        Console.WriteLine($" ResourceFiles:");
        foreach (var f in startTask.ResourceFiles)
        {
            Console.WriteLine($" {f.HttpUrl}");
        }
        Console.WriteLine();
        await CreatePoolIfNotExist(batchClient, poolId, startTask : startTask);
        PromptContinue();

        Console.WriteLine("\n>>> Creating Job <<<\n");

        // Job prep task untars the application into the node's shared directory so
        // every task on the node can run it; WaitForSuccess gates tasks on the unpack.
        JobPreparationTask jobPrepTask = new JobPreparationTask
        {
            CommandLine = $@"/bin/bash -c ""cd $AZ_BATCH_NODE_SHARED_DIR && tar -xvf $AZ_BATCH_JOB_PREP_WORKING_DIR/{ApplicationFile}""",
            WaitForSuccess = true,
            ResourceFiles = new[] { application }
        };
        Console.WriteLine("Creating job preparation task:");
        Console.WriteLine($" CommandLine: {jobPrepTask.CommandLine}");
        Console.WriteLine($" ResourceFiles:");
        foreach (var f in jobPrepTask.ResourceFiles)
        {
            Console.WriteLine($" {f.HttpUrl}");
        }
        Console.WriteLine();
        await CreateJob(batchClient, jobId, poolId, jobPrepTask : jobPrepTask);
        PromptContinue();

        Console.WriteLine("\n>>> Creating Tasks <<<\n");

        // Runs the unpacked app out of the shared dir prepared by the job prep task.
        var executable = "dotnet $AZ_BATCH_NODE_SHARED_DIR/process-data.dll";
        await AddAllTasksToJob(batchClient, jobId, executable, blobClient, outputContainerSasUrl);

        Console.WriteLine("\n>>> Monitor tasks <<<\n");
        // NOTE(review): result is captured but unused from here on — presumably kept
        // for debugging/future reporting; confirm before removing.
        var tasksSucceeded = await MonitorTasks(batchClient, jobId, TimeSpan.FromMinutes(60));

        // Clean up Batch resources (if the user so chooses)
        Console.WriteLine();
        Console.Write("Delete job? [yes] no: ");
        // NOTE(review): Console.ReadLine() returns null at EOF, which would make
        // ToLower() throw — acceptable for an interactive demo, but confirm.
        string response = Console.ReadLine().ToLower();
        if (response != "n" && response != "no")
        {
            batchClient.JobOperations.DeleteJob(jobId);
        }
        Console.Write("Delete pool? [yes] no: ");
        response = Console.ReadLine().ToLower();
        if (response != "n" && response != "no")
        {
            batchClient.PoolOperations.DeletePool(poolId);
        }
    }
}