/// <summary> /// Waits for all tasks under the specified job to complete and then prints each task's output to the console. /// </summary> /// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param> /// <param name="jobId">The ID of the job.</param> /// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns> private static async Task WaitForJobAndPrintOutputAsync(BatchClient batchClient, string jobId) { Console.WriteLine("Waiting for all tasks to complete on job: {0} ...", jobId); // We use the task state monitor to monitor the state of our tasks -- in this case we will wait for them all to complete. TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); List <CloudTask> ourTasks = await batchClient.JobOperations.ListTasks(jobId).ToListAsync(); // Wait for all tasks to reach the completed state. // If the pool is being resized then enough time is needed for the nodes to reach the idle state in order // for tasks to run on them. bool timedOut = await taskStateMonitor.WhenAllAsync(ourTasks, TaskState.Completed, TimeSpan.FromMinutes(10)); if (timedOut) { throw new TimeoutException("Timed out waiting for tasks"); } // dump task output foreach (CloudTask t in ourTasks) { Console.WriteLine("Task {0}", t.Id); //Read the standard out of the task NodeFile standardOutFile = await t.GetNodeFileAsync(Constants.StandardOutFileName); string standardOutText = await standardOutFile.ReadAsStringAsync(); Console.WriteLine("Standard out:"); Console.WriteLine(standardOutText); Console.WriteLine(); } }
/// <summary> /// Waits for all tasks under the specified job to complete and then prints each task's output to the console. /// </summary> /// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param> /// <param name="tasks">The tasks to wait for.</param> /// <param name="timeout">The timeout. After this time has elapsed if the job is not complete and exception will be thrown.</param> /// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns> public static async Task WaitForTasksAndPrintOutputAsync(BatchClient batchClient, IEnumerable <CloudTask> tasks, TimeSpan timeout) { // We use the task state monitor to monitor the state of our tasks -- in this case we will wait for them all to complete. TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); // Wait until the tasks are in completed state. List <CloudTask> ourTasks = tasks.ToList(); await taskStateMonitor.WhenAll(ourTasks, TaskState.Completed, timeout).ConfigureAwait(continueOnCapturedContext: false); // dump task output foreach (CloudTask t in ourTasks) { Console.WriteLine("Task {0}", t.Id); //Read the standard out of the task NodeFile standardOutFile = await t.GetNodeFileAsync(Constants.StandardOutFileName).ConfigureAwait(continueOnCapturedContext: false); string standardOutText = await standardOutFile.ReadAsStringAsync().ConfigureAwait(continueOnCapturedContext: false); Console.WriteLine("Standard out:"); Console.WriteLine(standardOutText); //Read the standard error of the task NodeFile standardErrorFile = await t.GetNodeFileAsync(Constants.StandardErrorFileName).ConfigureAwait(continueOnCapturedContext: false); string standardErrorText = await standardErrorFile.ReadAsStringAsync().ConfigureAwait(continueOnCapturedContext: false); Console.WriteLine("Standard error:"); Console.WriteLine(standardErrorText); Console.WriteLine(); } }
/// <summary> /// Monitors the specified task for completion and whether errors occurred. /// </summary> /// <param name="batchClient">A BatchClient object.</param> /// <param name="jobId">ID of the job containing the task to be monitored.</param> /// <param name="taskId">ID of the task to be monitored.</param> /// <param name="timeout">The period of time to wait for the tasks to reach the completed state.</param> private static async Task MonitorSpecificTaskToCompleteAsync(BatchClient batchClient, string jobId, string taskId, TimeSpan timeout) { // List the task which we track ODATADetailLevel detail = new ODATADetailLevel(selectClause: "id", filterClause: $"id eq '{taskId}'"); List <CloudTask> monitoredCloudTasks = await batchClient.JobOperations.ListTasks(jobId, detail).ToListAsync(); // Task Monitor will be watching a single task TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor(); try { // Waiting for the task to get to state Completed await monitor.WhenAll(monitoredCloudTasks, TaskState.Completed, timeout); } catch (Exception e) { Shared.Logger.Error($"AzureFitness.MonitorSpecificTaskToCompleteAsync(): {e.Message}"); throw; } // All tasks have reached the "Completed" state, however, this does not guarantee all tasks completed successfully. // Here we further check for any tasks with an execution result of "Failure". // Update the detail level to populate only the executionInfo property. detail.SelectClause = "executionInfo"; // Filter for tasks with 'Failure' result. detail.FilterClause = "executionInfo/result eq 'Failure'"; List <CloudTask> failedTasks = await batchClient.JobOperations.ListTasks(jobId, detail).ToListAsync(); if (failedTasks.Any()) { Shared.Logger.Error($"{taskId} failed."); } }
private async Task <string> WaitForReducerTaskToCompleteAsync(BatchClient batchClient) { //Get the bound reducer task and monitor it for completion. CloudTask boundReducerTask = await batchClient.JobOperations.GetTaskAsync(this.jobId, Constants.ReducerTaskId); TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); string stdOut; try { await taskStateMonitor.WhenAll(new List <CloudTask> { boundReducerTask }, TaskState.Completed, TimeSpan.FromMinutes(2)); } catch (TimeoutException) { Console.WriteLine("Reducer task did not complete within expected timeout."); throw; } finally { //Refresh the reducer task to get the most recent information about it from the Batch Service. await boundReducerTask.RefreshAsync(); //Dump the reducer tasks exit code and scheduling error for debugging purposes. stdOut = await Helpers.CheckForTaskSuccessAsync(boundReducerTask, dumpStandardOutOnTaskSuccess : true); } return(stdOut); }
public void Bug2338301_CheckStreamPositionAfterFileRead() { Action test = () => { using (BatchClient batchCli = TestUtilities.OpenBatchClientAsync(TestUtilities.GetCredentialsFromEnvironment()).Result) { JobOperations jobOperations = batchCli.JobOperations; { string jobId = "Bug2338301Job-" + TestUtilities.GetMyName(); try { const string taskId = "hiWorld"; // // Create the job // CloudJob unboundJob = jobOperations.CreateJob(jobId, new PoolInformation() { PoolId = this.poolFixture.PoolId }); unboundJob.Commit(); CloudJob boundJob = jobOperations.GetJob(jobId); CloudTask myTask = new CloudTask(taskId, "cmd /c echo hello world"); boundJob.AddTask(myTask); this.testOutputHelper.WriteLine("Initial job commit()"); // // Wait for task to go to completion // Utilities utilities = batchCli.Utilities; TaskStateMonitor taskStateMonitor = utilities.CreateTaskStateMonitor(); taskStateMonitor.WaitAll( boundJob.ListTasks(), Microsoft.Azure.Batch.Common.TaskState.Completed, TimeSpan.FromMinutes(3)); CloudTask boundTask = boundJob.GetTask(taskId); //Get the task file const string fileToGet = "stdout.txt"; NodeFile file = boundTask.GetNodeFile(fileToGet); //Download the file data string result = file.ReadAsString(); Assert.True(result.Length > 0); } finally { jobOperations.DeleteJob(jobId); } } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }
public async Task TaskStateMonitorCancelled_ThrowsCancellationException() { TimeSpan timeout = TimeSpan.FromSeconds(0); const string dummyJobId = "Dummy"; using (BatchClient batchCli = BatchClient.Open(ClientUnitTestCommon.CreateDummySharedKeyCredential())) { List <string> taskIds = new List <string>() { "task1", "task2" }; //Create some tasks which are "bound" IEnumerable <Protocol.Models.CloudTask> protocolTasks = taskIds.Select(CreateProtocolCloudTask); IEnumerable <CloudTask> taskList = protocolTasks.Select(protoTask => CreateBoundCloudTask(batchCli, dummyJobId, protoTask)); TaskStateMonitor taskStateMonitor = batchCli.Utilities.CreateTaskStateMonitor(); using (CancellationTokenSource cts = new CancellationTokenSource(timeout)) { await Assert.ThrowsAsync <OperationCanceledException>(async() => await taskStateMonitor.WhenAll( taskList, TaskState.Completed, cts.Token, additionalBehaviors: InterceptorFactory.CreateListTasksRequestInterceptor(protocolTasks))); } } }
public async Task TaskStateMonitorTimedOut_ThrowsTimeoutException() { TimeSpan timeout = TimeSpan.FromSeconds(0); const string dummyJobId = "Dummy"; using (BatchClient batchCli = BatchClient.Open(ClientUnitTestCommon.CreateDummySharedKeyCredential())) { List <string> taskIds = new List <string>() { "task1", "task2" }; //Create some tasks which are "bound" IEnumerable <Protocol.Models.CloudTask> protocolTasks = taskIds.Select(CreateProtocolCloudTask); IEnumerable <CloudTask> taskList = protocolTasks.Select(protoTask => CreateBoundCloudTask(batchCli, dummyJobId, protoTask)); TaskStateMonitor taskStateMonitor = batchCli.Utilities.CreateTaskStateMonitor(); TimeoutException e = await Assert.ThrowsAsync <TimeoutException>(async() => await taskStateMonitor.WhenAll( taskList, TaskState.Completed, timeout, additionalBehaviors: InterceptorFactory.CreateListTasksRequestInterceptor(protocolTasks))); Assert.Contains(string.Format("waiting for resources after {0}", timeout), e.Message); Assert.IsType <OperationCanceledException>(e.InnerException); } }
private async Task <string> WaitForReducerTaskToCompleteAsync(BatchClient batchClient) { //Get the bound reducer task and monitor it for completion. CloudTask boundReducerTask = await batchClient.JobOperations.GetTaskAsync(this.jobId, Constants.ReducerTaskId); TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); bool timedOut = await taskStateMonitor.WaitAllAsync(new List <CloudTask> { boundReducerTask }, TaskState.Completed, TimeSpan.FromMinutes(2)); //Refresh the reducer task to get the most recent information about it from the Batch Service. await boundReducerTask.RefreshAsync(); //Dump the reducer tasks exit code and scheduling error for debugging purposes. string stdOut = await Helpers.CheckForTaskSuccessAsync(boundReducerTask, dumpStandardOutOnTaskSuccess : true); //Handle the possibilty that the reducer task did not complete in the expected timeout. if (timedOut) { const string errorMessage = "Reducer task did not complete within expected timeout."; Console.WriteLine("Task {0} is in state: {1}", boundReducerTask.Id, boundReducerTask.State); Console.WriteLine(errorMessage); throw new TimeoutException(errorMessage); } return(stdOut); }
/// <summary> /// Monitors the specified tasks for completion and whether errors occurred. /// </summary> /// <param name="batchClient">A BatchClient object.</param> /// <param name="jobId">ID of the job containing the tasks to be monitored.</param> /// <param name="timeout">The period of time to wait for the tasks to reach the completed state.</param> private static async Task <bool> MonitorTasks(BatchClient batchClient, string jobId, TimeSpan timeout) { bool allTasksSuccessful = true; const string completeMessage = "All tasks reached state Completed."; const string incompleteMessage = "One or more tasks failed to reach the Completed state within the timeout period."; const string successMessage = "Success! All tasks completed successfully. Output files uploaded to output container."; const string failureMessage = "One or more tasks failed."; // Obtain the collection of tasks currently managed by the job. // Use a detail level to specify that only the "id" property of each task should be populated. // See https://docs.microsoft.com/en-us/azure/batch/batch-efficient-list-queries ODATADetailLevel detail = new ODATADetailLevel(selectClause: "id"); List <CloudTask> addedTasks = await batchClient.JobOperations.ListTasks(jobId, detail).ToListAsync(); Console.WriteLine("Monitoring all tasks for 'Completed' state, timeout in {0}...", timeout.ToString()); // We use a TaskStateMonitor to monitor the state of our tasks. In this case, we will wait for all tasks to // reach the Completed state. TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); try { await taskStateMonitor.WhenAll(addedTasks, TaskState.Completed, timeout); } catch (TimeoutException) { await batchClient.JobOperations.TerminateJobAsync(jobId); Console.WriteLine(incompleteMessage); return(false); } await batchClient.JobOperations.TerminateJobAsync(jobId); Console.WriteLine(completeMessage); // All tasks have reached the "Completed" state, however, this does not guarantee all tasks completed successfully. // Here we further check for any tasks with an execution result of "Failure". // Update the detail level to populate only the executionInfo property. detail.SelectClause = "executionInfo"; // Filter for tasks with 'Failure' result. detail.FilterClause = "executionInfo/result eq 'Failure'"; List <CloudTask> failedTasks = await batchClient.JobOperations.ListTasks(jobId, detail).ToListAsync(); if (failedTasks.Any()) { allTasksSuccessful = false; Console.WriteLine(failureMessage); } else { Console.WriteLine(successMessage); } return(allTasksSuccessful); }
/// <summary> /// Waits for the specified task to complete /// </summary> public static void WaitForTaskCompletion(BatchController controller, BatchAccountContext context, string jobId, string taskId) { BatchClient client = new BatchClient(controller.BatchManagementClient, controller.ResourceManagementClient); ListTaskOptions options = new ListTaskOptions(context, jobId, null) { TaskId = taskId }; IEnumerable <PSCloudTask> tasks = client.ListTasks(options); // Save time by not waiting during playback scenarios if (HttpMockServer.Mode == HttpRecorderMode.Record) { TaskStateMonitor monitor = context.BatchOMClient.Utilities.CreateTaskStateMonitor(); monitor.WaitAll(tasks.Select(t => t.omObject), TaskState.Completed, TimeSpan.FromMinutes(10), null); } }
private async Task WaitForMapperTasksToCompleteAsync(BatchClient batchClient) { Console.WriteLine("Waiting for the mapper tasks to complete..."); //List all the mapper tasks using an id filter. DetailLevel mapperTaskIdFilter = new ODATADetailLevel() { FilterClause = string.Format("startswith(id, '{0}')", Constants.MapperTaskPrefix) }; IEnumerable <CloudTask> tasksToMonitor = batchClient.JobOperations.ListTasks( this.jobId, detailLevel: mapperTaskIdFilter); // Use the task state monitor to wait for the tasks to complete. Monitoring the tasks // for completion is necessary if you are using KillJobOnCompletion = TRUE, as otherwise when the job manager // exits it will kill all of the tasks that are still running under the job. TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); bool timedOut = await taskStateMonitor.WaitAllAsync(tasksToMonitor, TaskState.Completed, TimeSpan.FromMinutes(5)); //Get the list of mapper tasks in order to analyze their state and ensure they completed successfully. IPagedEnumerable <CloudTask> asyncEnumerable = batchClient.JobOperations.ListTasks( this.jobId, detailLevel: mapperTaskIdFilter); await asyncEnumerable.ForEachAsync(async cloudTask => { Console.WriteLine("Task {0} is in state: {1}", cloudTask.Id, cloudTask.State); await Helpers.CheckForTaskSuccessAsync(cloudTask, dumpStandardOutOnTaskSuccess: false); Console.WriteLine(); }); //If not all the tasks reached the desired state within the timeout then the job manager //cannot continue. if (timedOut) { const string errorMessage = "Mapper tasks did not complete within expected timeout."; Console.WriteLine(errorMessage); throw new TimeoutException(errorMessage); } }
/// <summary> /// Create a job and add two simple tasks to it. Wait for completion using the Task state monitor /// </summary> private static void AddJobTwoTasks(BatchClient client, string sharedPoolId) { string jobId = CreateJobId("HelloWorldTwoTaskJob"); Console.WriteLine("Creating job: " + jobId); CloudJob boundJob = CreateBoundJob(client.JobOperations, sharedPoolId, jobId); // add 2 quick tasks. Each task within a job must have a unique ID List <CloudTask> tasksToRun = new List <CloudTask>(2); tasksToRun.Add(new CloudTask("task1", "hostname")); tasksToRun.Add(new CloudTask("task2", "cmd /c dir /s")); client.JobOperations.AddTask(boundJob.Id, tasksToRun); Console.WriteLine("Waiting for all tasks to complete on Job: {0} ...", boundJob.Id); //We use the task state monitor to monitor the state of our tasks -- in this case we will wait for them all to complete. TaskStateMonitor taskStateMonitor = client.Utilities.CreateTaskStateMonitor(); // blocking wait on the list of tasks until all tasks reach completed state or the timeout is reached. // If the pool is being resized then enough time is needed for the VMs to reach the idle state in order // for tasks to run on them. IPagedEnumerable <CloudTask> ourTasks = boundJob.ListTasks(); bool timedOut = taskStateMonitor.WaitAll(ourTasks, TaskState.Completed, new TimeSpan(0, 10, 0)); if (timedOut) { throw new TimeoutException("Timed out waiting for tasks"); } // dump task output foreach (CloudTask t in ourTasks) { Console.WriteLine("Task " + t.Id); Console.WriteLine("stdout:\n" + t.GetNodeFile(Constants.StandardOutFileName).ReadAsString()); Console.WriteLine("\nstderr:\n" + t.GetNodeFile(Constants.StandardErrorFileName).ReadAsString()); } //Delete the job to ensure the tasks are cleaned up Console.WriteLine("Deleting job: {0}", boundJob.Id); client.JobOperations.DeleteJob(boundJob.Id); }
public void TestContainerTask() { void test() { using BatchClient client = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment()); string jobId = "ContainerJob" + TestUtilities.GetMyName(); try { CloudJob job = client.JobOperations.CreateJob(jobId, new PoolInformation { PoolId = poolFixture.PoolId }); job.Commit(); CloudTask newTask = new CloudTask("a", "cat /etc/centos-release") { ContainerSettings = new TaskContainerSettings("centos") }; client.JobOperations.AddTask(jobId, newTask); IPagedEnumerable <CloudTask> tasks = client.JobOperations.ListTasks(jobId); TaskStateMonitor monitor = client.Utilities.CreateTaskStateMonitor(); monitor.WaitAll(tasks, TaskState.Completed, TimeSpan.FromMinutes(7)); CloudTask task = tasks.Single(); task.Refresh(); Assert.Equal("ContainerPoolNotSupported", task.ExecutionInformation.FailureInformation.Code); } finally { TestUtilities.DeleteJobIfExistsAsync(client, jobId).Wait(); } } SynchronizationContextHelper.RunTest(test, TimeSpan.FromMinutes(10)); }
public void Bug1665834TaskStateMonitor() { void test() { using BatchClient batchCli = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment()); string jobId = "Bug1665834Job-" + TestUtilities.GetMyName(); try { CloudJob unboundJob = batchCli.JobOperations.CreateJob(jobId, new PoolInformation()); unboundJob.PoolInformation.PoolId = poolFixture.PoolId; unboundJob.Commit(); CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); // add some noise tasks for (int j = 0; j < 5; j++) { CloudTask unboundTaskQuick = new CloudTask((10 + j).ToString(), "cmd /c hostname"); boundJob.AddTask(unboundTaskQuick); } Thread.Sleep(5000); // wait for fast tasks to complete { bool repeat = true; while (repeat) { CloudPool boundPool = batchCli.PoolOperations.GetPool(poolFixture.PoolId); repeat = false; foreach (CloudTask curTask in boundJob.ListTasks()) { if (curTask.State != TaskState.Completed) { repeat = true; testOutputHelper.WriteLine("Manual Wait Task Id: " + curTask.Id + ", state = " + curTask.State); testOutputHelper.WriteLine(" poolstate: " + boundPool.State + ", currentdedicated: " + boundPool.CurrentDedicatedComputeNodes); testOutputHelper.WriteLine(" compute nodes:"); foreach (ComputeNode curComputeNode in boundPool.ListComputeNodes()) { testOutputHelper.WriteLine(" computeNode.Id: " + curComputeNode.Id + ", state: " + curComputeNode.State); } } } } } // add some longer running tasks testOutputHelper.WriteLine("Adding longer running tasks"); for (int i = 0; i < 15; i++) { CloudTask unboundTask = new CloudTask(i.ToString() + "_a234567890a234567890a234567890a234567890a234567890a234567890", "cmd /c ping 127.0.0.1 -n 4"); boundJob.AddTask(unboundTask); } Utilities utilities = batchCli.Utilities; TaskStateMonitor tsm = utilities.CreateTaskStateMonitor(); IPagedEnumerable <CloudTask> taskList = boundJob.ListTasks(); // try to set really low delay ODATAMonitorControl odmc = new ODATAMonitorControl { DelayBetweenDataFetch = new TimeSpan(0) }; // confirm the floor is enforced Assert.Equal(500, odmc.DelayBetweenDataFetch.Milliseconds); testOutputHelper.WriteLine("Calling TaskStateMonitor.WaitAll(). This will take a while."); TimeSpan timeToWait = TimeSpan.FromMinutes(5); Task whenAll = tsm.WhenAll(taskList, TaskState.Completed, timeToWait, controlParams: odmc); //This could throw, if it does the test will fail, which is what we want whenAll.Wait(); foreach (CloudTask curTask in boundJob.ListTasks()) { Assert.Equal(TaskState.Completed, curTask.State); } } finally { // cleanup TestUtilities.DeleteJobIfExistsAsync(batchCli, jobId).Wait(); } } SynchronizationContextHelper.RunTest(test, TestTimeout); }
public void TestGetNodeFileByTask() { Action test = () => { using (BatchClient batchCli = TestUtilities.OpenBatchClientAsync(TestUtilities.GetCredentialsFromEnvironment()).Result) { JobOperations jobOperations = batchCli.JobOperations; string jobId = Constants.DefaultConveniencePrefix + TestUtilities.GetMyName() + "-" + nameof(TestGetNodeFileByTask); try { // // Create the job // CloudJob job = jobOperations.CreateJob(jobId, new PoolInformation()); job.PoolInformation = new PoolInformation() { PoolId = this.poolFixture.PoolId }; this.testOutputHelper.WriteLine("Initial job schedule commit()"); job.Commit(); // // Wait for the job // this.testOutputHelper.WriteLine("Waiting for job"); CloudJob boundJob = jobOperations.GetJob(jobId); // // Add task to the job // const string taskId = "T1"; const string taskMessage = "This is a test"; this.testOutputHelper.WriteLine("Adding task: {0}", taskId); CloudTask task = new CloudTask(taskId, string.Format("cmd /c echo {0}", taskMessage)); boundJob.AddTask(task); // // Wait for the task to complete // this.testOutputHelper.WriteLine("Waiting for the task to complete"); Utilities utilities = batchCli.Utilities; TaskStateMonitor taskStateMonitor = utilities.CreateTaskStateMonitor(); //Wait for the task state to be running taskStateMonitor.WaitAll( jobOperations.ListTasks(jobId), TaskState.Completed, TimeSpan.FromSeconds(30)); //Download the data this.testOutputHelper.WriteLine("Downloading the stdout for the file"); NodeFile file = jobOperations.GetNodeFile(jobId, taskId, Constants.StandardOutFileName); string data = file.ReadAsString(); this.testOutputHelper.WriteLine("Data: {0}", data); Assert.Contains(taskMessage, data); // Download the data again using the JobOperations read file content helper data = batchCli.JobOperations.CopyNodeFileContentToString(jobId, taskId, Constants.StandardOutFileName); this.testOutputHelper.WriteLine("Data: {0}", data); Assert.Contains(taskMessage, data); } finally { jobOperations.DeleteJob(jobId); } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }
public void Bug1480491NodeFileFileProperties() { Action test = () => { using (BatchClient batchCli = TestUtilities.OpenBatchClientAsync(TestUtilities.GetCredentialsFromEnvironment()).Result) { string jobId = "Bug1480491Job-" + TestUtilities.GetMyName(); try { const string taskId = "hiWorld"; // // Create the job // CloudJob unboundJob = batchCli.JobOperations.CreateJob(jobId, new PoolInformation()); unboundJob.PoolInformation.PoolId = this.poolFixture.PoolId; unboundJob.Commit(); CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); CloudTask myTask = new CloudTask(taskId, "cmd /c echo hello world"); boundJob.AddTask(myTask); this.testOutputHelper.WriteLine("Initial job commit()"); // // Wait for task to go to completion // Utilities utilities = batchCli.Utilities; TaskStateMonitor taskStateMonitor = utilities.CreateTaskStateMonitor(); taskStateMonitor.WaitAll( boundJob.ListTasks(), Microsoft.Azure.Batch.Common.TaskState.Completed, TimeSpan.FromMinutes(3)); const int expectedFileSize = 13; //Magic number based on output generated by the task // // NodeFile by task // NodeFile file = batchCli.JobOperations.GetNodeFile(jobId, taskId, Constants.StandardOutFileName); this.testOutputHelper.WriteLine("File {0} has content length: {1}", Constants.StandardOutFileName, file.Properties.ContentLength); this.testOutputHelper.WriteLine("File {0} has content type: {1}", Constants.StandardOutFileName, file.Properties.ContentType); this.testOutputHelper.WriteLine("File {0} has creation time: {1}", Constants.StandardOutFileName, file.Properties.CreationTime); this.testOutputHelper.WriteLine("File {0} has last modified time: {1}", Constants.StandardOutFileName, file.Properties.LastModified); Assert.Equal(expectedFileSize, file.Properties.ContentLength); Assert.Equal("text/plain", file.Properties.ContentType); // // NodeFile by node // CloudTask boundTask = boundJob.GetTask(taskId); string computeNodeId = boundTask.ComputeNodeInformation.AffinityId.Split(':')[1]; ComputeNode computeNode = batchCli.PoolOperations.GetComputeNode(this.poolFixture.PoolId, computeNodeId); this.testOutputHelper.WriteLine("Task ran on compute node: {0}", computeNodeId); List <NodeFile> files = computeNode.ListNodeFiles(recursive: true).ToList(); foreach (NodeFile nodeFile in files) { this.testOutputHelper.WriteLine("Found file: {0}", nodeFile.Path); } string filePathToGet = string.Format("workitems/{0}/{1}/{2}/{3}", jobId, "job-1", taskId, Constants.StandardOutFileName); file = computeNode.GetNodeFile(filePathToGet); this.testOutputHelper.WriteLine("File {0} has content length: {1}", filePathToGet, file.Properties.ContentLength); this.testOutputHelper.WriteLine("File {0} has content type: {1}", filePathToGet, file.Properties.ContentType); this.testOutputHelper.WriteLine("File {0} has creation time: {1}", filePathToGet, file.Properties.CreationTime); this.testOutputHelper.WriteLine("File {0} has last modified time: {1}", filePathToGet, file.Properties.LastModified); Assert.Equal(expectedFileSize, file.Properties.ContentLength); Assert.Equal("text/plain", file.Properties.ContentType); } finally { batchCli.JobOperations.DeleteJob(jobId); } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }
public void Bug1480489NodeFileMissingIsDirectory() { Action test = () => { using (BatchClient batchCli = TestUtilities.OpenBatchClientAsync(TestUtilities.GetCredentialsFromEnvironment()).Result) { string jobId = "Bug1480489Job-" + TestUtilities.GetMyName(); try { // here we show how to use an unbound Job + Commit() to run millions of Tasks CloudJob unboundJob = batchCli.JobOperations.CreateJob(jobId, new PoolInformation() { PoolId = this.poolFixture.PoolId }); unboundJob.Commit(); // Open the new Job as bound. CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); CloudTask myTask = new CloudTask(id: "Bug1480489Task", commandline: @"md Bug1480489Directory"); // add the task to the job boundJob.AddTask(myTask); // wait for the task to complete Utilities utilities = batchCli.Utilities; TaskStateMonitor taskStateMonitor = utilities.CreateTaskStateMonitor(); taskStateMonitor.WaitAll( boundJob.ListTasks(), Microsoft.Azure.Batch.Common.TaskState.Completed, TimeSpan.FromMinutes(3)); CloudTask myCompletedTask = new List <CloudTask>(boundJob.ListTasks(null))[0]; string stdOut = myCompletedTask.GetNodeFile(Constants.StandardOutFileName).ReadAsString(); string stdErr = myCompletedTask.GetNodeFile(Constants.StandardErrorFileName).ReadAsString(); this.testOutputHelper.WriteLine("TaskId: " + myCompletedTask.Id); this.testOutputHelper.WriteLine("StdOut: "); this.testOutputHelper.WriteLine(stdOut); this.testOutputHelper.WriteLine("StdErr: "); this.testOutputHelper.WriteLine(stdErr); this.testOutputHelper.WriteLine("Task Files:"); bool foundAtLeastOneDir = false; foreach (NodeFile curFile in myCompletedTask.ListNodeFiles()) { this.testOutputHelper.WriteLine(" Filepath: " + curFile.Path); this.testOutputHelper.WriteLine(" IsDirectory: " + curFile.IsDirectory.ToString()); // turns out wd is created for each task so use it as sentinal if (curFile.Path.Equals("wd") && curFile.IsDirectory.HasValue && curFile.IsDirectory.Value) { foundAtLeastOneDir = true; } } Assert.True(foundAtLeastOneDir); } finally { TestUtilities.DeleteJobIfExistsAsync(batchCli, jobId).Wait(); } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }
public void TestNode_GetListDeleteFiles() { Action test = () => { using (BatchClient batchCli = TestUtilities.OpenBatchClientAsync(TestUtilities.GetCredentialsFromEnvironment()).Result) { string jobId = "TestNodeGetListDeleteFiles-" + TestUtilities.GetMyName(); try { const string taskId = "hiWorld"; const string directoryCreationTaskId1 = "dirTask1"; const string directoryCreationTaskId2 = "dirTask2"; const string directoryNameOne = "Foo"; const string directoryNameTwo = "Bar"; // // Create the job // CloudJob unboundJob = batchCli.JobOperations.CreateJob(jobId, new PoolInformation()); unboundJob.PoolInformation.PoolId = this.poolFixture.PoolId; unboundJob.Commit(); CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); CloudTask myTask = new CloudTask(taskId, "cmd /c echo hello world"); CloudTask directoryCreationTask1 = new CloudTask(directoryCreationTaskId1, string.Format("cmd /c mkdir {0} && echo test > {0}/testfile.txt", directoryNameOne)); CloudTask directoryCreationTask2 = new CloudTask(directoryCreationTaskId2, string.Format("cmd /c mkdir {0} && echo test > {0}/testfile.txt", directoryNameTwo)); boundJob.AddTask(myTask); boundJob.AddTask(directoryCreationTask1); boundJob.AddTask(directoryCreationTask2); this.testOutputHelper.WriteLine("Initial job commit()"); // // Wait for task to go to completion // Utilities utilities = batchCli.Utilities; TaskStateMonitor taskStateMonitor = utilities.CreateTaskStateMonitor(); taskStateMonitor.WaitAll( boundJob.ListTasks(), Microsoft.Azure.Batch.Common.TaskState.Completed, TimeSpan.FromMinutes(3)); CloudTask boundTask = boundJob.GetTask(taskId); //Since the compute node name comes back as "Node:<computeNodeId>" we need to split on : to get the actual compute node name string computeNodeId = boundTask.ComputeNodeInformation.AffinityId.Split(':')[1]; ComputeNode computeNode = batchCli.PoolOperations.GetComputeNode(this.poolFixture.PoolId, computeNodeId); this.testOutputHelper.WriteLine("Task ran on compute node: {0}", computeNodeId); //Ensure that ListFiles done without a recursive option, or with recursive false return the same values { List <NodeFile> filesByComputeNodeRecursiveOmitted = batchCli.PoolOperations.ListNodeFiles( this.poolFixture.PoolId, computeNodeId).ToList(); List <NodeFile> filesByComputeNodeRecursiveFalse = batchCli.PoolOperations.ListNodeFiles( this.poolFixture.PoolId, computeNodeId, recursive: false).ToList(); AssertFileListsMatch(filesByComputeNodeRecursiveOmitted, filesByComputeNodeRecursiveFalse); } { List <NodeFile> filesByTaskRecursiveOmitted = batchCli.JobOperations.ListNodeFiles( jobId, taskId).ToList(); List <NodeFile> filesByTaskRecursiveFalse = batchCli.JobOperations.ListNodeFiles( jobId, taskId, recursive: false).ToList(); AssertFileListsMatch(filesByTaskRecursiveOmitted, filesByTaskRecursiveFalse); } // // List all node files from operations -- recursive true // //TODO: Detail level? List <NodeFile> fileListFromComputeNodeOperations = batchCli.PoolOperations.ListNodeFiles(this.poolFixture.PoolId, computeNodeId, recursive: true).ToList(); foreach (NodeFile f in fileListFromComputeNodeOperations) { this.testOutputHelper.WriteLine("Found file: {0}", f.Path); } //Check to make sure the expected folder named "Shared" exists Assert.Contains("shared", fileListFromComputeNodeOperations.Select(f => f.Path)); // // List all node files from the compute node -- recursive true // List <NodeFile> fileListFromComputeNode = computeNode.ListNodeFiles(recursive: true).ToList(); foreach (NodeFile f in fileListFromComputeNodeOperations) { this.testOutputHelper.WriteLine("Found file: {0}", f.Path); } //Check to make sure the expected folder named "Shared" exists Assert.Contains("shared", fileListFromComputeNode.Select(f => f.Path)); // // Get file from operations // string filePathToGet = fileListFromComputeNode.First(f => !f.IsDirectory.Value && f.Properties.ContentLength > 0).Path; this.testOutputHelper.WriteLine("Getting file: {0}", filePathToGet); NodeFile computeNodeFileFromManager = batchCli.PoolOperations.GetNodeFile(this.poolFixture.PoolId, computeNodeId, filePathToGet); this.testOutputHelper.WriteLine("Successfully retrieved file: {0}", filePathToGet); this.testOutputHelper.WriteLine("---- File data: ----"); var computeNodeFileContentFromManager = computeNodeFileFromManager.ReadAsString(); this.testOutputHelper.WriteLine(computeNodeFileContentFromManager); Assert.NotEmpty(computeNodeFileContentFromManager); // // Get file directly from operations (bypassing the properties call) // var computeNodeFileContentDirect = batchCli.PoolOperations.CopyNodeFileContentToString(this.poolFixture.PoolId, computeNodeId, filePathToGet); this.testOutputHelper.WriteLine("---- File data: ----"); this.testOutputHelper.WriteLine(computeNodeFileContentDirect); Assert.NotEmpty(computeNodeFileContentDirect); // // Get file from compute node // this.testOutputHelper.WriteLine("Getting file: {0}", filePathToGet); NodeFile fileFromComputeNode = computeNode.GetNodeFile(filePathToGet); this.testOutputHelper.WriteLine("Successfully retrieved file: {0}", filePathToGet); this.testOutputHelper.WriteLine("---- File data: ----"); var computeNodeFileContentFromNode = fileFromComputeNode.ReadAsString(); this.testOutputHelper.WriteLine(computeNodeFileContentFromNode); Assert.NotEmpty(computeNodeFileContentFromNode); // // Get file from compute node (bypassing the properties call) // computeNodeFileContentDirect = computeNode.CopyNodeFileContentToString(filePathToGet); this.testOutputHelper.WriteLine("---- File data: ----"); this.testOutputHelper.WriteLine(computeNodeFileContentDirect); Assert.NotEmpty(computeNodeFileContentDirect); // // NodeFile delete // string filePath = Path.Combine(@"workitems", jobId, "job-1", taskId, Constants.StandardOutFileName); NodeFile nodeFile = batchCli.PoolOperations.GetNodeFile(this.poolFixture.PoolId, computeNodeId, filePath); nodeFile.Delete(); //Ensure delete succeeded TestUtilities.AssertThrows <BatchException>(() => nodeFile.Refresh()); //Delete directory NodeFile directory = batchCli.PoolOperations.ListNodeFiles(this.poolFixture.PoolId, computeNodeId, recursive: true).First(item => item.Path.Contains(directoryNameOne)); Assert.True(directory.IsDirectory); TestUtilities.AssertThrows <BatchException>(() => directory.Delete(recursive: false)); directory.Delete(recursive: true); Assert.Null(batchCli.PoolOperations.ListNodeFiles(this.poolFixture.PoolId, computeNodeId, recursive: true).FirstOrDefault(item => item.Path.Contains(directoryNameOne))); // // PoolManager delete node file // filePath = Path.Combine(@"workitems", jobId, "job-1", taskId, Constants.StandardErrorFileName); NodeFile file = batchCli.JobOperations.GetNodeFile(jobId, taskId, Constants.StandardErrorFileName); batchCli.PoolOperations.DeleteNodeFile(this.poolFixture.PoolId, computeNodeId, filePath); //Ensure delete succeeded TestUtilities.AssertThrows <BatchException>(() => batchCli.JobOperations.GetNodeFile(jobId, taskId, Constants.StandardErrorFileName)); //Delete directory directory = batchCli.PoolOperations.ListNodeFiles(this.poolFixture.PoolId, computeNodeId, recursive: true).First(item => item.Path.Contains(directoryNameTwo)); Assert.True(directory.IsDirectory); TestUtilities.AssertThrows <BatchException>(() => batchCli.PoolOperations.DeleteNodeFile(this.poolFixture.PoolId, computeNodeId, directory.Path, recursive: false)); batchCli.PoolOperations.DeleteNodeFile(this.poolFixture.PoolId, computeNodeId, directory.Path, recursive: true); Assert.Null(batchCli.PoolOperations.ListNodeFiles(this.poolFixture.PoolId, computeNodeId, recursive: true).FirstOrDefault(item => item.Path.Contains(directoryNameTwo))); } finally { batchCli.JobOperations.DeleteJob(jobId); } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }
public void Bug230385SupportDeleteNodeFileByTask() { Action test = () => { using (BatchClient batchCli = TestUtilities.OpenBatchClientAsync(TestUtilities.GetCredentialsFromEnvironment()).Result) { string jobId = "Bug230285Job-" + TestUtilities.GetMyName(); try { const string taskId = "hiWorld"; const string directoryCreationTaskId1 = "dirTask1"; const string directoryCreationTaskId2 = "dirTask2"; const string directoryNameOne = "Foo"; const string directoryNameTwo = "Bar"; const string directory2PathOnNode = "wd/" + directoryNameTwo; // // Create the job // CloudJob unboundJob = batchCli.JobOperations.CreateJob(jobId, new PoolInformation()); unboundJob.PoolInformation.PoolId = this.poolFixture.PoolId; unboundJob.Commit(); CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); CloudTask myTask = new CloudTask(taskId, "cmd /c echo hello world"); CloudTask directoryCreationTask1 = new CloudTask(directoryCreationTaskId1, string.Format("cmd /c mkdir {0} && echo test > {0}/testfile.txt", directoryNameOne)); CloudTask directoryCreationTask2 = new CloudTask(directoryCreationTaskId2, string.Format("cmd /c mkdir {0} && echo test > {0}/testfile.txt", directoryNameTwo)); boundJob.AddTask(myTask); boundJob.AddTask(directoryCreationTask1); boundJob.AddTask(directoryCreationTask2); this.testOutputHelper.WriteLine("Initial job commit()"); // // Wait for task to go to completion // Utilities utilities = batchCli.Utilities; TaskStateMonitor taskStateMonitor = utilities.CreateTaskStateMonitor(); taskStateMonitor.WaitAll( boundJob.ListTasks(), Microsoft.Azure.Batch.Common.TaskState.Completed, TimeSpan.FromMinutes(3)); // // NodeFile delete // //Delete single file NodeFile file = batchCli.JobOperations.GetNodeFile(jobId, taskId, Constants.StandardOutFileName); file.Delete(); //Ensure delete succeeded TestUtilities.AssertThrows <BatchException>(() => batchCli.JobOperations.GetNodeFile(jobId, taskId, Constants.StandardOutFileName)); //Delete directory NodeFile directory = batchCli.JobOperations.ListNodeFiles(jobId, directoryCreationTaskId1, recursive: true).First(item => item.Path.Contains(directoryNameOne)); Assert.True(directory.IsDirectory); TestUtilities.AssertThrows <BatchException>(() => directory.Delete(recursive: false)); directory.Delete(recursive: true); Assert.Null(batchCli.JobOperations.ListNodeFiles(jobId, directoryCreationTaskId1, recursive: true).FirstOrDefault(item => item.Path.Contains(directoryNameOne))); // // JobScheduleOperations delete task file // batchCli.JobOperations.GetNodeFile(jobId, taskId, Constants.StandardErrorFileName); batchCli.JobOperations.DeleteNodeFile(jobId, taskId, Constants.StandardErrorFileName); //Ensure delete succeeded TestUtilities.AssertThrows <BatchException>(() => batchCli.JobOperations.GetNodeFile(jobId, taskId, Constants.StandardErrorFileName)); //Delete directory directory = batchCli.JobOperations.ListNodeFiles(jobId, directoryCreationTaskId2, recursive: true).First(item => item.Path.Contains(directoryNameTwo)); Assert.True(directory.IsDirectory); TestUtilities.AssertThrows <BatchException>(() => batchCli.JobOperations.DeleteNodeFile(jobId, directoryCreationTaskId2, directory2PathOnNode, recursive: false)); batchCli.JobOperations.DeleteNodeFile(jobId, directoryCreationTaskId2, directory2PathOnNode, recursive: true); Assert.Null(batchCli.JobOperations.ListNodeFiles(jobId, directoryCreationTaskId2, recursive: true).FirstOrDefault(item => item.Path.Contains(directoryNameTwo))); } finally { TestUtilities.DeleteJobIfExistsAsync(batchCli, jobId).Wait(); } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }
public void Bug2329884_ComputeNodeRecentTasksAndComputeNodeError() { Action test = () => { using (BatchClient batchCli = TestUtilities.OpenBatchClientAsync(TestUtilities.GetCredentialsFromEnvironment()).Result) { string jobId = "Bug2329884Job-" + TestUtilities.GetMyName(); Protocol.RequestInterceptor interceptor = null; try { const string taskId = "hiWorld"; // // Create the job // CloudJob unboundJob = batchCli.JobOperations.CreateJob(jobId, new PoolInformation()); unboundJob.PoolInformation.PoolId = this.poolFixture.PoolId; unboundJob.Commit(); CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); CloudTask myTask = new CloudTask(taskId, "cmd /c echo hello world"); boundJob.AddTask(myTask); this.testOutputHelper.WriteLine("Initial job commit()"); // // Wait for task to go to completion // Utilities utilities = batchCli.Utilities; TaskStateMonitor taskStateMonitor = utilities.CreateTaskStateMonitor(); taskStateMonitor.WaitAll( boundJob.ListTasks(), Microsoft.Azure.Batch.Common.TaskState.Completed, new TimeSpan(0, 3 /*min*/, 0)); CloudTask boundTask = boundJob.GetTask(taskId); //Since the compute node name comes back as "Node:<computeNodeId>" we need to split on : to get the actual compute node name string computeNodeId = boundTask.ComputeNodeInformation.AffinityId.Split(':')[1]; // // Check recent tasks // ComputeNode computeNode = batchCli.PoolOperations.GetComputeNode(this.poolFixture.PoolId, computeNodeId); this.testOutputHelper.WriteLine("Recent tasks:"); foreach (TaskInformation recentTask in computeNode.RecentTasks) { this.testOutputHelper.WriteLine("Compute node has recent task Job: {0}, Task: {1}, State: {2}, Subtask: {3}", recentTask.JobId, recentTask.TaskId, recentTask.TaskState, recentTask.SubtaskId); } TaskInformation myTaskInfo = computeNode.RecentTasks.First(taskInfo => taskInfo.JobId.Equals( jobId, StringComparison.InvariantCultureIgnoreCase) && taskInfo.TaskId.Equals(taskId, StringComparison.InvariantCultureIgnoreCase)); Assert.Equal(TaskState.Completed, myTaskInfo.TaskState); Assert.NotNull(myTaskInfo.ExecutionInformation); Assert.Equal(0, myTaskInfo.ExecutionInformation.ExitCode); // // Check compute node Error // const string expectedErrorCode = "TestErrorCode"; const string expectedErrorMessage = "Test error message"; const string nvpValue = "Test"; //We use mocking to return a fake compute node object here to test Compute Node Error because we cannot force one easily interceptor = new Protocol.RequestInterceptor((req => { if (req is ComputeNodeGetBatchRequest) { var typedRequest = req as ComputeNodeGetBatchRequest; typedRequest.ServiceRequestFunc = (token) => { var response = new AzureOperationResponse <Protocol.Models.ComputeNode, Protocol.Models.ComputeNodeGetHeaders>(); List <Protocol.Models.ComputeNodeError> errors = new List <Protocol.Models.ComputeNodeError>(); //Generate first Compute Node Error List <Protocol.Models.NameValuePair> nvps = new List <Protocol.Models.NameValuePair>(); nvps.Add(new Protocol.Models.NameValuePair() { Name = nvpValue, Value = nvpValue }); Protocol.Models.ComputeNodeError error1 = new Protocol.Models.ComputeNodeError(); error1.Code = expectedErrorCode; error1.Message = expectedErrorMessage; error1.ErrorDetails = nvps; errors.Add(error1); //Generate second Compute Node Error nvps = new List <Protocol.Models.NameValuePair>(); nvps.Add(new Protocol.Models.NameValuePair() { Name = nvpValue, Value = nvpValue }); Protocol.Models.ComputeNodeError error2 = new Protocol.Models.ComputeNodeError(); error2.Code = expectedErrorCode; error2.Message = expectedErrorMessage; error2.ErrorDetails = nvps; errors.Add(error2); Protocol.Models.ComputeNode protoComputeNode = new Protocol.Models.ComputeNode(); protoComputeNode.Id = computeNodeId; protoComputeNode.State = Protocol.Models.ComputeNodeState.Idle; protoComputeNode.Errors = errors; response.Body = protoComputeNode; return(Task.FromResult(response)); }; } })); batchCli.PoolOperations.CustomBehaviors.Add(interceptor); computeNode = batchCli.PoolOperations.GetComputeNode(this.poolFixture.PoolId, computeNodeId); Assert.Equal(computeNodeId, computeNode.Id); Assert.NotNull(computeNode.Errors); Assert.Equal(2, computeNode.Errors.Count()); foreach (ComputeNodeError computeNodeError in computeNode.Errors) { Assert.Equal(expectedErrorCode, computeNodeError.Code); Assert.Equal(expectedErrorMessage, computeNodeError.Message); Assert.NotNull(computeNodeError.ErrorDetails); Assert.Equal(1, computeNodeError.ErrorDetails.Count()); Assert.Contains(nvpValue, computeNodeError.ErrorDetails.First().Name); } } finally { batchCli.JobOperations.DeleteJob(jobId); } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }
public void TestOMJobReleaseSchedulingError() { string jobId = "TestOMJobReleaseSchedulingError-" + CraftTimeString() + "-" + TestUtilities.GetMyName(); Action test = () => { using (BatchClient client = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment())) { try { // create job schedule with prep that succeeds and release the triggers scheduling error { PoolInformation poolInfo = new PoolInformation() { PoolId = this.poolFixture.PoolId }; CloudJob unboundJob = client.JobOperations.CreateJob(jobId, poolInfo); // add the jobPrep task to the job { JobPreparationTask prep = new JobPreparationTask("cmd /c echo the quick job prep jumped over the..."); unboundJob.JobPreparationTask = prep; prep.WaitForSuccess = false; // we don't really care but why not set this } // add a jobRelease task to the job { JobReleaseTask relTask = new JobReleaseTask("cmd /c echo Job Release Task"); unboundJob.JobReleaseTask = relTask; ResourceFile[] badResFiles = { ResourceFile.FromUrl("https://not.a.domain.invalid/file", "bob.txt") }; relTask.ResourceFiles = badResFiles; relTask.Id = "jobRelease"; } // add the job to the service unboundJob.Commit(); } // add a trivial task to force the JP client.JobOperations.AddTask(jobId, new CloudTask("ForceJobPrep", "cmd /c echo TestOMJobReleaseSchedulingError")); // wait for the task to complete TaskStateMonitor tsm = client.Utilities.CreateTaskStateMonitor(); tsm.WaitAll( client.JobOperations.ListTasks(jobId), TaskState.Completed, TimeSpan.FromMinutes(10), additionalBehaviors: new[] { // spam/logging interceptor new Protocol.RequestInterceptor((x) => { this.testOutputHelper.WriteLine("Issuing request type: " + x.GetType().ToString()); // print out the compute node states... we are actually waiting on the compute nodes List <ComputeNode> allComputeNodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList(); this.testOutputHelper.WriteLine(" #compute nodes: " + allComputeNodes.Count); allComputeNodes.ForEach((icn) => { this.testOutputHelper.WriteLine(" computeNode.id: " + icn.Id + ", state: " + icn.State); }); this.testOutputHelper.WriteLine(""); }) } ); // ok terminate job to trigger job release client.JobOperations.TerminateJob(jobId, "BUG: Server will throw 500 if I don't provide reason"); // the victim compute node. pool should have size 1. List <ComputeNode> computeNodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList(); Assert.Single(computeNodes); // now we have a job that should be trying to run the JP // poll for the JP to have been run, and it must have a scheduling error bool releaseNotCompleted = true; // gotta poll to find out when the jp has been run while (releaseNotCompleted) { List <JobPreparationAndReleaseTaskExecutionInformation> jrStatusList = client.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId).ToList(); JobPreparationAndReleaseTaskExecutionInformation prepAndReleaseStatus = jrStatusList.FirstOrDefault(); if (prepAndReleaseStatus != null && null != prepAndReleaseStatus.JobReleaseTaskExecutionInformation) { if (JobReleaseTaskState.Completed == prepAndReleaseStatus.JobReleaseTaskExecutionInformation.State) { releaseNotCompleted = false; // we see a JP has been run // now assert the failure info Assert.NotNull(prepAndReleaseStatus); Assert.NotNull(prepAndReleaseStatus.JobReleaseTaskExecutionInformation.FailureInformation); Assert.Equal(TaskExecutionResult.Failure, prepAndReleaseStatus.JobReleaseTaskExecutionInformation.Result); // spew the failure info this.OutputFailureInfo(prepAndReleaseStatus.JobReleaseTaskExecutionInformation.FailureInformation); } } Thread.Sleep(2000); this.testOutputHelper.WriteLine("Job Release tasks still running (waiting for blob dl to timeout)."); } } finally { client.JobOperations.DeleteJob(jobId); } } }; SynchronizationContextHelper.RunTest(test, LongTestTimeout); }
/// <summary> /// Populates Azure Storage with the required files, and /// submits the job to the Azure Batch service. /// </summary> public async Task RunAsync() { Console.WriteLine("Running with the following settings: "); Console.WriteLine("----------------------------------------"); Console.WriteLine(this.textSearchSettings.ToString()); Console.WriteLine(this.accountSettings.ToString()); CloudStorageAccount cloudStorageAccount = new CloudStorageAccount( new StorageCredentials( this.accountSettings.StorageAccountName, this.accountSettings.StorageAccountKey), this.accountSettings.StorageServiceUrl, useHttps: true); //Upload resources if required. if (this.textSearchSettings.ShouldUploadResources) { Console.WriteLine("Splitting file: {0} into {1} subfiles", Constants.TextFilePath, this.textSearchSettings.NumberOfMapperTasks); //Split the text file into the correct number of files for consumption by the mapper tasks. FileSplitter splitter = new FileSplitter(); List <string> mapperTaskFiles = await splitter.SplitAsync( Constants.TextFilePath, this.textSearchSettings.NumberOfMapperTasks); List <string> files = Constants.RequiredExecutableFiles.Union(mapperTaskFiles).ToList(); await SampleHelpers.UploadResourcesAsync( cloudStorageAccount, this.textSearchSettings.BlobContainer, files); } //Generate a SAS for the container. string containerSasUrl = SampleHelpers.ConstructContainerSas( cloudStorageAccount, this.textSearchSettings.BlobContainer); //Set up the Batch Service credentials used to authenticate with the Batch Service. BatchSharedKeyCredentials credentials = new BatchSharedKeyCredentials( this.accountSettings.BatchServiceUrl, this.accountSettings.BatchAccountName, this.accountSettings.BatchAccountKey); using (BatchClient batchClient = await BatchClient.OpenAsync(credentials)) { // // Construct the job properties in local memory before commiting them to the Batch Service. // //Allow enough compute nodes in the pool to run each mapper task, and 1 extra to run the job manager. int numberOfPoolComputeNodes = 1 + this.textSearchSettings.NumberOfMapperTasks; //Define the pool specification for the pool which the job will run on. PoolSpecification poolSpecification = new PoolSpecification() { TargetDedicated = numberOfPoolComputeNodes, VirtualMachineSize = "small", //You can learn more about os families and versions at: //http://azure.microsoft.com/documentation/articles/cloud-services-guestos-update-matrix OSFamily = "4", TargetOSVersion = "*" }; //Use the auto pool feature of the Batch Service to create a pool when the job is created. //This creates a new pool for each job which is added. AutoPoolSpecification autoPoolSpecification = new AutoPoolSpecification() { AutoPoolIdPrefix = "TextSearchPool", KeepAlive = false, PoolLifetimeOption = PoolLifetimeOption.Job, PoolSpecification = poolSpecification }; //Define the pool information for this job -- it will run on the pool defined by the auto pool specification above. PoolInformation poolInformation = new PoolInformation() { AutoPoolSpecification = autoPoolSpecification }; //Define the job manager for this job. This job manager will run first and will submit the tasks for //the job. The job manager is the executable which manages the lifetime of the job //and all tasks which should run for the job. In this case, the job manager submits the mapper and reducer tasks. List <ResourceFile> jobManagerResourceFiles = SampleHelpers.GetResourceFiles(containerSasUrl, Constants.RequiredExecutableFiles); const string jobManagerTaskId = "JobManager"; JobManagerTask jobManagerTask = new JobManagerTask() { ResourceFiles = jobManagerResourceFiles, CommandLine = Constants.JobManagerExecutable, //Determines if the job should terminate when the job manager process exits. KillJobOnCompletion = true, Id = jobManagerTaskId }; //Create the unbound job in local memory. An object which exists only in local memory (and not on the Batch Service) is "unbound". string jobId = Environment.GetEnvironmentVariable("USERNAME") + DateTime.UtcNow.ToString("yyyyMMdd-HHmmss"); CloudJob unboundJob = batchClient.JobOperations.CreateJob(jobId, poolInformation); unboundJob.JobManagerTask = jobManagerTask; //Assign the job manager task to this job try { //Commit the unbound job to the Batch Service. Console.WriteLine("Adding job: {0} to the Batch Service.", unboundJob.Id); await unboundJob.CommitAsync(); //Issues a request to the Batch Service to add the job which was defined above. // // Wait for the job manager task to complete. // //An object which is backed by a corresponding Batch Service object is "bound." CloudJob boundJob = await batchClient.JobOperations.GetJobAsync(jobId); CloudTask boundJobManagerTask = await boundJob.GetTaskAsync(jobManagerTaskId); TimeSpan maxJobCompletionTimeout = TimeSpan.FromMinutes(30); // Monitor the current tasks to see when they are done. // Occasionally a task may get killed and requeued during an upgrade or hardware failure, including the job manager // task. The job manager will be re-run in this case. Robustness against this was not added into the sample for // simplicity, but should be added into any production code. Console.WriteLine("Waiting for job's tasks to complete"); TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); bool timedOut = await taskStateMonitor.WaitAllAsync(new List <CloudTask> { boundJobManagerTask }, TaskState.Completed, maxJobCompletionTimeout); Console.WriteLine("Done waiting for job manager task."); await boundJobManagerTask.RefreshAsync(); //Check to ensure the job manager task exited successfully. await Helpers.CheckForTaskSuccessAsync(boundJobManagerTask, dumpStandardOutOnTaskSuccess : false); if (timedOut) { throw new TimeoutException(string.Format("Timed out waiting for job manager task to complete.")); } // // Download and write out the reducer tasks output // string reducerText = await SampleHelpers.DownloadBlobTextAsync(cloudStorageAccount, this.textSearchSettings.BlobContainer, Constants.ReducerTaskResultBlobName); Console.WriteLine("Reducer reuslts:"); Console.WriteLine(reducerText); } finally { //Delete the job. //This will delete the auto pool associated with the job as long as the pool //keep alive property is set to false. if (this.textSearchSettings.ShouldDeleteJob) { Console.WriteLine("Deleting job {0}", jobId); batchClient.JobOperations.DeleteJob(jobId); } //Note that there were files uploaded to a container specified in the //configuration file. This container will not be deleted or cleaned up by this sample. } } }
public static async Task MainAsync() { const string poolId = "MultiInstanceSamplePool"; const string jobId = "MultiInstanceSampleJob"; const string taskId = "MultiInstanceSampleTask"; const int numberOfNodes = 5; //jmeno package kterou uploaduju na azure s polu s MSMpiSetup const string appPackageId = "Parallel"; const string appPackageVersion = "1.0"; TimeSpan timeout = TimeSpan.FromMinutes(15); AccountSettings accountSettings = SampleHelpers.LoadAccountSettings(); //nakonfigurované batch accounty abych se mohl připojit ke svému účtu BatchSharedKeyCredentials cred = new BatchSharedKeyCredentials( accountSettings.BatchServiceUrl, accountSettings.BatchAccountName, accountSettings.BatchAccountKey); using (BatchClient batchClient = BatchClient.Open(cred)) { // Vytvoření fondu výpočetních uzlů a úlohu, do které přidáme úlohu s více instancemi. await CreatePoolAsync(batchClient, poolId, numberOfNodes, appPackageId, appPackageVersion); await CreateJobAsync(batchClient, jobId, poolId); //batch vytvoří jednu hlavní a několik dílčích úkolů CloudTask multiInstanceTask = new CloudTask(id: taskId, commandline: $"cmd /c mpiexec.exe -c 1 -wdir %AZ_BATCH_TASK_SHARED_DIR% %AZ_BATCH_APP_PACKAGE_{appPackageId.ToUpper()}#{appPackageVersion}%\\ParallelMpiApp.exe"); // příkaz SPMD = více samostatných procesorů současně spouští stejný program multiInstanceTask.MultiInstanceSettings = new MultiInstanceSettings(@"cmd /c start cmd /c smpd.exe -d", numberOfNodes); //zadání úkolů, vytvoří se jeden primární a několik dílčích, //aby odpovídaly počtu uzlů a naplánuje se jejich provedení v uzlech Console.WriteLine($"Adding task [{taskId}] to job [{jobId}]..."); await batchClient.JobOperations.AddTaskAsync(jobId, multiInstanceTask); //verze úlohy CloudTask mainTask = await batchClient.JobOperations.GetTaskAsync(jobId, taskId); // sledování stavu úkolů,čekáme až bude úloha dokončena Console.WriteLine($"Awaiting task completion, timeout in {timeout}..."); TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); await taskStateMonitor.WhenAll(new List <CloudTask> { mainTask }, TaskState.Completed, timeout); //aktualizace úlohy await mainTask.RefreshAsync(); string stdOut = mainTask.GetNodeFile(Constants.StandardOutFileName).ReadAsString(); string stdErr = mainTask.GetNodeFile(Constants.StandardErrorFileName).ReadAsString(); Console.WriteLine(); Console.WriteLine($"Main task [{mainTask.Id}] is in state [{mainTask.State}] and ran on compute node [{mainTask.ComputeNodeInformation.ComputeNodeId}]:"); Console.WriteLine("---- stdout.txt ----"); Console.WriteLine(stdOut); Console.WriteLine("---- stderr.txt ----"); Console.WriteLine(stdErr); // par sekund čas aby se stačily dílčí úlohy dokončit TimeSpan subtaskTimeout = TimeSpan.FromSeconds(10); Console.WriteLine($"Main task completed, waiting {subtaskTimeout} for subtasks to complete..."); System.Threading.Thread.Sleep(subtaskTimeout); Console.WriteLine(); Console.WriteLine("---- Subtask information ----"); //kolekce dílčích úlohů a tisk informací o každém IPagedEnumerable <SubtaskInformation> subtasks = mainTask.ListSubtasks(); await subtasks.ForEachAsync(async (subtask) => { Console.WriteLine("subtask: " + subtask.Id); Console.WriteLine("\texit code: " + subtask.ExitCode); if (subtask.State == SubtaskState.Completed) { //získání souborů z uzlů ComputeNode node = await batchClient.PoolOperations.GetComputeNodeAsync(subtask.ComputeNodeInformation.PoolId, subtask.ComputeNodeInformation.ComputeNodeId); string outPath = subtask.ComputeNodeInformation.TaskRootDirectory + "\\" + Constants.StandardOutFileName; string errPath = subtask.ComputeNodeInformation.TaskRootDirectory + "\\" + Constants.StandardErrorFileName; NodeFile stdOutFile = await node.GetNodeFileAsync(outPath.Trim('\\')); NodeFile stdErrFile = await node.GetNodeFileAsync(errPath.Trim('\\')); stdOut = await stdOutFile.ReadAsStringAsync(); stdErr = await stdErrFile.ReadAsStringAsync(); Console.WriteLine($"\tnode: " + node.Id); Console.WriteLine("\tstdout.txt: " + stdOut); Console.WriteLine("\tstderr.txt: " + stdErr); } else { Console.WriteLine($"\tSubtask {subtask.Id} is in state {subtask.State}"); } }); // vymazání zdrojů které jsme vytvořili, abychom to nemuseli dělat manuálně(fondy,úlohy) Console.WriteLine(); Console.Write("Delete job? [yes] no: "); string response = Console.ReadLine().ToLower(); if (response != "n" && response != "no") { await batchClient.JobOperations.DeleteJobAsync(jobId); } Console.Write("Delete pool? [yes] no: "); response = Console.ReadLine().ToLower(); if (response != "n" && response != "no") { await batchClient.PoolOperations.DeletePoolAsync(poolId); } } }
public static void HelloWorld( BatchClient batchCli, ITestOutputHelper testOutputHelper, CloudPool sharedPool, out string jobId, out string taskId, bool deleteJob = true, bool isLinux = false) { jobId = "HelloWorldJob-" + GetMyName() + "-" + GetTimeStamp(); try { // here we show how to use an unbound Job + Commit() to run a simple "Hello World" task // get an empty unbound Job CloudJob quickJob = batchCli.JobOperations.CreateJob(); quickJob.Id = jobId; quickJob.PoolInformation = new PoolInformation() { PoolId = sharedPool.Id }; // Commit Job quickJob.Commit(); // get an empty unbound Task taskId = "dwsHelloWorldTask"; const string winPaasHWTaskCmdLine = "cmd /c echo Hello World"; const string linuxIaasHWTaskCmdLine = "echo Hello World"; string winnerTaskCmdLine = isLinux ? linuxIaasHWTaskCmdLine : winPaasHWTaskCmdLine; CloudTask hwTask = new CloudTask(id: taskId, commandline: winnerTaskCmdLine); // Open the new Job as bound. CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); // add Task to Job boundJob.AddTask(hwTask); // wait for the task to complete Utilities utilities = batchCli.Utilities; TaskStateMonitor taskStateMonitor = utilities.CreateTaskStateMonitor(); taskStateMonitor.WaitAll( boundJob.ListTasks(), TaskState.Completed, TimeSpan.FromMinutes(3)); CloudTask myCompletedTask = new List <CloudTask>(boundJob.ListTasks(null))[0]; string stdOut = myCompletedTask.GetNodeFile(Constants.StandardOutFileName).ReadAsString(); string stdErr = myCompletedTask.GetNodeFile(Constants.StandardErrorFileName).ReadAsString(); // confirm that stdout includes correct value Assert.Contains("Hello World", stdOut); testOutputHelper.WriteLine("StdOut: "); testOutputHelper.WriteLine(stdOut); testOutputHelper.WriteLine("StdErr: "); testOutputHelper.WriteLine(stdErr); } finally { // delete the job to free the Pool compute nodes. if (deleteJob) { TestUtilities.DeleteJobIfExistsAsync(batchCli, jobId).Wait(); } } }
public void CanAddTaskWithFilesToStage() { StagingStorageAccount storageCreds = TestUtilities.GetStorageCredentialsFromEnvironment(); using (BatchClient batchCli = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment())) { string jobId = "TestTaskWithFilesToStage-" + TestUtilities.GetMyName(); try { CloudJob job = batchCli.JobOperations.CreateJob(jobId, new PoolInformation() { PoolId = this.poolFixture.PoolId }); job.Commit(); CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); CloudTask myTask = new CloudTask(id: "CountWordsTask", commandline: @"cmd /c dir /s .. & dir & wc localwords.txt"); myTask.FilesToStage = new List <IFileStagingProvider> { new FileToStage(Resources.LocalWordsDotText, storageCreds) }; // add the task to the job var artifacts = boundJob.AddTask(myTask); var specificArtifact = artifacts[typeof(FileToStage)]; SequentialFileStagingArtifact sfsa = specificArtifact as SequentialFileStagingArtifact; Assert.NotNull(sfsa); // Open the new Job as bound. CloudPool boundPool = batchCli.PoolOperations.GetPool(boundJob.ExecutionInformation.PoolId); // wait for the task to complete TaskStateMonitor taskStateMonitor = batchCli.Utilities.CreateTaskStateMonitor(); taskStateMonitor.WaitAll( boundJob.ListTasks(), Microsoft.Azure.Batch.Common.TaskState.Completed, TimeSpan.FromMinutes(10), controlParams: null, additionalBehaviors: new[] { // spam/logging interceptor new Microsoft.Azure.Batch.Protocol.RequestInterceptor((x) => { this.testOutputHelper.WriteLine("Issuing request type: " + x.GetType().ToString()); try { // print out the compute node states... we are actually waiting on the compute nodes List <ComputeNode> allComputeNodes = boundPool.ListComputeNodes().ToList(); this.testOutputHelper.WriteLine(" #compute nodes: " + allComputeNodes.Count); allComputeNodes.ForEach( (icn) => { this.testOutputHelper.WriteLine(" computeNode.id: " + icn.Id + ", state: " + icn.State); }); } catch (Exception ex) { // there is a race between the pool-life-job and the end of the job.. and the ListComputeNodes above Assert.True(false, "SampleWithFilesAndPool probably can ignore this if its pool not found: " + ex.ToString()); } }) }); List <CloudTask> tasks = boundJob.ListTasks().ToList(); CloudTask myCompletedTask = tasks.Single(); foreach (CloudTask curTask in tasks) { this.testOutputHelper.WriteLine("Task Id: " + curTask.Id + ", state: " + curTask.State); } boundPool.Refresh(); this.testOutputHelper.WriteLine("Pool Id: " + boundPool.Id + ", state: " + boundPool.State); string stdOut = myCompletedTask.GetNodeFile(Constants.StandardOutFileName).ReadAsString(); string stdErr = myCompletedTask.GetNodeFile(Constants.StandardErrorFileName).ReadAsString(); this.testOutputHelper.WriteLine("StdOut: "); this.testOutputHelper.WriteLine(stdOut); this.testOutputHelper.WriteLine("StdErr: "); this.testOutputHelper.WriteLine(stdErr); this.testOutputHelper.WriteLine("Task Files:"); foreach (NodeFile curFile in myCompletedTask.ListNodeFiles(recursive: true)) { this.testOutputHelper.WriteLine(" File path: " + curFile.Path); } var files = myCompletedTask.ListNodeFiles(recursive: true).ToList(); // confirm the files are there Assert.True(files.Any(file => file.Path.Contains("localWords.txt")), "missing file: localWords.txt"); } finally { TestUtilities.DeleteJobIfExistsAsync(batchCli, jobId).Wait(); } } }
public void LongRunning_Bug1965363Wat7OSVersionFeaturesQuickJobWithAutoPool() { void test() { using BatchClient batchCli = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment()); string jobId = "Bug1965363Job-" + TestUtilities.GetMyName(); try { PoolInformation poolInfo = new PoolInformation() { AutoPoolSpecification = new AutoPoolSpecification() { PoolLifetimeOption = PoolLifetimeOption.Job, PoolSpecification = new PoolSpecification() { CloudServiceConfiguration = new CloudServiceConfiguration(PoolFixture.OSFamily), VirtualMachineSize = PoolFixture.VMSize, TargetDedicatedComputeNodes = 1 } } }; CloudJob unboundJob = batchCli.JobOperations.CreateJob(jobId, poolInfo); testOutputHelper.WriteLine("Commiting quickjob"); unboundJob.Commit(); CloudTask task = new CloudTask("Bug1965363Wat7OSVersionFeaturesQuickJobWithAutoPoolTask", "cmd /c echo Bug1965363"); CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); boundJob.AddTask(task); testOutputHelper.WriteLine("Getting pool name: {0}", boundJob.ExecutionInformation.PoolId); CloudPool boundPool = batchCli.PoolOperations.GetPool(boundJob.ExecutionInformation.PoolId); TaskStateMonitor tsm = batchCli.Utilities.CreateTaskStateMonitor(); ODATAMonitorControl odControl = new ODATAMonitorControl(); // we know that the autopool compute nodes will take a long time to become scheduleable so we slow down polling/spam odControl.DelayBetweenDataFetch = TimeSpan.FromSeconds(5); testOutputHelper.WriteLine("Invoking TaskStateMonitor"); tsm.WaitAll( boundJob.ListTasks(), TaskState.Completed, TimeSpan.FromMinutes(15), odControl, new[] { // spam/logging interceptor new Protocol.RequestInterceptor((x) => { testOutputHelper.WriteLine("Issuing request type: " + x.GetType().ToString()); // print out the compute node states... we are actually waiting on the compute nodes List <ComputeNode> allComputeNodes = boundPool.ListComputeNodes().ToList(); testOutputHelper.WriteLine(" #comnpute nodes: " + allComputeNodes.Count); allComputeNodes.ForEach((icn) => { testOutputHelper.WriteLine(" computeNode.id: " + icn.Id + ", state: " + icn.State); }); testOutputHelper.WriteLine(""); }) }); // confirm the task ran by inspecting the stdOut string stdOut = boundJob.ListTasks().ToList()[0].GetNodeFile(Constants.StandardOutFileName).ReadAsString(); Assert.Contains("Bug1965363", stdOut); } finally { TestUtilities.DeleteJobIfExistsAsync(batchCli, jobId).Wait(); } } SynchronizationContextHelper.RunTest(test, LongTestTimeout); }
/// <summary> /// Monitors the specified tasks for completion and returns a value indicating whether all tasks completed successfully /// within the timeout period. /// </summary> /// <param name="batchClient">A <see cref="BatchClient"/>.</param> /// <param name="jobId">The id of the job containing the tasks that should be monitored.</param> /// <param name="timeout">The period of time to wait for the tasks to reach the completed state.</param> /// <returns><c>true</c> if all tasks in the specified job completed with an exit code of 0 within the specified timeout period, otherwise <c>false</c>.</returns> private static async Task <bool> MonitorTasks(BatchClient batchClient, string jobId, TimeSpan timeout) { bool allTasksSuccessful = true; const string successMessage = "All tasks reached state Completed."; const string failureMessage = "One or more tasks failed to reach the Completed state within the timeout period."; // Obtain the collection of tasks currently managed by the job. Note that we use a detail level to // specify that only the "id" property of each task should be populated. Using a detail level for // all list operations helps to lower response time from the Batch service. ODATADetailLevel detail = new ODATADetailLevel(selectClause: "id"); List <CloudTask> tasks = await batchClient.JobOperations.ListTasks(JobId, detail).ToListAsync(); Console.WriteLine("Awaiting task completion, timeout in {0}...", timeout.ToString()); // We use a TaskStateMonitor to monitor the state of our tasks. In this case, we will wait for all tasks to // reach the Completed state. TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); try { await taskStateMonitor.WhenAll(tasks, TaskState.Completed, timeout); } catch (TimeoutException) { await batchClient.JobOperations.TerminateJobAsync(jobId, failureMessage); Console.WriteLine(failureMessage); return(false); } await batchClient.JobOperations.TerminateJobAsync(jobId, successMessage); // All tasks have reached the "Completed" state, however, this does not guarantee all tasks completed successfully. // Here we further check each task's ExecutionInfo property to ensure that it did not encounter a scheduling error // or return a non-zero exit code. // Update the detail level to populate only the task id and executionInfo properties. // We refresh the tasks below, and need only this information for each task. detail.SelectClause = "id, executionInfo"; foreach (CloudTask task in tasks) { // Populate the task's properties with the latest info from the Batch service await task.RefreshAsync(detail); if (task.ExecutionInformation.SchedulingError != null) { // A scheduling error indicates a problem starting the task on the node. It is important to note that // the task's state can be "Completed," yet still have encountered a scheduling error. allTasksSuccessful = false; Console.WriteLine("WARNING: Task [{0}] encountered a scheduling error: {1}", task.Id, task.ExecutionInformation.SchedulingError.Message); } else if (task.ExecutionInformation.ExitCode != 0) { // A non-zero exit code may indicate that the application executed by the task encountered an error // during execution. As not every application returns non-zero on failure by default (e.g. robocopy), // your implementation of error checking may differ from this example. allTasksSuccessful = false; Console.WriteLine("WARNING: Task [{0}] returned a non-zero exit code - this may indicate task execution or completion failure.", task.Id); } } if (allTasksSuccessful) { Console.WriteLine("Success! All tasks completed successfully within the specified timeout period."); } return(allTasksSuccessful); }
public static async Task MainAsync() { const string poolId = "MultiInstanceSamplePool"; const string jobId = "MultiInstanceSampleJob"; const string taskId = "MultiInstanceSampleTask"; const int numberOfNodes = 3; // The application package and version to deploy to the compute nodes. // It should contain your MPIHelloWorld sample MS-MPI program: // https://blogs.technet.microsoft.com/windowshpc/2015/02/02/how-to-compile-and-run-a-simple-ms-mpi-program/ // And the MSMpiSetup.exe installer: // https://www.microsoft.com/download/details.aspx?id=52981 // Then upload it as an application package: // https://azure.microsoft.com/documentation/articles/batch-application-packages/ const string appPackageId = "MPIHelloWorld"; const string appPackageVersion = "1.0"; TimeSpan timeout = TimeSpan.FromMinutes(30); // Configure your AccountSettings in the Microsoft.Azure.Batch.Samples.Common project within this solution BatchSharedKeyCredentials cred = new BatchSharedKeyCredentials(AccountSettings.Default.BatchServiceUrl, AccountSettings.Default.BatchAccountName, AccountSettings.Default.BatchAccountKey); using (BatchClient batchClient = BatchClient.Open(cred)) { // Create the pool of compute nodes and the job to which we add the multi-instance task. await CreatePoolAsync(batchClient, poolId, numberOfNodes, appPackageId, appPackageVersion); await CreateJobAsync(batchClient, jobId, poolId); // Create the multi-instance task. The MultiInstanceSettings property (configured // below) tells Batch to create one primary and several subtasks, the total number // of which matches the number of instances you specify in the MultiInstanceSettings. // This main task's command line is the "application command," and is executed *only* // by the primary, and only after the primary and all subtasks have executed the // "coordination command" (the MultiInstanceSettings.CoordinationCommandLine). CloudTask multiInstanceTask = new CloudTask(id: taskId, commandline: $"cmd /c mpiexec.exe -c 1 -wdir %AZ_BATCH_TASK_SHARED_DIR% %AZ_BATCH_APP_PACKAGE_{appPackageId.ToUpper()}#{appPackageVersion}%\\MPIHelloWorld.exe"); // Configure the task's MultiInstanceSettings. Specify the number of nodes // to allocate to the multi-instance task, and the "coordination command". // The CoordinationCommandLine is run by the primary and subtasks, and is // used in this sample to start SMPD on the compute nodes. multiInstanceTask.MultiInstanceSettings = new MultiInstanceSettings(@"cmd /c start cmd /c smpd.exe -d", numberOfNodes); // Submit the task to the job. Batch will take care of creating one primary and // enough subtasks to match the total number of nodes allocated to the task, // and schedule them for execution on the nodes. Console.WriteLine($"Adding task [{taskId}] to job [{jobId}]..."); await batchClient.JobOperations.AddTaskAsync(jobId, multiInstanceTask); // Get the "bound" version of the multi-instance task. CloudTask mainTask = await batchClient.JobOperations.GetTaskAsync(jobId, taskId); // We use a TaskStateMonitor to monitor the state of our tasks. In this case, // we will wait for the task to reach the Completed state. Console.WriteLine($"Awaiting task completion, timeout in {timeout}..."); TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor(); await taskStateMonitor.WhenAll(new List <CloudTask> { mainTask }, TaskState.Completed, timeout); // Refresh the task to obtain up-to-date property values from Batch, such as // its current state and information about the node on which it executed. await mainTask.RefreshAsync(); string stdOut = mainTask.GetNodeFile(Constants.StandardOutFileName).ReadAsString(); string stdErr = mainTask.GetNodeFile(Constants.StandardErrorFileName).ReadAsString(); Console.WriteLine(); Console.WriteLine($"Main task [{mainTask.Id}] is in state [{mainTask.State}] and ran on compute node [{mainTask.ComputeNodeInformation.ComputeNodeId}]:"); Console.WriteLine("---- stdout.txt ----"); Console.WriteLine(stdOut); Console.WriteLine("---- stderr.txt ----"); Console.WriteLine(stdErr); // Need to delay a bit to allow the Batch service to mark the subtasks as Complete TimeSpan subtaskTimeout = TimeSpan.FromSeconds(10); Console.WriteLine($"Main task completed, waiting {subtaskTimeout} for subtasks to complete..."); System.Threading.Thread.Sleep(subtaskTimeout); Console.WriteLine(); Console.WriteLine("---- Subtask information ----"); // Obtain the collection of subtasks for the multi-instance task, and print // some information about each. IPagedEnumerable <SubtaskInformation> subtasks = mainTask.ListSubtasks(); await subtasks.ForEachAsync(async (subtask) => { Console.WriteLine("subtask: " + subtask.Id); Console.WriteLine("\texit code: " + subtask.ExitCode); if (subtask.State == SubtaskState.Completed) { // Obtain the file from the node on which the subtask executed. For normal CloudTasks, // we could simply call CloudTask.GetNodeFile(Constants.StandardOutFileName), but the // subtasks are not "normal" tasks in Batch, and thus must be handled differently. ComputeNode node = await batchClient.PoolOperations.GetComputeNodeAsync(subtask.ComputeNodeInformation.PoolId, subtask.ComputeNodeInformation.ComputeNodeId); string outPath = subtask.ComputeNodeInformation.TaskRootDirectory + "\\" + Constants.StandardOutFileName; string errPath = subtask.ComputeNodeInformation.TaskRootDirectory + "\\" + Constants.StandardErrorFileName; NodeFile stdOutFile = await node.GetNodeFileAsync(outPath.Trim('\\')); NodeFile stdErrFile = await node.GetNodeFileAsync(errPath.Trim('\\')); stdOut = await stdOutFile.ReadAsStringAsync(); stdErr = await stdErrFile.ReadAsStringAsync(); Console.WriteLine($"\tnode: " + node.Id); Console.WriteLine("\tstdout.txt: " + stdOut); Console.WriteLine("\tstderr.txt: " + stdErr); } else { Console.WriteLine($"\tSubtask {subtask.Id} is in state {subtask.State}"); } }); // Clean up the resources we've created in the Batch account Console.WriteLine(); Console.Write("Delete job? [yes] no: "); string response = Console.ReadLine().ToLower(); if (response != "n" && response != "no") { await batchClient.JobOperations.DeleteJobAsync(jobId); } Console.Write("Delete pool? [yes] no: "); response = Console.ReadLine().ToLower(); if (response != "n" && response != "no") { await batchClient.PoolOperations.DeletePoolAsync(poolId); } } }
/// <summary> /// calls the two new get-status REST APIs and asserts their values /// /// 1: add a single quick task (quick because we don't need it to run very long) /// 2: this forces a victim compute node to run the JobPrep /// 3: poll for this compute node, ignore others (sharedPool.size probably > 1) /// 4: check status of JobPrep /// 4a: assert as many values as makes sense... this is not a retry test /// 5: JobPrep succeeds, task runs /// 6: poll for JobRelease.. it is long running /// 7: assert as many values as makes sense. /// </summary> /// <param name="batchCli"></param> private void TestGetPrepReleaseStatusCalls(BatchClient batchCli, CloudJobSchedule boundJobSchedule, string sharedPool, IEnumerable <ResourceFile> correctResFiles) { // need this often enough lets just pull it out string jobId = boundJobSchedule.ExecutionInformation.RecentJob.Id; PoolOperations poolOps = batchCli.PoolOperations; JobScheduleOperations jobScheduleOperations = batchCli.JobScheduleOperations; { DateTime beforeJobPrepRuns = DateTime.UtcNow; // used to test start time // need a task to force JobPrep CloudTask sillyTask = new CloudTask("forceJobPrep", "cmd /c hostname"); // add the task batchCli.JobOperations.AddTask(jobId, sillyTask); bool keepLooking = true; while (keepLooking) { this.testOutputHelper.WriteLine("Waiting for task to be scheduled."); foreach (CloudTask curTask in batchCli.JobOperations.GetJob(jobId).ListTasks()) { if (curTask.State != TaskState.Active) { keepLooking = false; break; } } Thread.Sleep(1000); } List <JobPreparationAndReleaseTaskExecutionInformation> jobPrepStatusList = new List <JobPreparationAndReleaseTaskExecutionInformation>(); while (jobPrepStatusList.Count == 0) { jobPrepStatusList = batchCli.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId).ToList(); } JobPreparationAndReleaseTaskExecutionInformation jptei = jobPrepStatusList.First(); ComputeNode victimComputeNodeRunningPrepAndRelease = poolOps.GetComputeNode(sharedPool, jptei.ComputeNodeId); // job prep tests { Assert.NotNull(jptei); Assert.Equal(0, jptei.JobPreparationTaskExecutionInformation.RetryCount); Assert.True(beforeJobPrepRuns < jptei.JobPreparationTaskExecutionInformation.StartTime + TimeSpan.FromSeconds(10)); // test that the start time is rational -- 10s of wiggle room Assert.Null(jptei.JobPreparationTaskExecutionInformation.FailureInformation); this.testOutputHelper.WriteLine(""); this.testOutputHelper.WriteLine("listing files for compute node: " + victimComputeNodeRunningPrepAndRelease.Id); // fiter the list so reduce noise List <NodeFile> filteredListJobPrep = new List <NodeFile>(); foreach (NodeFile curTF in victimComputeNodeRunningPrepAndRelease.ListNodeFiles(recursive: true)) { // filter on the jsId since we only run one job per job in this test. if (curTF.Path.IndexOf(boundJobSchedule.Id, StringComparison.InvariantCultureIgnoreCase) >= 0) { this.testOutputHelper.WriteLine(" name:" + curTF.Path + ", size: " + ((curTF.IsDirectory.HasValue && curTF.IsDirectory.Value) ? "<dir>" : curTF.Properties.ContentLength.ToString())); filteredListJobPrep.Add(curTF); } } // confirm resource files made it foreach (ResourceFile curCorrectRF in correctResFiles) { bool found = false; foreach (NodeFile curTF in filteredListJobPrep) { // look for the resfile filepath in the taskfile name found |= curTF.Path.IndexOf(curCorrectRF.FilePath, StringComparison.InvariantCultureIgnoreCase) >= 0; } Assert.True(found, "Looking for resourcefile: " + curCorrectRF.FilePath); } // poll for completion while (JobPreparationTaskState.Completed != jptei.JobPreparationTaskExecutionInformation.State) { this.testOutputHelper.WriteLine("waiting for jopPrep to complete"); Thread.Sleep(2000); // refresh the state info ODATADetailLevel detailLevel = new ODATADetailLevel() { FilterClause = string.Format("nodeId eq '{0}'", victimComputeNodeRunningPrepAndRelease.Id) }; jobPrepStatusList = batchCli.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId, detailLevel: detailLevel).ToList(); jptei = jobPrepStatusList.First(); } // need success Assert.Equal(0, jptei.JobPreparationTaskExecutionInformation.ExitCode); // check stdout to confirm prep ran //Why do I have to use the hardcoded string job-1 here...? string stdOutFileSpec = Path.Combine("workitems", boundJobSchedule.Id, "job-1", boundJobSchedule.JobSpecification.JobPreparationTask.Id, Constants.StandardOutFileName); string stdOut = victimComputeNodeRunningPrepAndRelease.GetNodeFile(stdOutFileSpec).ReadAsString(); string stdErrFileSpec = Path.Combine("workitems", boundJobSchedule.Id, "job-1", boundJobSchedule.JobSpecification.JobPreparationTask.Id, Constants.StandardErrorFileName); string stdErr = string.Empty; try { stdErr = victimComputeNodeRunningPrepAndRelease.GetNodeFile(stdErrFileSpec).ReadAsString(); } catch (Exception) { //Swallow any exceptions here since stderr may not exist } this.testOutputHelper.WriteLine(stdOut); this.testOutputHelper.WriteLine(stdErr); Assert.True(!string.IsNullOrWhiteSpace(stdOut)); Assert.Contains("jobpreparation", stdOut.ToLower()); } // jobPrep tests completed. let JobPrep complete and task run and wait for JobRelease TaskStateMonitor tsm = batchCli.Utilities.CreateTaskStateMonitor(); // spam/logging interceptor Protocol.RequestInterceptor consoleSpammer = new Protocol.RequestInterceptor((x) => { this.testOutputHelper.WriteLine("TestGetPrepReleaseStatusCalls: waiting for JobPrep and task to complete"); ODATADetailLevel detailLevel = new ODATADetailLevel() { FilterClause = string.Format("nodeId eq '{0}'", victimComputeNodeRunningPrepAndRelease.Id) }; jobPrepStatusList = batchCli.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId, detailLevel: detailLevel).ToList(); JobPreparationAndReleaseTaskExecutionInformation jpteiInterceptor = jobPrepStatusList.First(); this.testOutputHelper.WriteLine(" JobPrep.State: " + jpteiInterceptor.JobPreparationTaskExecutionInformation.State); this.testOutputHelper.WriteLine(""); }); // waiting for the task to complete means so JobRelease is run. tsm.WaitAll( batchCli.JobOperations.GetJob(jobId).ListTasks(additionalBehaviors: new[] { consoleSpammer }), TaskState.Completed, TimeSpan.FromSeconds(120), additionalBehaviors: new[] { consoleSpammer }); // trigger JobRelease batchCli.JobOperations.TerminateJob(jobId, terminateReason: "die! I want JobRelease to run!"); // now that the task has competed, we are racing with the JobRelease... but it is sleeping so we can can catch it while (true) { ODATADetailLevel detailLevel = new ODATADetailLevel() { FilterClause = string.Format("nodeId eq '{0}'", victimComputeNodeRunningPrepAndRelease.Id) }; jobPrepStatusList = batchCli.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId, detailLevel: detailLevel).ToList(); JobPreparationAndReleaseTaskExecutionInformation jrtei = jobPrepStatusList.FirstOrDefault(); if ((jrtei == null) || (null == jrtei.JobReleaseTaskExecutionInformation)) { Thread.Sleep(2000); } else { Assert.NotNull(jrtei); if (jrtei.JobReleaseTaskExecutionInformation.State != JobReleaseTaskState.Completed) { this.testOutputHelper.WriteLine("JobReleaseTask state is: " + jrtei.JobReleaseTaskExecutionInformation.State); Thread.Sleep(5000); } else { this.testOutputHelper.WriteLine("JobRelease commpleted!"); // we are done break; } } } } }
public void TestSampleWithFilesAndPool() { Action test = () => { StagingStorageAccount storageCreds = TestUtilities.GetStorageCredentialsFromEnvironment(); using (BatchClient batchCli = TestUtilities.OpenBatchClientAsync(TestUtilities.GetCredentialsFromEnvironment()).Result) { string jobId = "SampleWithFilesJob-" + TestUtilities.GetMyName(); try { CloudJob quickJob = batchCli.JobOperations.CreateJob(); quickJob.Id = jobId; quickJob.PoolInformation = new PoolInformation() { PoolId = this.poolFixture.PoolId }; quickJob.Commit(); CloudJob boundJob = batchCli.JobOperations.GetJob(jobId); CloudTask myTask = new CloudTask(id: "CountWordsTask", commandline: @"cmd /c dir /s .. & dir & wc localwords.txt"); // first we have local files that we want pushed to the compute node before the commandline is invoked FileToStage wordsDotText = new FileToStage(Resources.LocalWordsDotText, storageCreds); // use "default" mapping to base name of local file myTask.FilesToStage = new List <IFileStagingProvider>(); myTask.FilesToStage.Add(wordsDotText); // add the task to the job var artifacts = boundJob.AddTask(myTask); var specificArtifact = artifacts[typeof(FileToStage)]; SequentialFileStagingArtifact sfsa = specificArtifact as SequentialFileStagingArtifact; Assert.NotNull(sfsa); // add a million more tasks... // test to ensure the task is read only TestUtilities.AssertThrows <InvalidOperationException>(() => myTask.FilesToStage = new List <IFileStagingProvider>()); // Open the new Job as bound. CloudPool boundPool = batchCli.PoolOperations.GetPool(boundJob.ExecutionInformation.PoolId); // wait for the task to complete Utilities utilities = batchCli.Utilities; TaskStateMonitor taskStateMonitor = utilities.CreateTaskStateMonitor(); taskStateMonitor.WaitAll( boundJob.ListTasks(), Microsoft.Azure.Batch.Common.TaskState.Completed, TimeSpan.FromMinutes(10), controlParams: null, additionalBehaviors: new[] { // spam/logging interceptor new Microsoft.Azure.Batch.Protocol.RequestInterceptor((x) => { this.testOutputHelper.WriteLine("Issuing request type: " + x.GetType().ToString()); try { // print out the compute node states... we are actually waiting on the compute nodes List <ComputeNode> allComputeNodes = boundPool.ListComputeNodes().ToList(); this.testOutputHelper.WriteLine(" #compute nodes: " + allComputeNodes.Count); allComputeNodes.ForEach( (icn) => { this.testOutputHelper.WriteLine(" computeNode.id: " + icn.Id + ", state: " + icn.State); }); } catch (Exception ex) { // there is a race between the pool-life-job and the end of the job.. and the ListComputeNodes above Assert.True(false, "SampleWithFilesAndPool probably can ignore this if its pool not found: " + ex.ToString()); } }) }); List <CloudTask> tasks = boundJob.ListTasks(null).ToList(); CloudTask myCompletedTask = tasks[0]; foreach (CloudTask curTask in tasks) { this.testOutputHelper.WriteLine("Task Id: " + curTask.Id + ", state: " + curTask.State); } boundPool.Refresh(); this.testOutputHelper.WriteLine("Pool Id: " + boundPool.Id + ", state: " + boundPool.State); string stdOut = myCompletedTask.GetNodeFile(Constants.StandardOutFileName).ReadAsString(); string stdErr = myCompletedTask.GetNodeFile(Constants.StandardErrorFileName).ReadAsString(); this.testOutputHelper.WriteLine("StdOut: "); this.testOutputHelper.WriteLine(stdOut); this.testOutputHelper.WriteLine("StdErr: "); this.testOutputHelper.WriteLine(stdErr); this.testOutputHelper.WriteLine("Task Files:"); foreach (NodeFile curFile in myCompletedTask.ListNodeFiles(recursive: true)) { this.testOutputHelper.WriteLine(" Filename: " + curFile.Name); } // confirm the files are there Assert.True(FoundFile("localwords.txt", myCompletedTask.ListNodeFiles(recursive: true)), "mising file: localwords.txt"); // test validation of StagingStorageAccount TestUtilities.AssertThrows <ArgumentOutOfRangeException>(() => { new StagingStorageAccount(storageAccount: " ", storageAccountKey: "key", blobEndpoint: "blob"); }); TestUtilities.AssertThrows <ArgumentOutOfRangeException>(() => { new StagingStorageAccount(storageAccount: "account", storageAccountKey: " ", blobEndpoint: "blob"); }); TestUtilities.AssertThrows <ArgumentOutOfRangeException>(() => { new StagingStorageAccount(storageAccount: "account", storageAccountKey: "key", blobEndpoint: ""); }); if (null != sfsa) { // TODO: delete the container! } } finally { TestUtilities.DeleteJobIfExistsAsync(batchCli, jobId).Wait(); } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }