public void TestOMJobPrepReleaseRunOnNaiveComputeNode() { string jobId = "TestOMJobPrepReleaseRunOnNaiveComputeNode-" + TestUtilities.GetMyName(); Action test = () => { using (BatchClient client = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment())) { try { // create job with prep/release { CloudJob unboundJob = client.JobOperations.CreateJob(jobId, null); unboundJob.PoolInformation = new PoolInformation(); unboundJob.PoolInformation.PoolId = this.poolFixture.PoolId; // add the jobPrep task to the job { JobPreparationTask prep = new JobPreparationTask("cmd /c echo JobPrep!!"); unboundJob.JobPreparationTask = prep; prep.WaitForSuccess = true; // be explicit even though this is the default. need JP/JP to not run } // add a jobRelease task to the job { JobReleaseTask relTask = new JobReleaseTask(JobReleaseTaskCommandLine); unboundJob.JobReleaseTask = relTask; } // add the job to the service unboundJob.Commit(); } // the victim nodes. pool should have size 1. List <ComputeNode> computeNodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList(); Assert.Single(computeNodes); // now we have a job with zero tasks... lets call get-status methods // call it List <JobPreparationAndReleaseTaskExecutionInformation> jrStatusList = client.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId).ToList(); JobPreparationAndReleaseTaskExecutionInformation prepAndReleaseStatus = jrStatusList.FirstOrDefault(); Assert.Null(prepAndReleaseStatus); } finally { // cleanup TestUtilities.DeleteJobIfExistsAsync(client, jobId).Wait(); } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }
public void TestRandomBoundPrepReleaseTaskExecutionInformationProperties() { using BatchClient client = ClientUnitTestCommon.CreateDummyClient(); for (int i = 0; i < TestRunCount; i++) { Protocol.Models.JobPreparationAndReleaseTaskExecutionInformation jobPrepReleaseExecutionInfo = this.customizedObjectFactory.GenerateNew <Protocol.Models.JobPreparationAndReleaseTaskExecutionInformation>(); JobPreparationAndReleaseTaskExecutionInformation boundJobPrepReleaseExecutionInfo = new JobPreparationAndReleaseTaskExecutionInformation(jobPrepReleaseExecutionInfo); ObjectComparer.CheckEqualityResult result = this.objectComparer.CheckEquality(boundJobPrepReleaseExecutionInfo, jobPrepReleaseExecutionInfo); Assert.True(result.Equal, result.Message); } }
/// <summary> /// calls the two new get-status REST APIs and asserts their values /// /// 1: add a single quick task (quick because we don't need it to run very long) /// 2: this forces a victim compute node to run the JobPrep /// 3: poll for this compute node, ignore others (sharedPool.size probably > 1) /// 4: check status of JobPrep /// 4a: assert as many values as makes sense... this is not a retry test /// 5: JobPrep succeeds, task runs /// 6: poll for JobRelease.. it is long running /// 7: assert as many values as makes sense. /// </summary> /// <param name="batchCli"></param> private void TestGetPrepReleaseStatusCalls(BatchClient batchCli, CloudJobSchedule boundJobSchedule, string sharedPool, IEnumerable <ResourceFile> correctResFiles) { // need this often enough lets just pull it out string jobId = boundJobSchedule.ExecutionInformation.RecentJob.Id; PoolOperations poolOps = batchCli.PoolOperations; JobScheduleOperations jobScheduleOperations = batchCli.JobScheduleOperations; { DateTime beforeJobPrepRuns = DateTime.UtcNow; // used to test start time // need a task to force JobPrep CloudTask sillyTask = new CloudTask("forceJobPrep", "cmd /c hostname"); // add the task batchCli.JobOperations.AddTask(jobId, sillyTask); bool keepLooking = true; while (keepLooking) { this.testOutputHelper.WriteLine("Waiting for task to be scheduled."); foreach (CloudTask curTask in batchCli.JobOperations.GetJob(jobId).ListTasks()) { if (curTask.State != TaskState.Active) { keepLooking = false; break; } } Thread.Sleep(1000); } List <JobPreparationAndReleaseTaskExecutionInformation> jobPrepStatusList = new List <JobPreparationAndReleaseTaskExecutionInformation>(); while (jobPrepStatusList.Count == 0) { jobPrepStatusList = batchCli.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId).ToList(); } JobPreparationAndReleaseTaskExecutionInformation jptei = jobPrepStatusList.First(); ComputeNode victimComputeNodeRunningPrepAndRelease = poolOps.GetComputeNode(sharedPool, jptei.ComputeNodeId); // job prep tests { Assert.NotNull(jptei); Assert.Equal(0, jptei.JobPreparationTaskExecutionInformation.RetryCount); Assert.True(beforeJobPrepRuns < jptei.JobPreparationTaskExecutionInformation.StartTime + TimeSpan.FromSeconds(10)); // test that the start time is rational -- 10s of wiggle room Assert.Null(jptei.JobPreparationTaskExecutionInformation.FailureInformation); this.testOutputHelper.WriteLine(""); this.testOutputHelper.WriteLine("listing files for compute node: " + victimComputeNodeRunningPrepAndRelease.Id); // fiter the list so reduce noise List <NodeFile> filteredListJobPrep = new List <NodeFile>(); foreach (NodeFile curTF in victimComputeNodeRunningPrepAndRelease.ListNodeFiles(recursive: true)) { // filter on the jsId since we only run one job per job in this test. if (curTF.Path.IndexOf(boundJobSchedule.Id, StringComparison.InvariantCultureIgnoreCase) >= 0) { this.testOutputHelper.WriteLine(" name:" + curTF.Path + ", size: " + ((curTF.IsDirectory.HasValue && curTF.IsDirectory.Value) ? "<dir>" : curTF.Properties.ContentLength.ToString())); filteredListJobPrep.Add(curTF); } } // confirm resource files made it foreach (ResourceFile curCorrectRF in correctResFiles) { bool found = false; foreach (NodeFile curTF in filteredListJobPrep) { // look for the resfile filepath in the taskfile name found |= curTF.Path.IndexOf(curCorrectRF.FilePath, StringComparison.InvariantCultureIgnoreCase) >= 0; } Assert.True(found, "Looking for resourcefile: " + curCorrectRF.FilePath); } // poll for completion while (JobPreparationTaskState.Completed != jptei.JobPreparationTaskExecutionInformation.State) { this.testOutputHelper.WriteLine("waiting for jopPrep to complete"); Thread.Sleep(2000); // refresh the state info ODATADetailLevel detailLevel = new ODATADetailLevel() { FilterClause = string.Format("nodeId eq '{0}'", victimComputeNodeRunningPrepAndRelease.Id) }; jobPrepStatusList = batchCli.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId, detailLevel: detailLevel).ToList(); jptei = jobPrepStatusList.First(); } // need success Assert.Equal(0, jptei.JobPreparationTaskExecutionInformation.ExitCode); // check stdout to confirm prep ran //Why do I have to use the hardcoded string job-1 here...? string stdOutFileSpec = Path.Combine("workitems", boundJobSchedule.Id, "job-1", boundJobSchedule.JobSpecification.JobPreparationTask.Id, Constants.StandardOutFileName); string stdOut = victimComputeNodeRunningPrepAndRelease.GetNodeFile(stdOutFileSpec).ReadAsString(); string stdErrFileSpec = Path.Combine("workitems", boundJobSchedule.Id, "job-1", boundJobSchedule.JobSpecification.JobPreparationTask.Id, Constants.StandardErrorFileName); string stdErr = string.Empty; try { stdErr = victimComputeNodeRunningPrepAndRelease.GetNodeFile(stdErrFileSpec).ReadAsString(); } catch (Exception) { //Swallow any exceptions here since stderr may not exist } this.testOutputHelper.WriteLine(stdOut); this.testOutputHelper.WriteLine(stdErr); Assert.True(!string.IsNullOrWhiteSpace(stdOut)); Assert.Contains("jobpreparation", stdOut.ToLower()); } // jobPrep tests completed. let JobPrep complete and task run and wait for JobRelease TaskStateMonitor tsm = batchCli.Utilities.CreateTaskStateMonitor(); // spam/logging interceptor Protocol.RequestInterceptor consoleSpammer = new Protocol.RequestInterceptor((x) => { this.testOutputHelper.WriteLine("TestGetPrepReleaseStatusCalls: waiting for JobPrep and task to complete"); ODATADetailLevel detailLevel = new ODATADetailLevel() { FilterClause = string.Format("nodeId eq '{0}'", victimComputeNodeRunningPrepAndRelease.Id) }; jobPrepStatusList = batchCli.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId, detailLevel: detailLevel).ToList(); JobPreparationAndReleaseTaskExecutionInformation jpteiInterceptor = jobPrepStatusList.First(); this.testOutputHelper.WriteLine(" JobPrep.State: " + jpteiInterceptor.JobPreparationTaskExecutionInformation.State); this.testOutputHelper.WriteLine(""); }); // waiting for the task to complete means so JobRelease is run. tsm.WaitAll( batchCli.JobOperations.GetJob(jobId).ListTasks(additionalBehaviors: new[] { consoleSpammer }), TaskState.Completed, TimeSpan.FromSeconds(120), additionalBehaviors: new[] { consoleSpammer }); // trigger JobRelease batchCli.JobOperations.TerminateJob(jobId, terminateReason: "die! I want JobRelease to run!"); // now that the task has competed, we are racing with the JobRelease... but it is sleeping so we can can catch it while (true) { ODATADetailLevel detailLevel = new ODATADetailLevel() { FilterClause = string.Format("nodeId eq '{0}'", victimComputeNodeRunningPrepAndRelease.Id) }; jobPrepStatusList = batchCli.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId, detailLevel: detailLevel).ToList(); JobPreparationAndReleaseTaskExecutionInformation jrtei = jobPrepStatusList.FirstOrDefault(); if ((jrtei == null) || (null == jrtei.JobReleaseTaskExecutionInformation)) { Thread.Sleep(2000); } else { Assert.NotNull(jrtei); if (jrtei.JobReleaseTaskExecutionInformation.State != JobReleaseTaskState.Completed) { this.testOutputHelper.WriteLine("JobReleaseTask state is: " + jrtei.JobReleaseTaskExecutionInformation.State); Thread.Sleep(5000); } else { this.testOutputHelper.WriteLine("JobRelease commpleted!"); // we are done break; } } } } }
public void TestOMJobReleaseSchedulingError() { string jobId = "TestOMJobReleaseSchedulingError-" + CraftTimeString() + "-" + TestUtilities.GetMyName(); Action test = () => { using (BatchClient client = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment())) { try { // create job schedule with prep that succeeds and release the triggers scheduling error { PoolInformation poolInfo = new PoolInformation() { PoolId = this.poolFixture.PoolId }; CloudJob unboundJob = client.JobOperations.CreateJob(jobId, poolInfo); // add the jobPrep task to the job { JobPreparationTask prep = new JobPreparationTask("cmd /c echo the quick job prep jumped over the..."); unboundJob.JobPreparationTask = prep; prep.WaitForSuccess = false; // we don't really care but why not set this } // add a jobRelease task to the job { JobReleaseTask relTask = new JobReleaseTask("cmd /c echo Job Release Task"); unboundJob.JobReleaseTask = relTask; ResourceFile[] badResFiles = { ResourceFile.FromUrl("https://not.a.domain.invalid/file", "bob.txt") }; relTask.ResourceFiles = badResFiles; relTask.Id = "jobRelease"; } // add the job to the service unboundJob.Commit(); } // add a trivial task to force the JP client.JobOperations.AddTask(jobId, new CloudTask("ForceJobPrep", "cmd /c echo TestOMJobReleaseSchedulingError")); // wait for the task to complete TaskStateMonitor tsm = client.Utilities.CreateTaskStateMonitor(); tsm.WaitAll( client.JobOperations.ListTasks(jobId), TaskState.Completed, TimeSpan.FromMinutes(10), additionalBehaviors: new[] { // spam/logging interceptor new Protocol.RequestInterceptor((x) => { this.testOutputHelper.WriteLine("Issuing request type: " + x.GetType().ToString()); // print out the compute node states... we are actually waiting on the compute nodes List <ComputeNode> allComputeNodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList(); this.testOutputHelper.WriteLine(" #compute nodes: " + allComputeNodes.Count); allComputeNodes.ForEach((icn) => { this.testOutputHelper.WriteLine(" computeNode.id: " + icn.Id + ", state: " + icn.State); }); this.testOutputHelper.WriteLine(""); }) } ); // ok terminate job to trigger job release client.JobOperations.TerminateJob(jobId, "BUG: Server will throw 500 if I don't provide reason"); // the victim compute node. pool should have size 1. List <ComputeNode> computeNodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList(); Assert.Single(computeNodes); // now we have a job that should be trying to run the JP // poll for the JP to have been run, and it must have a scheduling error bool releaseNotCompleted = true; // gotta poll to find out when the jp has been run while (releaseNotCompleted) { List <JobPreparationAndReleaseTaskExecutionInformation> jrStatusList = client.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId).ToList(); JobPreparationAndReleaseTaskExecutionInformation prepAndReleaseStatus = jrStatusList.FirstOrDefault(); if (prepAndReleaseStatus != null && null != prepAndReleaseStatus.JobReleaseTaskExecutionInformation) { if (JobReleaseTaskState.Completed == prepAndReleaseStatus.JobReleaseTaskExecutionInformation.State) { releaseNotCompleted = false; // we see a JP has been run // now assert the failure info Assert.NotNull(prepAndReleaseStatus); Assert.NotNull(prepAndReleaseStatus.JobReleaseTaskExecutionInformation.FailureInformation); Assert.Equal(TaskExecutionResult.Failure, prepAndReleaseStatus.JobReleaseTaskExecutionInformation.Result); // spew the failure info this.OutputFailureInfo(prepAndReleaseStatus.JobReleaseTaskExecutionInformation.FailureInformation); } } Thread.Sleep(2000); this.testOutputHelper.WriteLine("Job Release tasks still running (waiting for blob dl to timeout)."); } } finally { client.JobOperations.DeleteJob(jobId); } } }; SynchronizationContextHelper.RunTest(test, LongTestTimeout); }
public void TestOMJobPrepSchedulingError() { string jobId = "TestOMJobPrepSchedulingError-" + CraftTimeString() + "-" + TestUtilities.GetMyName(); Action test = () => { using (BatchClient client = TestUtilities.OpenBatchClient(TestUtilities.GetCredentialsFromEnvironment())) { try { // create job with prep that triggers prep scheduling error { CloudJob unboundJob = client.JobOperations.CreateJob(jobId, new PoolInformation() { PoolId = this.poolFixture.PoolId }); // add the jobPrep task to the job { JobPreparationTask prep = new JobPreparationTask("cmd /c JobPrep Task"); unboundJob.JobPreparationTask = prep; ResourceFile[] badResFiles = { ResourceFile.FromUrl("https://not.a.domain.invalid/file", "bob.txt") }; prep.ResourceFiles = badResFiles; prep.WaitForSuccess = true; // be explicit even though this is the default. need JP/ to not run } // add the job to the service unboundJob.Commit(); } CloudJob boundJob = client.JobOperations.GetJob(jobId); // add a trivial task to force the JP client.JobOperations.AddTask(boundJob.Id, new CloudTask("ForceJobPrep", "cmd /c echo TestOMJobPrepSchedulingError")); // the victim compute node. pool should have size 1. List <ComputeNode> nodes = client.PoolOperations.ListComputeNodes(this.poolFixture.PoolId).ToList(); Assert.Single(nodes); // now we have a job that should be trying to run the JP // poll for the JP to have been run, and it must have a scheduling error bool prepNotCompleted = true; // gotta poll to find out when the jp has been run while (prepNotCompleted) { List <JobPreparationAndReleaseTaskExecutionInformation> jpStatsList = client.JobOperations.ListJobPreparationAndReleaseTaskStatus(jobId).ToList(); JobPreparationAndReleaseTaskExecutionInformation jpStatus = jpStatsList.FirstOrDefault(); if (jpStatus == null) { Thread.Sleep(2000); } else { if (JobPreparationTaskState.Completed == jpStatus.JobPreparationTaskExecutionInformation.State) { prepNotCompleted = false; // we see a JP has completed Assert.NotNull(jpStatus.JobPreparationTaskExecutionInformation.FailureInformation); Assert.Equal(TaskExecutionResult.Failure, jpStatus.JobPreparationTaskExecutionInformation.Result); // spew the failure this.OutputFailureInfo(jpStatus.JobPreparationTaskExecutionInformation.FailureInformation); } this.testOutputHelper.WriteLine("Job Prep is running (waiting for blob dl to timeout)"); } } } finally { // cleanup client.JobOperations.DeleteJob(jobId); } } }; SynchronizationContextHelper.RunTest(test, TestTimeout); }