/// <summary>
/// Handles a task-completion report posted by a compute node: forwards the report to the task
/// monitor and tells the node what to do next.
/// </summary>
/// <param name="taskInfo">Completion report read from the request body.</param>
/// <param name="token">Cancellation token supplied by the caller; not consulted by this method.</param>
/// <returns>
/// A completed task carrying <c>NextOperation.CancelTask</c> when the monitor accepted the report,
/// or <c>NextOperation.CancelJob</c> when handing the report to the monitor threw.
/// </returns>
public Task<NextOperation> TaskCompletedAsync([FromBody] ComputeNodeTaskCompletionEventArgs taskInfo, CancellationToken token)
{
    // TODO: move task key to url
    var info = taskInfo.TaskInfo;
    var key = this.utilities.GetTaskKey(taskInfo.JobId, info.TaskId, info.TaskRequeueCount ?? 0);

    NextOperation next;
    try
    {
        this.logger.LogInformation(
            "TaskCompleted. NodeName {0}, TaskKey {1} ExitCode {2} TaskMessage {3}",
            taskInfo.NodeName,
            key,
            info.ExitCode,
            info.Message);

        this.monitor.CompleteTask(key, taskInfo);
        next = NextOperation.CancelTask;
    }
    catch (Exception ex)
    {
        this.logger.LogError(
            ex,
            "Linux TaskCompleted. NodeName {0}, TaskId {1} ExitCode {2} TaskMessage {3}",
            taskInfo.NodeName,
            info.TaskId,
            info.ExitCode,
            info.Message);

        // Surface the failure to whoever is waiting on this task key.
        this.monitor.FailTask(key, ex);
        next = NextOperation.CancelJob;
    }

    return Task.FromResult(next);
}
/// <summary>
/// Executes the <c>InternalJob</c> carried by <paramref name="taskItem"/>: one task is created per
/// entry in the job's <c>CommandLines</c>, the tasks are started without waiting for one another,
/// and each task's state (Dispatching, Running, Finished) is written to both the jobs table and
/// the nodes table as it progresses.
/// </summary>
/// <param name="taskItem">Queue item whose message is the job to run.</param>
/// <param name="token">Cancellation token passed to every blob, table and communicator call.</param>
/// <returns>
/// <c>true</c> when every task persisted all of its state records successfully; <c>false</c> when
/// any table write for any task returned a non-success status code.
/// </returns>
public override async Task<bool> DoWorkAsync(TaskItem taskItem, CancellationToken token)
{
    var job = taskItem.GetMessage<InternalJob>();
    var nodeName = this.Configuration.GetValue<string>(Constants.HpcHostNameEnv);

    using (this.logger.BeginScope("Do work for InternalJob {0} on node {1}", job.Id, nodeName))
    {
        // TODO: make sure invisible.
        logger.LogInformation("Executing job {0}", job.Id);

        var tasks = Enumerable.Range(0, job.CommandLines.Length).Select(async taskId =>
        {
            var cmd = job.CommandLines[taskId];
            logger.LogInformation("Executing command {0}, job {1}", cmd, job.Id);

            var taskKey = this.utilities.GetTaskKey(job.Id, taskId, job.RequeueCount);
            var resultKey = this.utilities.GetJobResultKey(nodeName, taskKey);
            var taskResultBlob = await this.utilities.CreateOrReplaceTaskOutputBlobAsync(job.Id, resultKey, token);

            // The monitor callback appends each chunk of task output to the result blob.
            // Append failures are logged and otherwise ignored so that a blob error does not
            // propagate out of the callback.
            using (var monitor = this.Monitor.StartMonitorTask(taskKey, async (output, cancellationToken) =>
            {
                try
                {
                    await taskResultBlob.AppendTextAsync(output, Encoding.UTF8, null, null, null, cancellationToken);
                }
                catch (Exception ex)
                {
                    this.logger.LogError(ex, "Error happened when append to blob {0}", taskResultBlob.Name);
                }
            }))
            {
                this.logger.LogInformation("Call startjobandtask for job {0}, task {1}", job.Id, taskKey);
                var jobPartitionName = this.utilities.GetJobPartitionKey($"{job.Type}", job.Id);
                var nodePartitionName = this.utilities.GetNodePartitionKey(nodeName);

                // Writes one state record to the jobs table and then to the nodes table, under the
                // same row key. Stops at the first non-success status code and returns false.
                async Task<bool> SaveTaskResultAsync(ComputeNodeTaskCompletionEventArgs args)
                {
                    var jobEntity = new JsonTableEntity(jobPartitionName, resultKey, args);
                    var jobResult = await jobsTable.ExecuteAsync(TableOperation.InsertOrReplace(jobEntity), null, null, token);
                    this.logger.LogInformation("Saved task result {0} to jobs table, status code {1}", resultKey, jobResult.HttpStatusCode);
                    if (!jobResult.IsSuccessfulStatusCode())
                    {
                        return false;
                    }

                    var nodeEntity = new JsonTableEntity(nodePartitionName, resultKey, args);
                    var nodeResult = await nodesTable.ExecuteAsync(TableOperation.InsertOrReplace(nodeEntity), null, null, token);
                    this.logger.LogInformation("Saved task result {0} to nodes table, status code {1}", resultKey, nodeResult.HttpStatusCode);
                    return nodeResult.IsSuccessfulStatusCode();
                }

                // 1. Record that the task is being dispatched.
                var taskResultArgs = new ComputeNodeTaskCompletionEventArgs(nodeName, job.Id, null) { State = TaskState.Dispatching };
                if (!await SaveTaskResultAsync(taskResultArgs))
                {
                    return false;
                }

                // 2. Ask the node to start the task; output is directed to this agent's /output/{taskKey} URI.
                await this.communicator.StartJobAndTaskAsync(
                    nodeName,
                    new StartJobAndTaskArg(new int[0], job.Id, taskId),
                    "",
                    "",
                    new ProcessStartInfo(
                        cmd,
                        "",
                        "",
                        $"{this.communicator.Options.AgentUriBase}/output/{taskKey}",
                        "",
                        new System.Collections.Hashtable(),
                        new long[0],
                        job.RequeueCount),
                    token);

                // 3. Record that the task is running.
                taskResultArgs = new ComputeNodeTaskCompletionEventArgs(nodeName, job.Id, null) { State = TaskState.Running };
                if (!await SaveTaskResultAsync(taskResultArgs))
                {
                    return false;
                }

                // 4. Wait for the monitor to deliver the completion report, then persist it as Finished.
                this.logger.LogInformation("Wait for response for job {0}, task {1}", job.Id, taskKey);
                taskResultArgs = await monitor.Execution;

                this.logger.LogInformation("Saving result for job {0}, task {1}", job.Id, taskKey);
                taskResultArgs.State = TaskState.Finished;
                return await SaveTaskResultAsync(taskResultArgs);
            }
        });

        var results = await Task.WhenAll<bool>(tasks);
        return results.All(r => r);
    }
}