/// <summary>
/// Handles a cancel event: ends a single task when <c>message.Id</c> is positive,
/// otherwise ends the whole job, on the node this server runs on.
/// </summary>
/// <param name="message">The event describing which job (and optionally which task) to cancel.</param>
/// <param name="insertionTime">Not used by this implementation.</param>
/// <param name="token">Cancellation token flowed to every storage and communicator call.</param>
/// <returns>
/// Always <c>true</c> in this implementation, including when the message is skipped
/// because its requeue count differs from the job's, and when ending the job/task fails.
/// </returns>
public override async T.Task<bool> ProcessAsync(TaskEventMessage message, DateTimeOffset? insertionTime, CancellationToken token)
{
    var jobsTable = this.Utilities.GetJobsTable();
    var nodeName = this.ServerOptions.HostName;

    // A positive task id targets one task; a null key below means "end the whole job".
    string taskKey = null;
    if (message.Id > 0)
    {
        taskKey = this.Utilities.GetTaskKey(message.JobId, message.Id, message.RequeueCount);
    }

    this.Logger.Information("Do work {0} for job {1} task {2} on node {3}", message.EventVerb, message.JobId, message.Id, nodeName);

    var jobPartitionKey = this.Utilities.GetJobPartitionKey(message.JobType, message.JobId);
    var job = await jobsTable.RetrieveAsync<Job>(jobPartitionKey, this.Utilities.JobEntryKey, token);

    // The message was issued for a different requeue generation of the job; ignore it.
    if (job != null && job.RequeueCount != message.RequeueCount)
    {
        return true;
    }

    try
    {
        if (taskKey != null)
        {
            // Keep terminal states (Failed/Finished) untouched; everything else becomes Canceled.
            await this.Utilities.UpdateTaskAsync(
                jobPartitionKey,
                taskKey,
                t => t.State = t.State == TaskState.Failed || t.State == TaskState.Finished ? t.State : TaskState.Canceled,
                token,
                this.Logger);

            await this.Communicator.EndTaskAsync(nodeName, new EndTaskArg(null, message.JobId, message.Id), token);
            this.Monitor.CancelTask(message.JobId, taskKey);
        }
        else
        {
            // end the whole job
            await this.Utilities.UpdateJobAsync(
                message.JobType,
                message.JobId,
                j => j.State = j.State == JobState.Failed || j.State == JobState.Finished ? j.State : JobState.Canceled,
                token,
                this.Logger);

            await this.Communicator.EndJobAsync(nodeName, new EndJobArg(null, message.JobId), token);
            this.Monitor.CancelJob(message.JobId);
        }
    }
    catch (Exception ex)
    {
        // Always record the failure in the log: when the job entity is missing there is
        // no job to attach an event to, and the exception would otherwise vanish silently.
        this.Logger.Error(ex, "Failed to end job {0} task {1} on node {2}", message.JobId, message.Id, nodeName);

        if (job != null)
        {
            await this.Utilities.AddJobsEventAsync(job, $"Failed to end Job {job.Id}, exception {ex}", EventType.Warning, token);
        }
    }

    return true;
}
/// <summary>
/// Handles a start-task event: loads the task and its start info from the jobs table,
/// asks the node agent to start it, streams the task output into an append blob,
/// waits for completion (bounded by <c>task.MaximumRuntimeSeconds</c>), persists the
/// result, and finally enqueues a task completion message.
/// </summary>
/// <param name="message">The event identifying the job, task and requeue count to start.</param>
/// <param name="insertionTime">
/// When non-null and more than 10 seconds in the past, the job state is re-checked
/// before the task is started.
/// </param>
/// <param name="token">Cancellation token flowed to storage and communicator calls.</param>
/// <returns>
/// <c>false</c> when persisting the task result fails or the operation was cancelled
/// (storage cancellation, or <see cref="OperationCanceledException"/> with <paramref name="token"/>
/// signalled); <c>true</c> on every other path, including skips, timeouts and handled failures.
/// </returns>
public override async T.Task<bool> ProcessAsync(TaskEventMessage message, DateTimeOffset? insertionTime, CancellationToken token)
{
    this.logger = this.Logger.ForContext("Job", message.JobId).ForContext("Task", message.Id);
    this.jobsTable = this.Utilities.GetJobsTable();
    this.nodesTable = this.Utilities.GetNodesTable();

    var nodeName = this.ServerOptions.HostName;
    JobType jobType = message.JobType;
    int jobId = message.JobId;
    int taskId = message.Id;
    int requeueCount = message.RequeueCount;

    this.nodePartitionKey = this.Utilities.GetNodePartitionKey(nodeName);
    this.jobPartitionKey = this.Utilities.GetJobPartitionKey(message.JobType, jobId);

    var taskKey = this.Utilities.GetTaskKey(jobId, taskId, requeueCount);
    var taskInfoKey = this.Utilities.GetTaskInfoKey(jobId, taskId, requeueCount);
    var taskResultKey = this.Utilities.GetTaskResultKey(jobId, taskId, requeueCount);

    logger.Information("Do work {0} for Task {1} on node {2}", message.EventVerb, taskKey, nodeName);

    if (insertionTime != null && insertionTime + TimeSpan.FromSeconds(10) < DateTimeOffset.UtcNow)
    {
        // Only when the insertion time is 10 seconds ago, we check the job status.
        var job = await this.jobsTable.RetrieveAsync<Job>(jobPartitionKey, this.Utilities.JobEntryKey, token);

        // The job entity may be missing; treat that like a job that is not running
        // instead of failing with a NullReferenceException.
        if (job == null)
        {
            logger.Warning("Trying to start a task {0} when the job entity cannot be found", taskKey);
            return true;
        }

        if (job.State != JobState.Running)
        {
            logger.Warning("Trying to start a task {0} when {1} Job {2} is in state {3}", taskKey, job.Type, job.Id, job.State);
            return true;
        }
    }

    var task = await this.jobsTable.RetrieveAsync<Task>(this.jobPartitionKey, taskKey, token);

    // Without the task entity there is nothing to execute and nothing to report on.
    if (task == null)
    {
        logger.Warning("Task {0} cannot be found, skip executing", taskKey);
        return true;
    }

    // Stays null unless an execution result is received from the node.
    int? exitCode = null;
    CloudAppendBlob taskResultBlob = null;
    var cmd = task.CommandLine;
    DateTimeOffset startTime = DateTimeOffset.UtcNow;

    // Placeholder result persisted before the task starts; replaced by the node's result if one arrives.
    var taskResult = new ComputeClusterTaskInformation()
    {
        ExitCode = -1,
        Message = "Running",
        CommandLine = cmd,
        JobId = jobId,
        TaskId = taskId,
        NodeName = nodeName,
        ResultKey = taskResultKey,
        StartTime = startTime,
    };

    try
    {
        if (task.State != TaskState.Dispatching && task.State != TaskState.Running && task.State != TaskState.Queued)
        {
            logger.Information("Job {0} task {1} state {2}, skip Executing command {3}", jobId, taskId, task.State, cmd);
            return true;
        }

        var taskInfo = await this.jobsTable.RetrieveAsync<TaskStartInfo>(this.jobPartitionKey, taskInfoKey, token);

        logger.Information("Executing command {0}", cmd);

        var rawResult = new StringBuilder();

        // No monitor is created for an empty command line; in that case the method
        // returns right after the task has been marked as running.
        using (var monitor = string.IsNullOrEmpty(cmd) ? null : this.Monitor.StartMonitorTask(jobId, taskKey, async (output, eof, cancellationToken) =>
        {
            try
            {
                // Keep an in-memory copy of the output, capped at MaxRawResultLength, for the result message.
                if (rawResult.Length < MaxRawResultLength)
                {
                    rawResult.Append(output);
                }

                // The output blob is created lazily on the first chunk.
                taskResultBlob = taskResultBlob ?? await this.Utilities.CreateOrReplaceJobOutputBlobAsync(jobType, taskResultKey, token);
                await taskResultBlob.AppendTextAsync(output, Encoding.UTF8, null, null, null, cancellationToken);

                if (eof)
                {
                    taskResultBlob.Metadata[TaskOutputPage.EofMark] = eof.ToString();
                    await taskResultBlob.SetMetadataAsync(null, null, null, cancellationToken);
                }
            }
            catch (Exception ex)
            {
                // The blob may still be null when its creation is what failed, so
                // use a null-conditional access to avoid throwing from the handler.
                logger.Error(ex, "Error happened when append to blob {0}", taskResultBlob?.Name);
            }
        }))
        {
            if (!await this.PersistTaskResult(taskResultKey, taskResult, token))
            {
                return false;
            }

            logger.Information("Call startjobandtask for task {0}", taskKey);

            (taskInfo.StartInfo.environmentVariables ?? (taskInfo.StartInfo.environmentVariables = new Dictionary<string, string>()))
                .Add("blobEndpoint", this.Utilities.Account.BlobEndpoint.AbsoluteUri);

            // Both stdout and stderr are redirected to the same agent output endpoint.
            taskInfo.StartInfo.stdout = taskInfo.StartInfo.stderr = $"{this.Communicator.Options.AgentUriBase}/output/{jobId}/{taskKey}";

            await this.Utilities.UpdateTaskAsync(jobPartitionKey, taskKey, t => t.State = TaskState.Dispatching, token, this.logger);

            await this.Communicator.StartJobAndTaskAsync(
                nodeName,
                new StartJobAndTaskArg(new int[0], taskInfo.JobId, taskInfo.Id),
                taskInfo.UserName,
                taskInfo.Password,
                taskInfo.StartInfo,
                taskInfo.PrivateKey,
                taskInfo.PublicKey,
                token);

            logger.Information("Update task state to running");
            await this.Utilities.UpdateTaskAsync(jobPartitionKey, taskKey, t => t.State = TaskState.Running, token, this.logger);

            logger.Information("Waiting for response");
            if (monitor == null)
            {
                return true;
            }

            ComputeNodeTaskCompletionEventArgs taskResultArgs;
            try
            {
                // Race the execution against the task's maximum runtime.
                if (monitor.Result.Execution == await T.Task.WhenAny(monitor.Result.Execution, T.Task.Delay(TimeSpan.FromSeconds(task.MaximumRuntimeSeconds))))
                {
                    // The execution task has already completed here, so reading Result does
                    // not block; failures surface as AggregateException, handled below.
                    taskResultArgs = monitor.Result.Execution.Result;
                }
                else
                {
                    logger.Information("Task has timed out");
                    return true;
                }
            }
            catch (AggregateException ex) when (ex.InnerExceptions.All(e => e is OperationCanceledException))
            {
                logger.Information("Task has been canceled");
                return true;
            }
            catch (T.TaskCanceledException)
            {
                logger.Information("Task has been canceled");
                return true;
            }

            // Fall back to the placeholder when the node reported no task info.
            taskResult = taskResultArgs.TaskInfo ?? taskResult;

            logger.Information("Updating task state with exit code {0}", taskResult?.ExitCode);
            await this.Utilities.UpdateTaskAsync(
                jobPartitionKey,
                taskKey,
                t => t.State = taskResult?.ExitCode == 0 ? TaskState.Finished : TaskState.Failed,
                token,
                this.logger);

            if (taskResult != null)
            {
                taskResult.StartTime = startTime;
                exitCode = taskResult.ExitCode;
                taskResult.Message = rawResult.Length > MaxRawResultLength ? rawResult.ToString(0, MaxRawResultLength) : rawResult.ToString();
                taskResult.CommandLine = cmd;
                taskResult.JobId = jobId;
                taskResult.TaskId = taskId;
                taskResult.NodeName = nodeName;
                taskResult.ResultKey = taskResultKey;
                taskResult.EndTime = DateTimeOffset.UtcNow;

                logger.Information("Saving result");
                if (!await this.PersistTaskResult(taskResultKey, taskResult, token))
                {
                    return false;
                }
            }
        }
    }
    catch (StorageException ex) when (ex.IsCancellation())
    {
        return false;
    }
    catch (OperationCanceledException) when (token.IsCancellationRequested)
    {
        return false;
    }
    catch (Exception ex)
    {
        // Any other failure: record it in the task result, fail the task only if it was
        // still Dispatching/Running, and surface a warning event on the job.
        taskResult.Message = ex.ToString();
        taskResult.EndTime = DateTimeOffset.UtcNow;
        await this.PersistTaskResult(taskResultKey, taskResult, token);
        await this.Utilities.UpdateTaskAsync(
            jobPartitionKey,
            taskKey,
            t => t.State = t.State == TaskState.Dispatching || t.State == TaskState.Running ? TaskState.Failed : t.State,
            token,
            this.logger);
        await this.Utilities.AddJobsEventAsync(jobType, jobId, $"Task {taskId}, exception {ex}", EventType.Warning, token);
    }
    finally
    {
        // Runs on every exit path of the try block above, including its early returns.
        var queue = this.Utilities.GetJobTaskCompletionQueue(jobId);
        logger.Information("Adding task completion message");
        await queue.AddMessageAsync(
            new CloudQueueMessage(JsonConvert.SerializeObject(
                new TaskCompletionMessage()
                {
                    JobId = jobId,
                    Id = taskId,
                    ExitCode = exitCode,
                    JobType = jobType,
                    RequeueCount = requeueCount,
                    ChildIds = task.ChildIds,
                },
                Formatting.Indented)),
            null,
            null,
            null,
            null,
            token);

        // Make sure an output blob exists and carries the EOF mark even when no output was received.
        taskResultBlob = taskResultBlob ?? await this.Utilities.CreateOrReplaceJobOutputBlobAsync(jobType, taskResultKey, token);
        taskResultBlob.Metadata[TaskOutputPage.EofMark] = true.ToString();
        await taskResultBlob.SetMetadataAsync(null, null, null, token);
        logger.Information("Finished");
    }

    return true;
}
/// <summary>
/// Processes a single task event message. Concrete processors override this
/// to implement the work for a particular kind of event.
/// </summary>
/// <param name="message">The task event message to process.</param>
/// <param name="insertionTime">
/// Optional timestamp associated with the message; may be null.
/// NOTE(review): presumably the time the message was inserted into its queue - confirm against the caller.
/// </param>
/// <param name="token">Token used to observe cancellation while processing.</param>
/// <returns>
/// A task producing a boolean outcome.
/// NOTE(review): how the caller interprets true/false is not visible here - confirm.
/// </returns>
public abstract Task <bool> ProcessAsync(TaskEventMessage message, DateTimeOffset?insertionTime, CancellationToken token);