/// <summary>
/// Handles a cancel-style event: ends a single task when the message carries a positive task id,
/// otherwise ends the whole job.
/// </summary>
/// <param name="message">Event message carrying the job type, job id, task id and requeue count.</param>
/// <param name="insertionTime">Not used by this handler.</param>
/// <param name="token">Cancellation token passed to every storage and communicator call.</param>
/// <returns>
/// Always <c>true</c>. Failures while ending the task/job are logged (and recorded as a job
/// warning event when the job entry exists) instead of being rethrown.
/// </returns>
public override async T.Task<bool> ProcessAsync(TaskEventMessage message, DateTimeOffset? insertionTime, CancellationToken token)
        {
            var jobsTable = this.Utilities.GetJobsTable();
            var nodeName = this.ServerOptions.HostName;

            // A positive task id targets one task; anything else means "end the whole job".
            string taskKey = message.Id > 0
                ? this.Utilities.GetTaskKey(message.JobId, message.Id, message.RequeueCount)
                : null;

            this.Logger.Information("Do work {0} for job {1} task {2} on node {3}", message.EventVerb, message.JobId, message.Id, nodeName);

            var jobPartitionKey = this.Utilities.GetJobPartitionKey(message.JobType, message.JobId);
            var job = await jobsTable.RetrieveAsync<Job>(jobPartitionKey, this.Utilities.JobEntryKey, token);

            if (job != null && job.RequeueCount != message.RequeueCount)
            {
                // The message was issued for a different requeue of this job; nothing to do.
                return true;
            }

            try
            {
                if (taskKey != null)
                {
                    // Keep terminal states (Failed/Finished); everything else becomes Canceled.
                    await this.Utilities.UpdateTaskAsync(jobPartitionKey, taskKey, t => t.State = t.State == TaskState.Failed || t.State == TaskState.Finished ? t.State : TaskState.Canceled, token, this.Logger);

                    await this.Communicator.EndTaskAsync(nodeName, new EndTaskArg(null, message.JobId, message.Id), token);

                    this.Monitor.CancelTask(message.JobId, taskKey);
                }
                else
                {
                    // End the whole job; keep terminal states (Failed/Finished) untouched.
                    await this.Utilities.UpdateJobAsync(message.JobType, message.JobId, j => j.State = j.State == JobState.Failed || j.State == JobState.Finished ? j.State : JobState.Canceled, token, this.Logger);

                    await this.Communicator.EndJobAsync(nodeName, new EndJobArg(null, message.JobId), token);

                    this.Monitor.CancelJob(message.JobId);
                }
            }
            catch (Exception ex)
            {
                // Always log: previously the exception vanished entirely when the job entry was missing.
                this.Logger.Error(ex, "Failed to end job {0} task {1} on node {2}", message.JobId, message.Id, nodeName);

                if (job != null)
                {
                    await this.Utilities.AddJobsEventAsync(job, $"Failed to end Job {job.Id}, exception {ex}", EventType.Warning, token);
                }
            }

            return true;
        }
Exemple #2
0
        /// <summary>
        /// Starts a task on this node, streams its output into an append blob, waits for completion
        /// (bounded by the task's maximum runtime), persists the result and finally enqueues a
        /// task completion message.
        /// </summary>
        /// <param name="message">Event message carrying the job type, job id, task id and requeue count.</param>
        /// <param name="insertionTime">
        /// When set and more than 10 seconds in the past, the job state is re-checked before starting the task.
        /// </param>
        /// <param name="token">Cancellation token passed to storage and communicator calls.</param>
        /// <returns>
        /// <c>false</c> when persisting the task result fails or the operation is cancelled via
        /// <paramref name="token"/> / a storage cancellation; <c>true</c> otherwise (including the
        /// skip, timeout and task-cancelled paths).
        /// NOTE(review): presumably <c>false</c> makes the caller retry the message — confirm against the caller.
        /// </returns>
        public override async T.Task<bool> ProcessAsync(TaskEventMessage message, DateTimeOffset? insertionTime, CancellationToken token)
        {
            this.logger = this.Logger.ForContext("Job", message.JobId).ForContext("Task", message.Id);
            this.jobsTable = this.Utilities.GetJobsTable();
            this.nodesTable = this.Utilities.GetNodesTable();
            var nodeName = this.ServerOptions.HostName;

            JobType jobType = message.JobType;
            int jobId = message.JobId;
            int taskId = message.Id;
            int requeueCount = message.RequeueCount;

            this.nodePartitionKey = this.Utilities.GetNodePartitionKey(nodeName);
            this.jobPartitionKey = this.Utilities.GetJobPartitionKey(message.JobType, jobId);
            var taskKey = this.Utilities.GetTaskKey(jobId, taskId, requeueCount);
            var taskInfoKey = this.Utilities.GetTaskInfoKey(jobId, taskId, requeueCount);
            var taskResultKey = this.Utilities.GetTaskResultKey(jobId, taskId, requeueCount);

            logger.Information("Do work {0} for Task {1} on node {2}", message.EventVerb, taskKey, nodeName);

            if (insertionTime != null && insertionTime + TimeSpan.FromSeconds(10) < DateTimeOffset.UtcNow)
            {
                // Only messages queued more than 10 seconds ago get the extra job state check.
                var job = await this.jobsTable.RetrieveAsync<Job>(jobPartitionKey, this.Utilities.JobEntryKey, token);

                if (job == null)
                {
                    // The job entry is gone; there is nothing to start and dereferencing it would throw.
                    logger.Warning("Trying to start a task {0} when the entry of Job {1} cannot be found", taskKey, jobId);
                    return true;
                }

                if (job.State != JobState.Running)
                {
                    logger.Warning("Trying to start a task {0} when {1} Job {2} is in state {3}", taskKey, job.Type, job.Id, job.State);
                    return true;
                }
            }

            var task = await this.jobsTable.RetrieveAsync<Task>(this.jobPartitionKey, taskKey, token);

            if (task == null)
            {
                // Without the task entity there is no command line to run and no child ids to report.
                logger.Warning("Task {0} cannot be found, skip starting it", taskKey);
                return true;
            }

            int? exitCode = null;

            CloudAppendBlob taskResultBlob = null;

            var cmd = task.CommandLine;
            DateTimeOffset startTime = DateTimeOffset.UtcNow;

            // Placeholder result persisted before the task starts; replaced once the task completes.
            var taskResult = new ComputeClusterTaskInformation()
            {
                ExitCode = -1,
                Message = "Running",
                CommandLine = cmd,
                JobId = jobId,
                TaskId = taskId,
                NodeName = nodeName,
                ResultKey = taskResultKey,
                StartTime = startTime,
            };

            try
            {
                if (task.State != TaskState.Dispatching && task.State != TaskState.Running && task.State != TaskState.Queued)
                {
                    logger.Information("Job {0} task {1} state {2}, skip Executing command {3}", jobId, taskId, task.State, cmd);
                    return true;
                }

                var taskInfo = await this.jobsTable.RetrieveAsync<TaskStartInfo>(this.jobPartitionKey, taskInfoKey, token);

                logger.Information("Executing command {0}", cmd);

                var rawResult = new StringBuilder();

                // No monitor is created for an empty command line; in that case we do not wait for a result below.
                using (var monitor = string.IsNullOrEmpty(cmd) ? null : this.Monitor.StartMonitorTask(jobId, taskKey, async (output, eof, cancellationToken) =>
                {
                    try
                    {
                        // Stop buffering once the in-memory copy reaches the cap; the blob still receives everything.
                        if (rawResult.Length < MaxRawResultLength)
                        {
                            rawResult.Append(output);
                        }

                        taskResultBlob = taskResultBlob ?? await this.Utilities.CreateOrReplaceJobOutputBlobAsync(jobType, taskResultKey, token);
                        await taskResultBlob.AppendTextAsync(output, Encoding.UTF8, null, null, null, cancellationToken);

                        if (eof)
                        {
                            taskResultBlob.Metadata[TaskOutputPage.EofMark] = eof.ToString();
                            await taskResultBlob.SetMetadataAsync(null, null, null, cancellationToken);
                        }
                    }
                    catch (Exception ex)
                    {
                        // The blob is still null when its creation is what failed, so do not dereference it blindly.
                        logger.Error(ex, "Error happened when append to blob {0}", taskResultBlob?.Name);
                    }
                }))
                {
                    if (!await this.PersistTaskResult(taskResultKey, taskResult, token))
                    {
                        return false;
                    }

                    logger.Information("Call startjobandtask for task {0}", taskKey);

                    // Indexer instead of Add: an already present "blobEndpoint" entry is overwritten rather than throwing.
                    var environmentVariables = taskInfo.StartInfo.environmentVariables ?? (taskInfo.StartInfo.environmentVariables = new Dictionary<string, string>());
                    environmentVariables["blobEndpoint"] = this.Utilities.Account.BlobEndpoint.AbsoluteUri;
                    taskInfo.StartInfo.stdout = taskInfo.StartInfo.stderr = $"{this.Communicator.Options.AgentUriBase}/output/{jobId}/{taskKey}";

                    await this.Utilities.UpdateTaskAsync(jobPartitionKey, taskKey, t => t.State = TaskState.Dispatching, token, this.logger);

                    await this.Communicator.StartJobAndTaskAsync(
                        nodeName,
                        new StartJobAndTaskArg(new int[0], taskInfo.JobId, taskInfo.Id), taskInfo.UserName, taskInfo.Password,
                        taskInfo.StartInfo, taskInfo.PrivateKey, taskInfo.PublicKey, token);

                    logger.Information("Update task state to running");
                    await this.Utilities.UpdateTaskAsync(jobPartitionKey, taskKey, t => t.State = TaskState.Running, token, this.logger);

                    logger.Information("Waiting for response");
                    if (monitor == null)
                    {
                        return true;
                    }

                    ComputeNodeTaskCompletionEventArgs taskResultArgs;

                    try
                    {
                        // Race the task execution against its maximum runtime.
                        if (monitor.Result.Execution == await T.Task.WhenAny(monitor.Result.Execution, T.Task.Delay(TimeSpan.FromSeconds(task.MaximumRuntimeSeconds))))
                        {
                            // The execution task has already completed here; .Result surfaces failures as AggregateException.
                            taskResultArgs = monitor.Result.Execution.Result;
                        }
                        else
                        {
                            logger.Information("Task has timed out");
                            return true;
                        }
                    }
                    catch (AggregateException ex) when (ex.InnerExceptions.All(e => e is OperationCanceledException))
                    {
                        logger.Information("Task has been canceled");
                        return true;
                    }
                    catch (T.TaskCanceledException)
                    {
                        logger.Information("Task has been canceled");
                        return true;
                    }

                    taskResult = taskResultArgs.TaskInfo ?? taskResult;
                    logger.Information("Updating task state with exit code {0}", taskResult?.ExitCode);
                    await this.Utilities.UpdateTaskAsync(
                        jobPartitionKey,
                        taskKey,
                        t => t.State = taskResult?.ExitCode == 0 ? TaskState.Finished : TaskState.Failed,
                        token,
                        this.logger);

                    if (taskResult != null)
                    {
                        taskResult.StartTime = startTime;
                        exitCode = taskResult.ExitCode;

                        // The buffer may overshoot the cap by one chunk, so trim it when storing the message.
                        taskResult.Message = rawResult.Length > MaxRawResultLength ? rawResult.ToString(0, MaxRawResultLength) : rawResult.ToString();

                        taskResult.CommandLine = cmd;
                        taskResult.JobId = jobId;
                        taskResult.TaskId = taskId;
                        taskResult.NodeName = nodeName;
                        taskResult.ResultKey = taskResultKey;
                        taskResult.EndTime = DateTimeOffset.UtcNow;

                        logger.Information("Saving result");
                        if (!await this.PersistTaskResult(taskResultKey, taskResult, token))
                        {
                            return false;
                        }
                    }
                }
            }
            catch (StorageException ex) when (ex.IsCancellation())
            {
                return false;
            }
            catch (OperationCanceledException) when (token.IsCancellationRequested)
            {
                return false;
            }
            catch (Exception ex)
            {
                // Record the failure in the task result, fail the task if it was in flight, and raise a job event.
                taskResult.Message = ex.ToString();
                taskResult.EndTime = DateTimeOffset.UtcNow;
                await this.PersistTaskResult(taskResultKey, taskResult, token);

                await this.Utilities.UpdateTaskAsync(jobPartitionKey, taskKey, t => t.State = t.State == TaskState.Dispatching || t.State == TaskState.Running ? TaskState.Failed : t.State, token, this.logger);

                await this.Utilities.AddJobsEventAsync(jobType, jobId, $"Task {taskId}, exception {ex}", EventType.Warning, token);
            }
            finally
            {
                // Runs on every exit from the try block, including the early returns above.
                var queue = this.Utilities.GetJobTaskCompletionQueue(jobId);
                logger.Information("Adding task completion message");
                await queue.AddMessageAsync(new CloudQueueMessage(JsonConvert.SerializeObject(new TaskCompletionMessage()
                {
                    JobId = jobId,
                    Id = taskId,
                    ExitCode = exitCode,
                    JobType = jobType,
                    RequeueCount = requeueCount,
                    ChildIds = task.ChildIds,
                }, Formatting.Indented)), null, null, null, null, token);

                // Make sure the output blob exists and is marked as ended even if no output was ever appended.
                taskResultBlob = taskResultBlob ?? await this.Utilities.CreateOrReplaceJobOutputBlobAsync(jobType, taskResultKey, token);

                taskResultBlob.Metadata[TaskOutputPage.EofMark] = true.ToString();
                await taskResultBlob.SetMetadataAsync(null, null, null, token);

                logger.Information("Finished");
            }

            return true;
        }
Exemple #3
0
 /// <summary>
 /// Processes a single task event message; concrete behavior is supplied by derived classes.
 /// </summary>
 /// <param name="message">The task event message to process.</param>
 /// <param name="insertionTime">
 /// Optional timestamp associated with the message.
 /// NOTE(review): presumably the time the message was inserted into its queue — confirm against the caller.
 /// </param>
 /// <param name="token">Token used to cancel the operation.</param>
 /// <returns>
 /// A task producing a boolean outcome.
 /// NOTE(review): the meaning of true/false is defined by the caller, which is not visible here —
 /// presumably true means the message was handled and false means it should be retried; confirm.
 /// </returns>
 public abstract Task <bool> ProcessAsync(TaskEventMessage message, DateTimeOffset?insertionTime, CancellationToken token);