Ejemplo n.º 1
0
        /// <summary>
        /// Requests cancellation of the job identified by <paramref name="job"/>'s type and id.
        /// </summary>
        /// <param name="job">Supplies the <c>Type</c> and <c>Id</c> of the job to cancel.</param>
        /// <param name="token">Cancellation token forwarded to the job update and the queue call.</param>
        /// <returns>
        /// A <see cref="NotFoundObjectResult"/> when the job update reports failure,
        /// an <see cref="OkObjectResult"/> when the job was moved to <c>Canceling</c> and a
        /// "cancel" event message was queued, otherwise a <see cref="BadRequestObjectResult"/>
        /// naming the state the job is in.
        /// </returns>
        public async T.Task<IActionResult> PatchJobAsync(Job job, CancellationToken token)
        {
            this.Logger.Information("Patch job called for job {0} {1}", job.Type, job.Id);

            // Receives the state the job has after the update callback ran.
            JobState state = JobState.Finished;

            if (!await this.Utilities.UpdateJobAsync(
                    job.Type,
                    job.Id,
                    j =>
                    {
                        // Only Queued, Running and Finishing jobs are moved to Canceling;
                        // any other state is written back unchanged.
                        bool cancelable = j.State == JobState.Queued
                            || j.State == JobState.Running
                            || j.State == JobState.Finishing;

                        state = j.State = cancelable ? JobState.Canceling : j.State;
                    },
                    token,
                    this.Logger))
            {
                return new NotFoundObjectResult($"{job.Type} job {job.Id} was not found.");
            }

            if (state != JobState.Canceling)
            {
                return new BadRequestObjectResult($"Cannot cancel {job.Type} job {job.Id} because it is in {state} state.");
            }

            var jobEventQueue = this.Utilities.GetJobEventQueue();
            var jobMsg = new JobEventMessage()
            {
                Id = job.Id,
                Type = job.Type,
                EventVerb = "cancel",
            };

            await jobEventQueue.AddMessageAsync(new CloudQueueMessage(JsonConvert.SerializeObject(jobMsg)), null, null, null, null, token);

            // The queued verb is "cancel", so the log names a cancel message rather than a dispatch message.
            this.Logger.Information("Create job cancel message success.");
            return new OkObjectResult($"{job.Type} job {job.Id} is being canceled.");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Assigns a new id and creation time to <paramref name="job"/>, stores it in the jobs
        /// table under two partition keys, and queues a "dispatch" event message for it.
        /// </summary>
        /// <param name="job">The job to create; its <c>Id</c> and <c>CreatedAt</c> are overwritten.</param>
        /// <param name="token">Cancellation token forwarded to the storage and queue calls.</param>
        /// <returns>The same <paramref name="job"/> instance with its new id and creation time.</returns>
        public async T.Task<Job> CreateJobAsync(Job job, CancellationToken token)
        {
            this.Logger.Information("New job called. creating job");
            var table = this.Utilities.GetJobsTable();

            job.Id = await this.Utilities.GetNextId("Jobs", "Jobs", token);
            this.Logger.Information("generated new job id {0}", job.Id);

            var entryKey = this.Utilities.JobEntryKey;
            job.CreatedAt = DateTimeOffset.UtcNow;

            // First copy: partition key built from the job type and id.
            var firstPartition = this.Utilities.GetJobPartitionKey(job.Type, job.Id);
            var firstResult = await table.InsertOrReplaceAsync(firstPartition, entryKey, job, token);
            this.Logger.Information("create job result {0}", firstResult);

            // Second copy: same job under the partition key variant selected by the extra 'true' argument.
            var secondPartition = this.Utilities.GetJobPartitionKey(job.Type, job.Id, true);
            var secondResult = await table.InsertOrReplaceAsync(secondPartition, entryKey, job, token);
            this.Logger.Information("create job result {0}", secondResult);

            this.Logger.Information("Creating job dispatch message");
            var eventQueue = this.Utilities.GetJobEventQueue();

            var payload = JsonConvert.SerializeObject(new JobEventMessage()
            {
                Id = job.Id,
                Type = job.Type,
                EventVerb = "dispatch",
            });
            await eventQueue.AddMessageAsync(new CloudQueueMessage(payload), null, null, null, null, token);

            this.Logger.Information("Create job dispatch message success.");

            return job;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Cancels a job: marks a running job as <c>Canceling</c>, queues a task completion
        /// message on the job's completion queue, queues a "cancel" message for every target
        /// node, and finally sets the job state to <c>Canceled</c>.
        /// </summary>
        /// <param name="job">The job to cancel. Nothing is done unless its state is <c>Canceling</c> or <c>Running</c>.</param>
        /// <param name="message">The job event message that triggered this call; not read by this method.</param>
        /// <param name="token">Cancellation token forwarded to the storage and queue calls.</param>
        public override async T.Task ProcessAsync(Job job, JobEventMessage message, CancellationToken token)
        {
            if (job.State != JobState.Canceling && job.State != JobState.Running)
            {
                return;
            }

            await this.Utilities.UpdateJobAsync(job.Type, job.Id, j =>
            {
                // A job still in Running state gets here without a prior cancel request;
                // record the runtime-expiry warning and move it to Canceling.
                if (j.State == JobState.Running)
                {
                    (j.Events ?? (j.Events = new List<Event>())).Add(new Event()
                    {
                        Content = $"The job maximum execution time {j.MaximumRuntimeSeconds} seconds expired.",
                        Source = EventSource.Job,
                        Type = EventType.Warning,
                    });

                    j.State = JobState.Canceling;
                }
            }, token, this.Logger);

            var taskQueue = await this.Utilities.GetOrCreateJobTaskCompletionQueueAsync(job.Id, token);

            // NOTE(review): Id = int.MaxValue presumably addresses the job's end-task marker - confirm.
            var completionMsg = new CloudQueueMessage(
                JsonConvert.SerializeObject(new TaskCompletionMessage()
                {
                    JobId = job.Id,
                    Id = int.MaxValue,
                    ExitCode = 0,
                }));

            await taskQueue.AddMessageAsync(completionMsg, null, null, null, null, token);

            this.Logger.Information("Added task cancel to queue {0}, {1}", taskQueue.Name, completionMsg.Id);

            // Notify every target node in parallel.
            await T.Task.WhenAll(job.TargetNodes.Select(async n =>
            {
                var q = this.Utilities.GetNodeCancelQueue(n);
                var msg = new CloudQueueMessage(
                    JsonConvert.SerializeObject(new TaskEventMessage()
                    {
                        JobId = job.Id,
                        Id = 0,
                        JobType = job.Type,
                        RequeueCount = job.RequeueCount,
                        EventVerb = "cancel",
                    }));
                await q.AddMessageAsync(msg, null, null, null, null, token);
                this.Logger.Information("Added job {0} cancel to queue {1}, {2}", job.Id, q.Name, msg.Id);
            }));

            await this.Utilities.UpdateJobAsync(job.Type, job.Id, j => j.State = JobState.Canceled, token, this.Logger);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Dispatches a queued job: generates its tasks, adds a synthetic start task (id 0) and
        /// end task (id int.MaxValue), stores the task rows and task start infos in the jobs
        /// table, moves the job to Running and queues the messages that start it.
        /// Failures along the way mark the job as Failed and add an alert event to it.
        /// </summary>
        /// <param name="job">The job to dispatch. Nothing is done unless its state is Queued.</param>
        /// <param name="message">The job event message that triggered this call; not read by this method.</param>
        /// <param name="token">Cancellation token forwarded to the storage and queue calls.</param>
        public override async T.Task ProcessAsync(Job job, JobEventMessage message, CancellationToken token)
        {
            var jobTable = this.Utilities.GetJobsTable();

            // Only jobs that are still Queued are dispatched.
            if (job.State != JobState.Queued)
            {
                this.Logger.Error("The job {0} state {1} is not queued.", job.Id, job.State);
                return;
            }

            var tasks = await this.JobTypeHandler.GenerateTasksAsync(job, token);

            // A null task list fails the job with a task count of zero.
            if (tasks == null)
            {
                this.Logger.Error("The job {0} script doesn't generate any tasks", job.Id);
                await this.Utilities.UpdateJobAsync(job.Type, job.Id, j =>
                {
                    (j.Events ?? (j.Events = new List <Event>())).Add(new Event()
                    {
                        Content = $"The job {job.Id} script doesn't generate any tasks.",
                        Source  = EventSource.Job,
                        Type    = EventType.Alert,
                    });

                    j.State     = JobState.Failed;
                    j.TaskCount = 0;
                }, token, this.Logger);

                return;
            }

            // Ending ids are the tasks that no other task lists as a parent.
            var allParentIds = new HashSet <int>(tasks.SelectMany(t => t.ParentIds ?? new List <int>()));
            var endingIds    = tasks.Where(t => !allParentIds.Contains(t.Id)).Select(t => t.Id).ToList();

            var startTask = InternalTask.CreateFrom(job);

            startTask.Id             = 0;
            startTask.CustomizedData = InternalTask.StartTaskMark;
            // Tasks without parents become children of the start task. Existing child id lists
            // are cleared - presumably FillData below rebuilds them; TODO confirm.
            tasks.ForEach(t =>
            {
                if (t.ParentIds == null || t.ParentIds.Count == 0)
                {
                    t.ParentIds = new List <int>()
                    {
                        startTask.Id
                    }
                }
                ;
                t.ChildIds?.Clear();
            });

            var endTask = InternalTask.CreateFrom(job);

            // The end task has every ending task as a parent.
            endTask.Id             = int.MaxValue;
            endTask.CustomizedData = InternalTask.EndTaskMark;
            endTask.ParentIds      = endingIds;
            this.Logger.Information("Job {0} task {1} has {2} parent ids, {3}", job.Id, endTask.Id, endTask.ParentIds.Count, string.Join(",", endTask.ParentIds));

            tasks.Add(startTask);
            tasks.Add(endTask);

            // When FillData reports failure, its message becomes the alert event and the job fails.
            var(success, msg) = this.FillData(tasks, job);
            if (!success)
            {
                this.Logger.Error(msg);
                await this.Utilities.UpdateJobAsync(job.Type, job.Id, j =>
                {
                    j.State = JobState.Failed;
                    (j.Events ?? (j.Events = new List <Event>())).Add(new Event()
                    {
                        Content = msg,
                        Source  = EventSource.Job,
                        Type    = EventType.Alert
                    });
                }, token, this.Logger);

                return;
            }

            // Child id lists longer than this are not stored in the table entity (ChildIds is set
            // to null there); they are uploaded to a blob further down instead.
            const int MaxChildIds = 1000;

            this.Logger.Information("Job {0} Converting {1} Tasks to Instances.", job.Id, tasks.Count);
            var taskInstances = tasks.Select(it =>
            {
                string zipppedParentIds = Compress.GZip(string.Join(",", it.ParentIds ?? new List <int>()));

                var childIds = it.ChildIds;
                childIds     = childIds ?? new List <int>();
                childIds     = childIds.Count > MaxChildIds ? null : childIds;

                return(new Task()
                {
                    ChildIds = childIds,
                    ZippedParentIds = zipppedParentIds,
                    CommandLine = it.CommandLine,
                    CustomizedData = it.CustomizedData,
                    Id = it.Id,
                    JobId = it.JobId,
                    JobType = it.JobType,
                    Node = it.Node,
                    RequeueCount = it.RequeueCount,
                    // The start task is stored as already Finished; every other task starts Queued.
                    State = string.Equals(it.CustomizedData, Task.StartTaskMark, StringComparison.OrdinalIgnoreCase) ? TaskState.Finished : TaskState.Queued,
                    MaximumRuntimeSeconds = it.MaximumRuntimeSeconds,
                });
            }).ToList();

            // Tasks whose child id list exceeds MaxChildIds, kept for the blob upload.
            var childIdsContent = tasks
                                  .Where(it => (it.ChildIds?.Count ?? 0) > MaxChildIds)
                                  .Select(it => new
            {
                it.Id,
                it.JobId,
                it.RequeueCount,
                it.ChildIds,
            })
                                  .ToList();

            this.Logger.Information("Job {0} Converting {1} Tasks to TaskStartInfo.", job.Id, tasks.Count);
            var taskInfos = tasks.Select(it => new TaskStartInfo()
            {
                Id         = it.Id,
                JobId      = it.JobId,
                JobType    = it.JobType,
                NodeName   = it.Node,
                Password   = it.Password,
                PrivateKey = it.PrivateKey,
                PublicKey  = it.PublicKey,
                UserName   = it.UserName,
                StartInfo  = new ProcessStartInfo(it.CommandLine, it.WorkingDirectory, null, null, null, it.EnvironmentVariables, null, it.RequeueCount),
            }).ToList();

            this.Logger.Information("Job {0} Inserting {1} Tasks to Table.", job.Id, tasks.Count);
            var jobPartitionKey = this.Utilities.GetJobPartitionKey(job.Type, job.Id);
            await jobTable.InsertOrReplaceBatchAsync(token, taskInstances.Select(t => new JsonTableEntity(
                                                                                     jobPartitionKey,
                                                                                     this.Utilities.GetTaskKey(job.Id, t.Id, job.RequeueCount),
                                                                                     t)).ToArray());

            // NOTE(review): this duplicate-id check runs after the task rows were already inserted
            // above and only covers the tasks in childIdsContent, not all tasks - confirm intent.
            if (childIdsContent.Select(cid => cid.Id).Distinct().Count() != childIdsContent.Count)
            {
                await this.Utilities.UpdateJobAsync(job.Type, job.Id, j =>
                {
                    j.State = JobState.Failed;
                    (j.Events ?? (j.Events = new List <Event>())).Add(new Event()
                    {
                        Content = $"Duplicate task ids found.",
                        Source  = EventSource.Job,
                        Type    = EventType.Alert,
                    });
                }, token, this.Logger);

                return;
            }

            // Upload each oversized child id list as JSON text to its own blob, in parallel.
            this.Logger.Information("Job {0} Uploading {1} Tasks child ids content to blob.", job.Id, childIdsContent.Count);
            await T.Task.WhenAll(childIdsContent.Select(async childIds =>
            {
                var taskKey      = this.Utilities.GetTaskKey(childIds.JobId, childIds.Id, childIds.RequeueCount);
                var childIdsBlob = await this.Utilities.CreateOrReplaceTaskChildrenBlobAsync(taskKey, token);

                var jsonContent = JsonConvert.SerializeObject(childIds.ChildIds);
                await childIdsBlob.UploadTextAsync(jsonContent, Encoding.UTF8, null, null, null, token);
            }));

            this.Logger.Information("Job {0} Inserting {1} TaskInfo to Table.", job.Id, taskInfos.Count);
            await jobTable.InsertOrReplaceBatchAsync(token, taskInfos.Select(t => new JsonTableEntity(
                                                                                 jobPartitionKey,
                                                                                 this.Utilities.GetTaskInfoKey(job.Id, t.Id, job.RequeueCount),
                                                                                 t)).ToArray());

            this.Logger.Information("Job {0} updating job status.", job.Id);
            JobState state = JobState.Queued;

            await this.Utilities.UpdateJobAsync(job.Type, job.Id, j =>
            {
                // Move to Running only if the stored job is still Queued; capture the resulting state.
                state       = j.State = (j.State == JobState.Queued ? JobState.Running : j.State);
                // The synthetic start and end tasks are not counted.
                j.TaskCount = taskInstances.Count - 2;
            }, token, this.Logger);

            // Start the job only when the update above left it in Running state.
            if (state == JobState.Running)
            {
                this.Logger.Information("Job {0} Starting the job", job.Id);
                // Queues a completion message for the start task (id 0) carrying its child ids.
                async T.Task addFirstTask()
                {
                    var taskCompletionQueue = await this.Utilities.GetOrCreateJobTaskCompletionQueueAsync(job.Id, token);

                    await taskCompletionQueue.AddMessageAsync(new CloudQueueMessage(
                                                                  JsonConvert.SerializeObject(new TaskCompletionMessage()
                    {
                        JobId = job.Id, Id = 0, JobType = job.Type, RequeueCount = job.RequeueCount, ChildIds = startTask.ChildIds
                    })),
                                                              null, null, null, null, token);
                };

                // Queues a running-job message for this job on the running job queue.
                async T.Task addRunningJob()
                {
                    var runningJobQueue = this.Utilities.GetRunningJobQueue();
                    await runningJobQueue.AddMessageAsync(new CloudQueueMessage(
                                                              JsonConvert.SerializeObject(new RunningJobMessage()
                    {
                        JobId = job.Id, JobType = job.Type, RequeueCount = job.RequeueCount
                    })),
                                                          null, null, null, null, token);
                };

                await T.Task.WhenAll(addFirstTask(), addRunningJob());
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Finishes a job: loads its task results and tasks from the jobs table, aggregates them
        /// through the job type handler, decides the final job state, stores the aggregation
        /// result in the job output blob, saves the state and queues a "cancel" message for
        /// every target node.
        /// </summary>
        /// <param name="job">The job to finish. Nothing is done unless its state is <c>Finishing</c>; its <c>State</c> is updated in place.</param>
        /// <param name="message">The job event message that triggered this call; not read by this method.</param>
        /// <param name="token">Cancellation token forwarded to the storage and queue calls.</param>
        public override async T.Task ProcessAsync(Job job, JobEventMessage message, CancellationToken token)
        {
            var jobTable = this.Utilities.GetJobsTable();

            this.Logger.Information("JobFinisher, job {0}, state {1}", job.Id, job.State);
            if (job.State != JobState.Finishing)
            {
                return;
            }

            var jobPartitionKey = this.Utilities.GetJobPartitionKey(job.Type, job.Id);
            var jobPartitionQuery = this.Utilities.GetPartitionQueryString(jobPartitionKey);

            // Row key range covering the task results of this job for the current requeue count.
            var taskResultRangeQuery = this.Utilities.GetRowKeyRangeString(
                this.Utilities.GetTaskResultKey(job.Id, 0, job.RequeueCount),
                this.Utilities.GetTaskResultKey(job.Id, int.MaxValue, job.RequeueCount),
                false,
                false);

            this.Logger.Information("JobFinisher, job {0}, querying tasks results", job.Id);
            var allTaskResults = (await jobTable.QueryAsync<ComputeClusterTaskInformation>(
                    TableQuery.CombineFilters(jobPartitionQuery, TableOperators.And, taskResultRangeQuery),
                    null,
                    token))
                .Select(t => t.Item3)
                .ToList();

            // Row key range covering the tasks of this job for the current requeue count.
            var taskRangeQuery = this.Utilities.GetRowKeyRangeString(
                this.Utilities.GetTaskKey(job.Id, 0, job.RequeueCount),
                this.Utilities.GetTaskKey(job.Id, int.MaxValue, job.RequeueCount),
                false,
                false);

            this.Logger.Information("JobFinisher, job {0}, querying tasks", job.Id);
            var allTasks = (await jobTable.QueryAsync<Task>(
                    TableQuery.CombineFilters(jobPartitionQuery, TableOperators.And, taskRangeQuery),
                    null,
                    token))
                .Select(t => t.Item3)
                // The end-task marker is excluded from aggregation and from the failure check.
                .Where(t => t.CustomizedData != Task.EndTaskMark)
                .ToList();

            this.Logger.Information("JobFinisher, job {0}, aggregating results", job.Id);
            var aggregationResult = await this.JobTypeHandler.AggregateTasksAsync(job, allTasks, allTaskResults, token);

            this.Logger.Information("JobFinisher, job {0}, aggregated result", job.Id);

            // State is re-checked after aggregation; a job still in Finishing ends as Finished
            // unless it is configured to fail on task failure and a task failed.
            if (job.State == JobState.Finishing)
            {
                var finalState = JobState.Finished;
                if (job.FailJobOnTaskFailure && allTasks.Any(t => t.State == TaskState.Failed))
                {
                    finalState = JobState.Failed;

                    await this.Utilities.AddJobsEventAsync(job, "Fail the job because some tasks failed.", EventType.Alert, token);
                }

                job.State = finalState;
            }

            this.Logger.Information("JobFinisher, job {0}, saving result, length {1}", job.Id, aggregationResult?.Length);

            if (aggregationResult != null)
            {
                var jobOutputBlob = await this.Utilities.CreateOrReplaceJobOutputBlobAsync(job.Type, this.Utilities.GetJobAggregationResultKey(job.Id), token);

                await jobOutputBlob.AppendTextAsync(aggregationResult, Encoding.UTF8, null, null, null, token);
            }

            await this.Utilities.UpdateJobAsync(job.Type, job.Id, j =>
            {
                j.State = job.State;
            }, token, this.Logger);

            await this.Utilities.AddJobsEventAsync(job, $"Job ended to state {job.State}.", EventType.Information, token);

            this.Logger.Information("JobFinisher, job {0}, canceling tasks on nodes", job.Id);

            // Notify every target node in parallel.
            await T.Task.WhenAll(job.TargetNodes.Select(async n =>
            {
                var q = this.Utilities.GetNodeCancelQueue(n);
                var msg = new CloudQueueMessage(
                    JsonConvert.SerializeObject(new TaskEventMessage()
                    {
                        JobId = job.Id,
                        Id = 0,
                        JobType = job.Type,
                        RequeueCount = job.RequeueCount,
                        EventVerb = "cancel",
                    }));
                await q.AddMessageAsync(msg, null, null, null, null, token);
                this.Logger.Information("Added job {0} cancel to queue {1}, {2}", job.Id, q.Name, msg.Id);
            }));

            // Distinct from the "canceling" line above so the log shows that the step completed.
            this.Logger.Information("JobFinisher, job {0}, canceled tasks on nodes", job.Id);
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Handles a job event for the given job. Implemented by derived classes.
 /// </summary>
 /// <param name="job">The job the event applies to.</param>
 /// <param name="message">The job event message being processed.</param>
 /// <param name="token">Cancellation token - presumably forwarded to the implementation's asynchronous calls; confirm per implementation.</param>
 /// <returns>A task that completes when the implementation has finished processing.</returns>
 public abstract T.Task ProcessAsync(Job job, JobEventMessage message, CancellationToken token);