/// <summary>
/// Requests cancellation of a job. A job that is queued, running or finishing is moved to
/// <see cref="JobState.Canceling"/> and a "cancel" event message is added to the job event queue.
/// </summary>
/// <param name="job">Identifies the job to cancel; only its <c>Type</c> and <c>Id</c> are read.</param>
/// <param name="token">Cancellation token passed to the job update and queue calls.</param>
/// <returns>
/// A not-found result when the job update reports failure, an OK result when cancellation was
/// requested, or a bad-request result when the job is in a state that cannot be canceled.
/// </returns>
public async T.Task<IActionResult> PatchJobAsync(Job job, CancellationToken token)
{
    this.Logger.Information("Patch job called for job {0} {1}", job.Type, job.Id);

    // Overwritten by the update callback with the state the job ends up in.
    JobState state = JobState.Finished;

    bool updated = await this.Utilities.UpdateJobAsync(
        job.Type,
        job.Id,
        j =>
        {
            bool cancelable = j.State == JobState.Queued
                || j.State == JobState.Running
                || j.State == JobState.Finishing;
            state = j.State = cancelable ? JobState.Canceling : j.State;
        },
        token,
        this.Logger);

    if (!updated)
    {
        return new NotFoundObjectResult($"{job.Type} job {job.Id} was not found.");
    }

    if (state != JobState.Canceling)
    {
        return new BadRequestObjectResult($"Cannot cancel {job.Type} job {job.Id} because it is in {state} state.");
    }

    var jobEventQueue = this.Utilities.GetJobEventQueue();
    var jobMsg = new JobEventMessage() { Id = job.Id, Type = job.Type, EventVerb = "cancel" };
    await jobEventQueue.AddMessageAsync(
        new CloudQueueMessage(JsonConvert.SerializeObject(jobMsg)),
        null,
        null,
        null,
        null,
        token);

    // The message enqueued here is a cancel request, not a dispatch request.
    this.Logger.Information("Create job cancel message success.");

    return new OkObjectResult($"{job.Type} job {job.Id} is being canceled.");
}
/// <summary>
/// Creates a new job: assigns it the next job id, stamps <c>CreatedAt</c> with the current UTC
/// time, stores it in the jobs table under two partition keys, and adds a "dispatch" event
/// message to the job event queue.
/// </summary>
/// <param name="job">The job to create. Its <c>Id</c> and <c>CreatedAt</c> are overwritten.</param>
/// <param name="token">Cancellation token passed to the id, table and queue calls.</param>
/// <returns>The same <paramref name="job"/> instance, with the generated id and creation time set.</returns>
public async T.Task<Job> CreateJobAsync(Job job, CancellationToken token)
{
    this.Logger.Information("New job called. creating job");

    var table = this.Utilities.GetJobsTable();

    job.Id = await this.Utilities.GetNextId("Jobs", "Jobs", token);
    this.Logger.Information("generated new job id {0}", job.Id);

    var entryKey = this.Utilities.JobEntryKey;
    job.CreatedAt = DateTimeOffset.UtcNow;

    // First copy: partition key built from the job type and id only.
    var firstPartitionKey = this.Utilities.GetJobPartitionKey(job.Type, job.Id);
    var firstResult = await table.InsertOrReplaceAsync(firstPartitionKey, entryKey, job, token);
    this.Logger.Information("create job result {0}", firstResult);

    // Second copy: same job stored under the partition key built with the extra flag set.
    var secondPartitionKey = this.Utilities.GetJobPartitionKey(job.Type, job.Id, true);
    var secondResult = await table.InsertOrReplaceAsync(secondPartitionKey, entryKey, job, token);
    this.Logger.Information("create job result {0}", secondResult);

    this.Logger.Information("Creating job dispatch message");
    var eventQueue = this.Utilities.GetJobEventQueue();
    var dispatchEvent = new JobEventMessage()
    {
        Id = job.Id,
        Type = job.Type,
        EventVerb = "dispatch",
    };
    var queueMessage = new CloudQueueMessage(JsonConvert.SerializeObject(dispatchEvent));
    await eventQueue.AddMessageAsync(queueMessage, null, null, null, null, token);
    this.Logger.Information("Create job dispatch message success.");

    return job;
}
/// <summary>
/// Cancels a job that is running or already canceling: marks a running job as canceling (with a
/// warning event), adds a task completion message to the job's task completion queue, adds a
/// "cancel" task event to the cancel queue of every target node, and finally sets the job state
/// to <see cref="JobState.Canceled"/>.
/// </summary>
/// <param name="job">The job to cancel. Nothing is done unless its state is Running or Canceling.</param>
/// <param name="message">The triggering job event message; not read by this implementation.</param>
/// <param name="token">Cancellation token passed to the storage and queue calls.</param>
/// <returns>A task that completes when all messages are queued and the job state is updated.</returns>
public override async T.Task ProcessAsync(Job job, JobEventMessage message, CancellationToken token)
{
    if (job.State != JobState.Canceling && job.State != JobState.Running)
    {
        return;
    }

    await this.Utilities.UpdateJobAsync(job.Type, job.Id, j =>
    {
        // A job that is still Running when it reaches this handler gets a timeout warning and is
        // moved to Canceling; a job in any other state is left untouched.
        if (j.State == JobState.Running)
        {
            (j.Events ?? (j.Events = new List<Event>())).Add(new Event()
            {
                Content = $"The job maximum execution time {j.MaximumRuntimeSeconds} seconds expired.",
                Source = EventSource.Job,
                Type = EventType.Warning,
            });
            j.State = JobState.Canceling;
        }
    }, token, this.Logger);

    // NOTE(review): Id = int.MaxValue presumably addresses the job's end task - confirm.
    var taskQueue = await this.Utilities.GetOrCreateJobTaskCompletionQueueAsync(job.Id, token);
    var msg1 = new CloudQueueMessage(
        JsonConvert.SerializeObject(new TaskCompletionMessage()
        {
            JobId = job.Id,
            Id = int.MaxValue,
            ExitCode = 0,
        }));
    await taskQueue.AddMessageAsync(msg1, null, null, null, null, token);
    this.Logger.Information("Added task cancel to queue {0}, {1}", taskQueue.Name, msg1.Id);

    // Fan out one cancel event per target node; all queue writes run concurrently.
    await T.Task.WhenAll(job.TargetNodes.Select(async n =>
    {
        var q = this.Utilities.GetNodeCancelQueue(n);
        var msg = new CloudQueueMessage(
            JsonConvert.SerializeObject(new TaskEventMessage()
            {
                JobId = job.Id,
                Id = 0,
                JobType = job.Type,
                RequeueCount = job.RequeueCount,
                EventVerb = "cancel",
            }));
        await q.AddMessageAsync(msg, null, null, null, null, token);
        this.Logger.Information("Added job {0} cancel to queue {1}, {2}", job.Id, q.Name, msg.Id);
    }));

    await this.Utilities.UpdateJobAsync(job.Type, job.Id, j => j.State = JobState.Canceled, token, this.Logger);
}
/// <summary>
/// Dispatches a queued job. Generates the job's tasks, brackets them with a start task (id 0)
/// and an end task (id <see cref="int.MaxValue"/>), stores the task rows and task start infos in
/// the jobs table, and, if the job is still queued, moves it to Running and adds the first task
/// completion message and a running-job message to their queues.
/// </summary>
/// <remarks>
/// The job is marked Failed with an alert event when no task list is generated, when
/// <c>FillData</c> reports failure, or when the tasks whose child ids are written to blobs have
/// duplicate ids. Child id lists longer than 1000 entries are not kept in the task row; they are
/// uploaded as JSON to a per-task blob instead.
/// </remarks>
/// <param name="job">The job to dispatch. Nothing is done unless its state is Queued.</param>
/// <param name="message">The triggering job event message; not read by this implementation.</param>
/// <param name="token">Cancellation token passed to the handler, storage and queue calls.</param>
/// <returns>A task that completes when dispatching has finished or was abandoned.</returns>
public override async T.Task ProcessAsync(Job job, JobEventMessage message, CancellationToken token)
{
    var jobTable = this.Utilities.GetJobsTable();

    if (job.State != JobState.Queued)
    {
        this.Logger.Error("The job {0} state {1} is not queued.", job.Id, job.State);
        return;
    }

    var tasks = await this.JobTypeHandler.GenerateTasksAsync(job, token);
    if (tasks == null)
    {
        this.Logger.Error("The job {0} script doesn't generate any tasks", job.Id);
        await this.Utilities.UpdateJobAsync(
            job.Type,
            job.Id,
            j =>
            {
                var events = j.Events ?? (j.Events = new List<Event>());
                events.Add(new Event()
                {
                    Content = $"The job {job.Id} script doesn't generate any tasks.",
                    Source = EventSource.Job,
                    Type = EventType.Alert,
                });
                j.State = JobState.Failed;
                j.TaskCount = 0;
            },
            token,
            this.Logger);
        return;
    }

    // Tasks that no other task lists as a parent become the parents of the end task.
    // This must be computed before the start/end tasks are added and before ParentIds are patched.
    var referencedParentIds = new HashSet<int>(tasks.SelectMany(t => t.ParentIds ?? new List<int>()));
    var leafTaskIds = tasks
        .Where(t => !referencedParentIds.Contains(t.Id))
        .Select(t => t.Id)
        .ToList();

    var startTask = InternalTask.CreateFrom(job);
    startTask.Id = 0;
    startTask.CustomizedData = InternalTask.StartTaskMark;

    // Tasks without parents hang off the start task; any existing child ids are discarded.
    foreach (var t in tasks)
    {
        if (t.ParentIds == null || t.ParentIds.Count == 0)
        {
            t.ParentIds = new List<int>() { startTask.Id };
        }

        t.ChildIds?.Clear();
    }

    var endTask = InternalTask.CreateFrom(job);
    endTask.Id = int.MaxValue;
    endTask.CustomizedData = InternalTask.EndTaskMark;
    endTask.ParentIds = leafTaskIds;
    this.Logger.Information(
        "Job {0} task {1} has {2} parent ids, {3}",
        job.Id,
        endTask.Id,
        endTask.ParentIds.Count,
        string.Join(",", endTask.ParentIds));

    tasks.Add(startTask);
    tasks.Add(endTask);

    var (filled, fillError) = this.FillData(tasks, job);
    if (!filled)
    {
        this.Logger.Error(fillError);
        await this.Utilities.UpdateJobAsync(
            job.Type,
            job.Id,
            j =>
            {
                j.State = JobState.Failed;
                var events = j.Events ?? (j.Events = new List<Event>());
                events.Add(new Event()
                {
                    Content = fillError,
                    Source = EventSource.Job,
                    Type = EventType.Alert,
                });
            },
            token,
            this.Logger);
        return;
    }

    // Above this many child ids, the list is stored in a blob rather than in the task row.
    const int MaxChildIds = 1000;

    this.Logger.Information("Job {0} Converting {1} Tasks to Instances.", job.Id, tasks.Count);
    var taskInstances = tasks
        .Select(it =>
        {
            var parentIds = it.ParentIds ?? new List<int>();
            string zippedParentIds = Compress.GZip(string.Join(",", parentIds));

            var childIds = it.ChildIds ?? new List<int>();
            if (childIds.Count > MaxChildIds)
            {
                childIds = null;
            }

            // Tasks carrying the start task mark are stored as already finished.
            bool hasStartMark = string.Equals(it.CustomizedData, Task.StartTaskMark, StringComparison.OrdinalIgnoreCase);

            return new Task()
            {
                ChildIds = childIds,
                ZippedParentIds = zippedParentIds,
                CommandLine = it.CommandLine,
                CustomizedData = it.CustomizedData,
                Id = it.Id,
                JobId = it.JobId,
                JobType = it.JobType,
                Node = it.Node,
                RequeueCount = it.RequeueCount,
                State = hasStartMark ? TaskState.Finished : TaskState.Queued,
                MaximumRuntimeSeconds = it.MaximumRuntimeSeconds,
            };
        })
        .ToList();

    var childIdsContent = tasks
        .Where(it => (it.ChildIds?.Count ?? 0) > MaxChildIds)
        .Select(it => new { it.Id, it.JobId, it.RequeueCount, it.ChildIds })
        .ToList();

    this.Logger.Information("Job {0} Converting {1} Tasks to TaskStartInfo.", job.Id, tasks.Count);
    var taskInfos = tasks
        .Select(it => new TaskStartInfo()
        {
            Id = it.Id,
            JobId = it.JobId,
            JobType = it.JobType,
            NodeName = it.Node,
            Password = it.Password,
            PrivateKey = it.PrivateKey,
            PublicKey = it.PublicKey,
            UserName = it.UserName,
            StartInfo = new ProcessStartInfo(
                it.CommandLine,
                it.WorkingDirectory,
                null,
                null,
                null,
                it.EnvironmentVariables,
                null,
                it.RequeueCount),
        })
        .ToList();

    this.Logger.Information("Job {0} Inserting {1} Tasks to Table.", job.Id, tasks.Count);
    var jobPartitionKey = this.Utilities.GetJobPartitionKey(job.Type, job.Id);
    var taskEntities = taskInstances
        .Select(t => new JsonTableEntity(
            jobPartitionKey,
            this.Utilities.GetTaskKey(job.Id, t.Id, job.RequeueCount),
            t))
        .ToArray();
    await jobTable.InsertOrReplaceBatchAsync(token, taskEntities);

    // Each blob is keyed by task id, so duplicated ids among the blob-backed tasks fail the job.
    int distinctBlobTaskIds = childIdsContent.Select(cid => cid.Id).Distinct().Count();
    if (distinctBlobTaskIds != childIdsContent.Count)
    {
        await this.Utilities.UpdateJobAsync(
            job.Type,
            job.Id,
            j =>
            {
                j.State = JobState.Failed;
                var events = j.Events ?? (j.Events = new List<Event>());
                events.Add(new Event()
                {
                    Content = "Duplicate task ids found.",
                    Source = EventSource.Job,
                    Type = EventType.Alert,
                });
            },
            token,
            this.Logger);
        return;
    }

    this.Logger.Information("Job {0} Uploading {1} Tasks child ids content to blob.", job.Id, childIdsContent.Count);
    await T.Task.WhenAll(childIdsContent.Select(async entry =>
    {
        var taskKey = this.Utilities.GetTaskKey(entry.JobId, entry.Id, entry.RequeueCount);
        var blob = await this.Utilities.CreateOrReplaceTaskChildrenBlobAsync(taskKey, token);
        var json = JsonConvert.SerializeObject(entry.ChildIds);
        await blob.UploadTextAsync(json, Encoding.UTF8, null, null, null, token);
    }));

    this.Logger.Information("Job {0} Inserting {1} TaskInfo to Table.", job.Id, taskInfos.Count);
    var taskInfoEntities = taskInfos
        .Select(t => new JsonTableEntity(
            jobPartitionKey,
            this.Utilities.GetTaskInfoKey(job.Id, t.Id, job.RequeueCount),
            t))
        .ToArray();
    await jobTable.InsertOrReplaceBatchAsync(token, taskInfoEntities);

    this.Logger.Information("Job {0} updating job status.", job.Id);
    JobState state = JobState.Queued;
    await this.Utilities.UpdateJobAsync(
        job.Type,
        job.Id,
        j =>
        {
            // Only a job that is still Queued is started; any other state is kept as it is.
            var nextState = j.State == JobState.Queued ? JobState.Running : j.State;
            j.State = nextState;
            state = nextState;

            // The two bracketing tasks added above are not counted.
            j.TaskCount = taskInstances.Count - 2;
        },
        token,
        this.Logger);

    if (state != JobState.Running)
    {
        return;
    }

    this.Logger.Information("Job {0} Starting the job", job.Id);

    // Reports the start task (id 0) as completed on the job's task completion queue.
    async T.Task EnqueueStartTaskCompletionAsync()
    {
        var taskCompletionQueue = await this.Utilities.GetOrCreateJobTaskCompletionQueueAsync(job.Id, token);
        var completion = new TaskCompletionMessage()
        {
            JobId = job.Id,
            Id = 0,
            JobType = job.Type,
            RequeueCount = job.RequeueCount,
            ChildIds = startTask.ChildIds,
        };
        await taskCompletionQueue.AddMessageAsync(
            new CloudQueueMessage(JsonConvert.SerializeObject(completion)),
            null,
            null,
            null,
            null,
            token);
    }

    // Adds the job to the running job queue.
    async T.Task EnqueueRunningJobAsync()
    {
        var runningJobQueue = this.Utilities.GetRunningJobQueue();
        var running = new RunningJobMessage()
        {
            JobId = job.Id,
            JobType = job.Type,
            RequeueCount = job.RequeueCount,
        };
        await runningJobQueue.AddMessageAsync(
            new CloudQueueMessage(JsonConvert.SerializeObject(running)),
            null,
            null,
            null,
            null,
            token);
    }

    await T.Task.WhenAll(EnqueueStartTaskCompletionAsync(), EnqueueRunningJobAsync());
}
/// <summary>
/// Finishes a job that is in the Finishing state: loads the job's task results and tasks from
/// the jobs table, lets the job type handler aggregate them, decides the final job state, saves
/// the aggregated output to the job output blob, stores the final state with an information
/// event, and adds a "cancel" task event to the cancel queue of every target node.
/// </summary>
/// <param name="job">The job to finish. Nothing is done unless its state is Finishing.</param>
/// <param name="message">The triggering job event message; not read by this implementation.</param>
/// <param name="token">Cancellation token passed to the handler, storage and queue calls.</param>
/// <returns>A task that completes when the job has been finalized and the nodes were notified.</returns>
public override async T.Task ProcessAsync(Job job, JobEventMessage message, CancellationToken token)
{
    var jobTable = this.Utilities.GetJobsTable();

    this.Logger.Information("JobFinisher, job {0}, state {1}", job.Id, job.State);
    if (job.State != JobState.Finishing)
    {
        return;
    }

    var jobPartitionKey = this.Utilities.GetJobPartitionKey(job.Type, job.Id);
    var jobPartitionQuery = this.Utilities.GetPartitionQueryString(jobPartitionKey);

    var taskResultRangeQuery = this.Utilities.GetRowKeyRangeString(
        this.Utilities.GetTaskResultKey(job.Id, 0, job.RequeueCount),
        this.Utilities.GetTaskResultKey(job.Id, int.MaxValue, job.RequeueCount),
        false,
        false);

    this.Logger.Information("JobFinisher, job {0}, querying tasks results", job.Id);
    var allTaskResults = (await jobTable.QueryAsync<ComputeClusterTaskInformation>(
            TableQuery.CombineFilters(jobPartitionQuery, TableOperators.And, taskResultRangeQuery),
            null,
            token))
        .Select(t => t.Item3)
        .ToList();

    var taskRangeQuery = this.Utilities.GetRowKeyRangeString(
        this.Utilities.GetTaskKey(job.Id, 0, job.RequeueCount),
        this.Utilities.GetTaskKey(job.Id, int.MaxValue, job.RequeueCount),
        false,
        false);

    this.Logger.Information("JobFinisher, job {0}, querying tasks", job.Id);

    // Tasks carrying the end task mark are left out of the aggregation input.
    var allTasks = (await jobTable.QueryAsync<Task>(
            TableQuery.CombineFilters(jobPartitionQuery, TableOperators.And, taskRangeQuery),
            null,
            token))
        .Select(t => t.Item3)
        .Where(t => t.CustomizedData != Task.EndTaskMark)
        .ToList();

    this.Logger.Information("JobFinisher, job {0}, aggregating results", job.Id);
    var aggregationResult = await this.JobTypeHandler.AggregateTasksAsync(job, allTasks, allTaskResults, token);
    this.Logger.Information("JobFinisher, job {0}, aggregated result", job.Id);

    // NOTE(review): the job object was handed to the aggregation handler above; the state is
    // re-checked here presumably because the handler may change it - confirm.
    if (job.State == JobState.Finishing)
    {
        // Inside this branch the job is known to be Finishing, so it ends as Finished unless
        // it is configured to fail on task failure and at least one task failed.
        var finalState = JobState.Finished;
        if (job.FailJobOnTaskFailure && allTasks.Any(t => t.State == TaskState.Failed))
        {
            finalState = JobState.Failed;
            await this.Utilities.AddJobsEventAsync(job, $"Fail the job because some tasks failed.", EventType.Alert, token);
        }

        job.State = finalState;
    }

    this.Logger.Information("JobFinisher, job {0}, saving result, length {1}", job.Id, aggregationResult?.Length);
    if (aggregationResult != null)
    {
        var jobOutputBlob = await this.Utilities.CreateOrReplaceJobOutputBlobAsync(
            job.Type,
            this.Utilities.GetJobAggregationResultKey(job.Id),
            token);
        await jobOutputBlob.AppendTextAsync(aggregationResult, Encoding.UTF8, null, null, null, token);
    }

    await this.Utilities.UpdateJobAsync(job.Type, job.Id, j => { j.State = job.State; }, token, this.Logger);
    await this.Utilities.AddJobsEventAsync(job, $"Job ended to state {job.State}.", EventType.Information, token);

    this.Logger.Information("JobFinisher, job {0}, canceling tasks on nodes", job.Id);

    // Fan out one cancel event per target node; all queue writes run concurrently.
    await T.Task.WhenAll(job.TargetNodes.Select(async n =>
    {
        var q = this.Utilities.GetNodeCancelQueue(n);
        var msg = new CloudQueueMessage(
            JsonConvert.SerializeObject(new TaskEventMessage()
            {
                JobId = job.Id,
                Id = 0,
                JobType = job.Type,
                RequeueCount = job.RequeueCount,
                EventVerb = "cancel",
            }));
        await q.AddMessageAsync(msg, null, null, null, null, token);
        this.Logger.Information("Added job {0} cancel to queue {1}, {2}", job.Id, q.Name, msg.Id);
    }));

    // All cancel messages have been queued at this point.
    this.Logger.Information("JobFinisher, job {0}, canceled tasks on nodes", job.Id);
}
/// <summary>
/// Handles a job event for the given job. Derived classes supply the implementation.
/// </summary>
/// <param name="job">The job the event refers to.</param>
/// <param name="message">The job event message that triggered the call.</param>
/// <param name="token">Cancellation token for the operation.</param>
/// <returns>A task representing the asynchronous processing.</returns>
public abstract T.Task ProcessAsync(Job job, JobEventMessage message, CancellationToken token);