/// <summary>
/// Handles a posted node event: parses the request body as a
/// <c>NodeStateEnvelope</c>, logs it, and dispatches to the handler that
/// matches the concrete event type.
/// </summary>
/// <param name="req">The incoming HTTP request carrying the envelope.</param>
/// <returns>
/// A <c>BoolResult(true)</c> response when the handler reports no error;
/// otherwise a "not ok" response describing the parse failure, the handler
/// error, or an unrecognised event type.
/// </returns>
private async Async.Task<HttpResponseData> Post(HttpRequestData req) {
    var parsed = await RequestHandling.ParseRequest<NodeStateEnvelope>(req);
    if (!parsed.IsOk) {
        return await _context.RequestHandling.NotOk(req, parsed.ErrorV, context: "node event");
    }

    var envelope = parsed.OkV;
    _log.Info($"node event: machine_id: {envelope.MachineId} event: {EntityConverter.ToJsonString(envelope)}");

    // Route on the runtime type of the event; anything unknown is rejected.
    Error? failure;
    switch (envelope.Event) {
        case NodeStateUpdate stateUpdate:
            failure = await OnStateUpdate(envelope.MachineId, stateUpdate);
            break;
        case WorkerEvent workerUpdate:
            failure = await OnWorkerEvent(envelope.MachineId, workerUpdate);
            break;
        case NodeEvent nodeUpdate:
            failure = await OnNodeEvent(envelope.MachineId, nodeUpdate);
            break;
        default:
            failure = new Error(ErrorCode.INVALID_REQUEST, new string[] { $"invalid node event: {envelope.Event.GetType().Name}" });
            break;
    }

    if (failure is Error handlerError) {
        return await _context.RequestHandling.NotOk(req, handlerError, context: "node event");
    }

    return await RequestHandling.Ok(req, new BoolResult(true));
}
/// <summary>
/// Applies a state update reported by a node and performs the follow-up
/// work that the new state requires.
/// </summary>
/// <param name="machineId">Machine id of the reporting node; used to look the node up.</param>
/// <param name="ev">The state update, carrying the new state and optional state-specific data.</param>
/// <returns>
/// <c>null</c> when the update was handled (this includes the case where the
/// node cannot be found, which is only logged as a warning). An
/// <c>INVALID_REQUEST</c> error when a setting-up event lists no tasks or
/// references a task that cannot be found.
/// </returns>
private async Async.Task<Error?> OnStateUpdate(Guid machineId, NodeStateUpdate ev) {
    var node = await _context.NodeOperations.GetByMachineId(machineId);
    if (node is null) {
        _log.Warning($"unable to process state update event. machine_id:{machineId} state event:{ev}");
        return null;
    }

    if (ev.State == NodeState.Free) {
        if (node.ReimageRequested || node.DeleteRequested) {
            _log.Info($"stopping free node with reset flags: {machineId}");
            await _context.NodeOperations.Stop(node);
            return null;
        }

        if (await _context.NodeOperations.CouldShrinkScaleset(node)) {
            _log.Info($"stopping free node to resize scaleset: {machineId}");
            await _context.NodeOperations.SetHalt(node);
            return null;
        }
    }

    if (ev.State == NodeState.Init) {
        if (node.DeleteRequested) {
            _log.Info($"stopping node (init and delete_requested): {machineId}");
            await _context.NodeOperations.Stop(node);
            return null;
        }

        // Don't check reimage_requested, as nodes only send 'init' state once. If
        // they send 'init' with reimage_requested, it's because the node was reimaged
        // successfully.
        node = node with { ReimageRequested = false, InitializedAt = DateTimeOffset.UtcNow };
        await _context.NodeOperations.SetState(node, ev.State);
        return null;
    }

    // The previous state is read for the log message before it is overwritten.
    _log.Info($"node state update: {machineId} from {node.State} to {ev.State}");
    await _context.NodeOperations.SetState(node, ev.State);

    if (ev.State == NodeState.Free) {
        _log.Info($"node now available for work: {machineId}");
    } else if (ev.State == NodeState.SettingUp) {
        // A setting-up event without the expected payload is accepted as-is.
        if (ev.Data is NodeSettingUpEventData settingUpData) {
            return await StartTasksOnNode(machineId, settingUpData);
        }
    } else if (ev.State == NodeState.Done) {
        Error? error = null;
        if (ev.Data is NodeDoneEventData doneData && doneData.Error is not null) {
            var errorText = EntityConverter.ToJsonString(doneData);
            error = new Error(ErrorCode.TASK_FAILED, Errors: new string[] { errorText });
            _log.Error($"node 'done' with error: machine_id:{machineId}, data:{errorText}");
        }

        // if tasks are running on the node when it reports as Done
        // those are stopped early
        await _context.NodeOperations.MarkTasksStoppedEarly(node, error);
        await _context.NodeOperations.ToReimage(node, done: true);
    }

    return null;
}

/// <summary>
/// Records that the node is setting up each task listed in a setting-up
/// event, moving each task to <c>SettingUp</c> unless it is already
/// <c>Running</c> or <c>SettingUp</c>.
/// </summary>
/// <param name="machineId">Machine id of the node that is setting up the tasks.</param>
/// <param name="settingUpData">Payload of the setting-up event; its task list must be non-empty.</param>
/// <returns>
/// <c>null</c> when every listed task was processed; an
/// <c>INVALID_REQUEST</c> error when the list is empty or a task id is unknown
/// (tasks processed before the unknown id stay updated).
/// </returns>
private async Async.Task<Error?> StartTasksOnNode(Guid machineId, NodeSettingUpEventData settingUpData) {
    if (!settingUpData.Tasks.Any()) {
        return new Error(ErrorCode.INVALID_REQUEST, Errors: new string[] { $"setup without tasks. machine_id: {machineId}", });
    }

    foreach (var taskId in settingUpData.Tasks) {
        var task = await _context.TaskOperations.GetByTaskId(taskId);
        if (task is null) {
            return new Error(
                ErrorCode.INVALID_REQUEST,
                Errors: new string[] { $"unable to find task: {taskId}" });
        }

        _log.Info($"node starting task. machine_id: {machineId} job_id: {task.JobId} task_id: {task.TaskId}");

        // The task state may be `running` if it has `vm_count` > 1, and
        // another node is concurrently executing the task. If so, leave
        // the state as-is, to represent the max progress made.
        //
        // Other states we would want to preserve are excluded by the
        // outermost conditional check.
        if (task.State != TaskState.Running && task.State != TaskState.SettingUp) {
            await _context.TaskOperations.SetState(task, TaskState.SettingUp);
        }

        var nodeTask = new NodeTasks(
            MachineId: machineId,
            TaskId: task.TaskId,
            State: NodeTaskState.SettingUp);
        await _context.NodeTasksOperations.Replace(nodeTask);
    }

    return null;
}