/// <summary>
/// Applies a reported task state to the matching entry in RunningTasks, dispatches to the
/// success / requeue / failure handlers according to the reported state, and then
/// re-evaluates the overall success state of the owning Job. A report whose TaskGuid is
/// not found in RunningTasks is ignored.
/// </summary>
/// <param name="UpdatedTaskState">The reported state; its TaskGuid selects the task to update.</param>
public void UpdateTaskState( AgentTaskState UpdatedTaskState )
{
    // Sanity checks: the Job must be past the unspecified and pending states
    Debug.Assert( CurrentState != JobState.AGENT_JOB_UNSPECIFIED );
    Debug.Assert( CurrentState != JobState.AGENT_JOB_PENDING );

    AgentTask ReportedTask;
    if( !RunningTasks.TryGetValue( UpdatedTaskState.TaskGuid, out ReportedTask ) )
    {
        // Not one of the running tasks, nothing to update
        return;
    }

    // Record the new state on the individual Task before reacting to it
    ReportedTask.CurrentState = UpdatedTaskState;

    switch( UpdatedTaskState.TaskState )
    {
        case EJobTaskState.TASK_STATE_ACCEPTED:
        {
            // Nothing to do right now, but we'll need to track start times, etc. later
            break;
        }

        case EJobTaskState.TASK_STATE_RUNNING:
        {
            // Mark the real start time of this task (also set when we give the task out)
            ReportedTask.StartTime = DateTime.UtcNow;
            break;
        }

        case EJobTaskState.TASK_STATE_COMPLETE_SUCCESS:
        {
            UpdateTaskStateAsSuccess( ReportedTask );
            break;
        }

        case EJobTaskState.TASK_STATE_REJECTED:
        {
            // A rejection from a remote owner is requeued; a local one counts as a failure
            RemoteConnection RemoteOwner = ReportedTask.CurrentOwner as RemoteConnection;
            if( RemoteOwner == null )
            {
                Manager.Log( EVerbosityLevel.Informative, ELogColour.Red, "[UpdateTaskState]: Task Rejected locally by " + Environment.MachineName + ", counted as failure" );
                UpdateTaskStateAsFailure( ReportedTask );
            }
            else
            {
                Manager.Log( EVerbosityLevel.Informative, ELogColour.Orange, "[UpdateTaskState]: Task Rejected remotely by " + RemoteOwner.Info.Name );
                Manager.Log( EVerbosityLevel.Informative, ELogColour.Orange, "[UpdateTaskState]: Requeueing: " + ReportedTask.Specification.Parameters );
                UpdateTaskStateAsRequeued( ReportedTask );
            }
            break;
        }

        case EJobTaskState.TASK_STATE_KILLED:
        {
            // Same policy as a rejection: requeue when remote, fail when local
            RemoteConnection RemoteOwner = ReportedTask.CurrentOwner as RemoteConnection;
            if( RemoteOwner == null )
            {
                Manager.Log( EVerbosityLevel.Informative, ELogColour.Red, "[UpdateTaskState]: Task Killed locally by " + Environment.MachineName + ", counted as failure" );
                UpdateTaskStateAsFailure( ReportedTask );
            }
            else
            {
                Manager.Log( EVerbosityLevel.Informative, ELogColour.Orange, "[UpdateTaskState]: Task Killed remotely by " + RemoteOwner.Info.Name );
                Manager.Log( EVerbosityLevel.Informative, ELogColour.Orange, "[UpdateTaskState]: Requeueing: " + ReportedTask.Specification.Parameters );
                UpdateTaskStateAsRequeued( ReportedTask );
            }
            break;
        }

        case EJobTaskState.TASK_STATE_COMPLETE_FAILURE:
        {
            // Name the machine the task failed on: the remote owner if there is one,
            // otherwise this machine
            RemoteConnection RemoteOwner = ReportedTask.CurrentOwner as RemoteConnection;
            string FailedOnMachine = ( RemoteOwner != null ) ? RemoteOwner.Info.Name : Environment.MachineName;

            Manager.Log( EVerbosityLevel.Informative, ELogColour.Red, "[UpdateTaskState]: Task Failed on " + FailedOnMachine );
            Manager.Log( EVerbosityLevel.Informative, ELogColour.Red, "[UpdateTaskState]: Task Failed: " + ReportedTask.Specification.Parameters );
            UpdateTaskStateAsFailure( ReportedTask );
            break;
        }
    }

    // Update the owning Job state, by checking for failures. Success is only
    // determined after all Tasks are assured to be done or orphanable. Only
    // do this one time and let the new state, if there is one, be sticky.
    lock( CurrentSuccessStateLock )
    {
        if( CurrentSuccessState == JobSuccessState.AGENT_JOB_INCOMPLETE )
        {
            if( TaskFailureCount > 0 )
            {
                // Any failed task fails the whole Job
                CurrentSuccessState = JobSuccessState.AGENT_JOB_FAILURE;
                if( OwnerIsInstigator )
                {
                    // Send an INFO message describing the failure
                    SendJobCompletedMessage( new AgentInfoMessage( "Job has failed! The task failure count is non-zero" ) );
                }
            }
            else if( ( TaskSuccessCount == TaskCount ) && ( OwnerIsInstigator ) )
            {
                // Only the instigator can properly determine that every task succeeded;
                // the branch condition already guarantees we are the instigator here
                CurrentSuccessState = JobSuccessState.AGENT_JOB_SUCCESS;
                SendJobCompletedMessage( new AgentInfoMessage( "Job is a success!" ) );
            }
        }
    }

    // Update the visualizer if this agent is the Instigator
    if( OwnerIsInstigator )
    {
        AgentApplication.UpdateMachineState( Environment.MachineName, RetiredTasks.Count, EProgressionState.TasksCompleted );
        AgentApplication.UpdateMachineState( Environment.MachineName, RunningTasks.Count, EProgressionState.TasksInProgress );
    }
}
/// <summary>
/// Closes the Job. After flushing the message queue it marks the Job closed (once only),
/// resolves reservations, determines the final success state for agent managed Task-based
/// Jobs, closes the Job on every remote child and, when this agent is the Instigator,
/// records the run for deterministic replay and logs per-task statistics. Finally it
/// attempts to post the Job stats to the DB.
/// </summary>
/// <returns>Always Constants.SUCCESS.</returns>
public Int32 CloseJob()
{
    // Before we close the job, make sure all messages have been processed to
    // make sure we avoid any race condition between getting updates to tasks
    // or the job and closing the job
    Manager.FlushMessageQueue( Owner, false );

    // Update the state only within a mutex to protect anyone trying to read
    // the state at the same time
    lock( CurrentStateLock )
    {
        // Only do this if this Job hasn't already been closed
        if( CurrentState != JobState.AGENT_JOB_CLOSED )
        {
            CurrentState = JobState.AGENT_JOB_CLOSED;
            StopTime = DateTime.UtcNow;

            // First, resolve any outstanding reservations
            CheckForReservations();

            // Then settle the success state and tell the remote agents we're done
            DetermineSuccessStateOnClose();
            CloseJobOnRemoteChildren();

            // If this is the Instigator, perform additional post-job work
            if( OwnerIsInstigator )
            {
                // Inform the visualizer that we've disconnected
                AgentApplication.UpdateMachineState( Environment.MachineName, -1, EProgressionState.InstigatorDisconnected );

                // If the job was a success, record the state for deterministic replay
                if( CurrentSuccessState == JobSuccessState.AGENT_JOB_SUCCESS )
                {
                    RecordJobForDeterministicReplay();
                }

                // Report some of the stats for the Job
                LogRetiredTaskStats();
            }
        }
    }

    // Attempt to report the final stats to the DB
    PostJobStatsToDB();

    return ( Constants.SUCCESS );
}

/// <summary>
/// Determines the success state of an agent managed, Task-based Job at close time, unless
/// the state has already been determined by other means. Jobs that are manually started,
/// have no Specification or have no Tasks are left untouched here.
/// </summary>
private void DetermineSuccessStateOnClose()
{
    lock( CurrentSuccessStateLock )
    {
        // Only update if the Job success state has not been determined already by other means
        if( CurrentSuccessState != JobSuccessState.AGENT_JOB_INCOMPLETE )
        {
            return;
        }

        bool IsAgentManagedTaskBasedJob =
            ( Specification != null ) &&
            ( ( Specification.JobFlags & EJobTaskFlags.FLAG_MANUAL_START ) == 0 ) &&
            ( TaskCount > 0 );
        if( !IsAgentManagedTaskBasedJob )
        {
            // Otherwise, the process should shut itself down now that the Job
            // has been ended and any reservations have been sent out. If it
            // fails to quit itself, it will be killed and we'll still get
            // the exited process callback
            return;
        }

        // Check for the set of known failure cases, in priority order
        string FailureMessageText = null;
        if( PendingTasks.Count != 0 )
        {
            FailureMessageText = "Job has failed! Job is closed while Tasks are still PENDING";
        }
        else if( RunningTasks.Count != 0 )
        {
            FailureMessageText = "Job has failed! Job is closed while Tasks are still RUNNING";
        }
        else if( TaskFailureCount != 0 )
        {
            FailureMessageText = "Job has failed! The task failure count is non-zero";
        }

        string CompletionMessageText;
        if( FailureMessageText != null )
        {
            CurrentSuccessState = JobSuccessState.AGENT_JOB_FAILURE;
            CompletionMessageText = FailureMessageText;
        }
        else
        {
            // This is the only way to mark an agent managed, Task-based Job a success
            CurrentSuccessState = JobSuccessState.AGENT_JOB_SUCCESS;
            CompletionMessageText = "Job is a success!";
        }

        if( OwnerIsInstigator )
        {
            // Send an INFO message describing the outcome
            SendJobCompletedMessage( new AgentInfoMessage( CompletionMessageText ) );
        }
    }
}

/// <summary>
/// Ends the Job on each remote connection of the Owner. Ending the Job will eventually
/// cause remote Job executables to be notified that the Job is closed.
/// </summary>
private void CloseJobOnRemoteChildren()
{
    foreach( RemoteConnection RemoteChild in Owner.RemoteChildren.Values )
    {
        Hashtable CloseJobInParameters = null;
        Hashtable CloseJobOutParameters = null;
        RemoteChild.Interface.CloseJob( RemoteChild.Handle, CloseJobInParameters, ref CloseJobOutParameters );
    }
}

/// <summary>
/// Replaces Manager.LastSuccessfulJobRecord with a record of this Job: every retired task,
/// ordered by assign time, grouped into one queue per worker agent. If the worker name
/// set and the queue mapping disagree, the record is discarded and the last record stays
/// cleared.
/// </summary>
private void RecordJobForDeterministicReplay()
{
    // We're done with the last run record now
    Manager.LastSuccessfulJobRecord = null;

    AgentJobRecord NewJobRecord = new AgentJobRecord();
    NewJobRecord.Specification = Specification;

    // Sort all retired tasks by assign time to make sure the order is correct
    List<AgentTask> TasksByAssignTime = new List<AgentTask>( RetiredTasks.ToArray() );
    TasksByAssignTime.Sort( AgentTask.CompareTasksByAssignTime );

    // Assign out the tasks based on where it was assigned and when
    foreach( AgentTask NextTask in TasksByAssignTime )
    {
        // Add the task to the set of all tasks
        NewJobRecord.AllTasks.Add( NextTask.Specification.TaskGuid, NextTask );

        Queue<AgentTask> TaskQueue = null;
        string NameOfWorker = Manager.MachineNameFromConnection( NextTask.CurrentOwner );
        string IPAddressOfWorker = Manager.MachineIPAddressFromConnection( NextTask.CurrentOwner );

        if( !NewJobRecord.WorkerAgentNames.Contains( NameOfWorker ) )
        {
            // Newly discovered agent: create its queue and register it in both
            // the name/address sets and the task mapping
            TaskQueue = new Queue<AgentTask>();
            TaskQueue.Enqueue( NextTask );

            NewJobRecord.WorkerAgentNames.Add( NameOfWorker );
            NewJobRecord.WorkerAgentIPAddresses.Add( IPAddressOfWorker );
            NewJobRecord.AgentToGoldenTaskQueueMapping.Add( NameOfWorker, TaskQueue );
        }
        else if( NewJobRecord.AgentToGoldenTaskQueueMapping.TryGetValue( NameOfWorker, out TaskQueue ) )
        {
            // Known agent: queue up the next task
            TaskQueue.Enqueue( NextTask );
        }
        else
        {
            // Error: the agent is known by name but has no queue, so deterministic
            // mode is not allowed and the record is dropped
            return;
        }
    }

    // The record is consistent, assign it for the next run
    Manager.LastSuccessfulJobRecord = NewJobRecord;
}

/// <summary>
/// For each successfully completed retired task, logs the time it spent scheduled and
/// running, its cost, and the running-time-to-cost ratio.
/// </summary>
private void LogRetiredTaskStats()
{
    foreach( AgentTask NextTask in RetiredTasks.ToArray() )
    {
        if( NextTask.CurrentState.TaskState != EJobTaskState.TASK_STATE_COMPLETE_SUCCESS )
        {
            continue;
        }

        TimeSpan ScheduledTime = NextTask.StartTime - NextTask.AssignTime;
        TimeSpan RunningTime = NextTask.StopTime - NextTask.StartTime;

        // The ratio is computed in double precision, so a zero Cost does not throw
        string LogMessage = String.Format( "[CloseJob] Task {0} {1} - Scheduled(ms): {2}, Running(ms): {3}, Cost: {4}, Running(ms)/Cost: {5}",
            NextTask.Specification.TaskGuid,
            NextTask.Specification.Parameters,
            ScheduledTime.TotalMilliseconds,
            RunningTime.TotalMilliseconds,
            NextTask.Specification.Cost,
            RunningTime.TotalMilliseconds / ( double )NextTask.Specification.Cost );
        Manager.Log( EVerbosityLevel.Verbose, ELogColour.Green, LogMessage );
    }
}
/// <summary>
/// Handler for the exit of the Job's process. When the sender is the process tracked in
/// ProcessObject, caches its exit code and, if the Job success state is still incomplete,
/// derives it from the exit code: an agent managed, Task-based Job is only ever marked a
/// failure here (success is left to CloseJob), while any other Job is marked a success or
/// a failure directly. Afterwards it attempts to post the Job stats to the DB.
/// </summary>
/// <param name="Sender">The object raising the event; ignored unless it is the tracked Process.</param>
/// <param name="Args">Unused.</param>
public void ExitedProcessEventHandler( Object Sender, EventArgs Args )
{
    // Verify that the process is the one we think it is. A null or non-Process sender
    // must never match, even when no process is currently tracked.
    Process ExitedProcess = Sender as Process;
    if( ( ExitedProcess == null ) || ( ExitedProcess != ProcessObject ) )
    {
        return;
    }

    Debug.Assert( ExitedProcess.HasExited );

    // Grab any additional data from the process before we let it go, and use the
    // cached value from here on
    Int32 ExitCode = ExitedProcess.ExitCode;
    ProcessObjectExitCode = ExitCode;

    lock( CurrentSuccessStateLock )
    {
        // Only update if the job state has not been determined by other means
        if( CurrentSuccessState == JobSuccessState.AGENT_JOB_INCOMPLETE )
        {
            // Determine if this is an agent managed process. A missing Specification is
            // treated as not agent managed, matching the check CloseJob makes before it
            // defers the decision to this callback.
            bool IsAnAgentManagedProcess =
                ( Specification != null ) &&
                ( ( Specification.JobFlags & EJobTaskFlags.FLAG_MANUAL_START ) == 0 );

            if( ( IsAnAgentManagedProcess ) && ( TaskCount > 0 ) )
            {
                // If the Job executable didn't close cleanly it's marked a failure,
                // otherwise, we'll wait until CloseJob is called to determine if
                // it's a success
                if( ExitCode != 0 )
                {
                    CurrentSuccessState = JobSuccessState.AGENT_JOB_FAILURE;
                    if( OwnerIsInstigator )
                    {
                        // Send an INFO message describing the failure
                        string NewMessageText = "Job has failed! Job executable didn't exit cleanly. Exit code: " + ExitCode.ToString();
                        SendJobCompletedMessage( new AgentInfoMessage( NewMessageText ) );
                    }
                }
            }
            else
            {
                // Otherwise, this is an Agent managed, non-Task-based Job, or this Job
                // is manually managed outside of the Agent. In either case, the exit
                // code will determine success.
                string NewMessageText;
                if( ExitCode == 0 )
                {
                    CurrentSuccessState = JobSuccessState.AGENT_JOB_SUCCESS;
                    NewMessageText = "Job is a success!";
                }
                else
                {
                    CurrentSuccessState = JobSuccessState.AGENT_JOB_FAILURE;
                    NewMessageText = "Job has failed! Job executable has exited with a non-zero exit code";
                }

                if( OwnerIsInstigator )
                {
                    // Send an INFO message describing the outcome
                    SendJobCompletedMessage( new AgentInfoMessage( NewMessageText ) );
                }
            }
        }
    }

    // Attempt to report the final stats to the DB
    PostJobStatsToDB();
}