예제 #1
0
        /// <summary>
        /// Applies a state update reported for one of this Job's running Tasks and then
        /// re-evaluates the overall success state of the Job.
        /// </summary>
        /// <param name="UpdatedTaskState">
        /// The reported state. Its TaskGuid selects the Task in RunningTasks; an update for
        /// a GUID that is not currently running is ignored.
        /// </param>
        public void UpdateTaskState( AgentTaskState UpdatedTaskState )
        {
            // Sanity checks
            Debug.Assert( CurrentState != JobState.AGENT_JOB_UNSPECIFIED );
            Debug.Assert( CurrentState != JobState.AGENT_JOB_PENDING );

            AgentTask RunningTask;
            if( !RunningTasks.TryGetValue( UpdatedTaskState.TaskGuid, out RunningTask ) )
            {
                // Not a Task we are currently running, nothing to update
                return;
            }

            // Update the individual Task state
            RunningTask.CurrentState = UpdatedTaskState;
            switch( UpdatedTaskState.TaskState )
            {
                case EJobTaskState.TASK_STATE_ACCEPTED:
                    // Nothing to do right now, but we'll need to track start times, etc. later
                    break;

                case EJobTaskState.TASK_STATE_RUNNING:
                    // Mark the real start time of this task (also set when we give the task out)
                    RunningTask.StartTime = DateTime.UtcNow;
                    break;

                case EJobTaskState.TASK_STATE_COMPLETE_SUCCESS:
                    UpdateTaskStateAsSuccess( RunningTask );
                    break;

                case EJobTaskState.TASK_STATE_REJECTED:
                    RequeueOrFailInterruptedTask( RunningTask, "Rejected" );
                    break;

                case EJobTaskState.TASK_STATE_KILLED:
                    RequeueOrFailInterruptedTask( RunningTask, "Killed" );
                    break;

                case EJobTaskState.TASK_STATE_COMPLETE_FAILURE:
                    {
                        // Report which machine the Task failed on, then count the failure
                        RemoteConnection FailingOwner = RunningTask.CurrentOwner as RemoteConnection;
                        if( FailingOwner != null )
                        {
                            Manager.Log( EVerbosityLevel.Informative, ELogColour.Red, "[UpdateTaskState]: Task Failed on " + FailingOwner.Info.Name );
                        }
                        else
                        {
                            Manager.Log( EVerbosityLevel.Informative, ELogColour.Red, "[UpdateTaskState]: Task Failed on " + Environment.MachineName );
                        }
                        Manager.Log( EVerbosityLevel.Informative, ELogColour.Red, "[UpdateTaskState]: Task Failed: " + RunningTask.Specification.Parameters );
                        UpdateTaskStateAsFailure( RunningTask );
                    }
                    break;
            }

            // Update the owning Job state, by checking for failures. Success is only
            // determined after all Tasks are assured to be done or orphanable. Only
            // do this one time and let the new state, if there is one, be sticky.
            lock( CurrentSuccessStateLock )
            {
                if( CurrentSuccessState == JobSuccessState.AGENT_JOB_INCOMPLETE )
                {
                    // Update if any task is a failure
                    if( TaskFailureCount > 0 )
                    {
                        // Update the state and send a message indicating the failure
                        CurrentSuccessState = JobSuccessState.AGENT_JOB_FAILURE;
                        if( OwnerIsInstigator )
                        {
                            // Log and send an INFO message describing the failure
                            string NewMessageText = "Job has failed! The task failure count is non-zero";
                            SendJobCompletedMessage( new AgentInfoMessage( NewMessageText ) );
                        }
                    }
                    // Update if all tasks are successful and we're the instigator, since only
                    // the instigator can make this determination properly
                    else if( ( TaskSuccessCount == TaskCount ) &&
                             ( OwnerIsInstigator ) )
                    {
                        // The branch condition already guarantees we are the instigator, so
                        // the INFO message describing the success is always sent here
                        CurrentSuccessState = JobSuccessState.AGENT_JOB_SUCCESS;
                        string NewMessageText = "Job is a success!";
                        SendJobCompletedMessage( new AgentInfoMessage( NewMessageText ) );
                    }
                }
            }

            // Update the visualizer if this agent is the Instigator
            if( OwnerIsInstigator )
            {
                AgentApplication.UpdateMachineState( Environment.MachineName, RetiredTasks.Count, EProgressionState.TasksCompleted );
                AgentApplication.UpdateMachineState( Environment.MachineName, RunningTasks.Count, EProgressionState.TasksInProgress );
            }
        }

        /// <summary>
        /// Shared handling for a Task that was rejected or killed before completing: if the
        /// Task is owned by a remote connection it is requeued, otherwise it is counted as
        /// a failure.
        /// </summary>
        /// <param name="InterruptedTask">The Task that was rejected or killed</param>
        /// <param name="ActionName">Word used in the log text ("Rejected" or "Killed")</param>
        private void RequeueOrFailInterruptedTask( AgentTask InterruptedTask, string ActionName )
        {
            // A single cast both tests the owner type and gives access to the remote info
            RemoteConnection RemoteOwner = InterruptedTask.CurrentOwner as RemoteConnection;
            if( RemoteOwner != null )
            {
                Manager.Log( EVerbosityLevel.Informative, ELogColour.Orange, "[UpdateTaskState]: Task " + ActionName + " remotely by " + RemoteOwner.Info.Name );
                Manager.Log( EVerbosityLevel.Informative, ELogColour.Orange, "[UpdateTaskState]: Requeueing: " + InterruptedTask.Specification.Parameters );
                UpdateTaskStateAsRequeued( InterruptedTask );
            }
            else
            {
                Manager.Log( EVerbosityLevel.Informative, ELogColour.Red, "[UpdateTaskState]: Task " + ActionName + " locally by " + Environment.MachineName + ", counted as failure" );
                UpdateTaskStateAsFailure( InterruptedTask );
            }
        }
예제 #2
0
        /// <summary>
        /// Closes this Job. Flushes the owner's message queue and then, only if the Job is
        /// not already closed, marks it closed, resolves outstanding reservations, settles
        /// the success state if it is still incomplete, forwards the close to every remote
        /// child connection and, when the owner is the Instigator, records the run for
        /// deterministic replay and logs per-task timings. The final stats are posted to
        /// the DB on every call, whether or not the Job was already closed.
        /// </summary>
        /// <returns>Always Constants.SUCCESS</returns>
        public Int32 CloseJob()
        {
            // Before we close the job, make sure all messages have been processed to
            // make sure we avoid any race condition between getting updates to tasks
            // or the job and closing the job
            Manager.FlushMessageQueue( Owner, false );

            // Update the state only within a mutex to protect anyone trying to read
            // the state at the same time
            // NOTE(review): everything below, including the remote CloseJob calls, runs
            // while CurrentStateLock is held
            lock( CurrentStateLock )
            {
                // Only do this if this Job hasn't already been closed
                if( CurrentState != JobState.AGENT_JOB_CLOSED )
                {
                    CurrentState = JobState.AGENT_JOB_CLOSED;
                    StopTime = DateTime.UtcNow;

                    // First, resolve any outstanding reservations
                    CheckForReservations();

                    // Determine success state of the Job
                    lock( CurrentSuccessStateLock )
                    {
                        // Only update if the Job success state has not been determined already by other means
                        if( CurrentSuccessState == JobSuccessState.AGENT_JOB_INCOMPLETE )
                        {
                            // If this is an agent managed, Task-based process
                            if( ( Specification != null ) &&
                                ( ( Specification.JobFlags & EJobTaskFlags.FLAG_MANUAL_START ) == 0 ) &&
                                ( TaskCount > 0 ) )
                            {
                                // Check for a set of known failure cases; the first one that
                                // matches (pending, then running, then failed) sets the message
                                bool IsAStandardFailureCase = false;
                                string NewMessageText = "No message provided!";

                                // If there are any Tasks still pending
                                if( PendingTasks.Count != 0 )
                                {
                                    // Prepare the INFO message describing the failure
                                    NewMessageText = "Job has failed! Job is closed while Tasks are still PENDING";
                                    IsAStandardFailureCase = true;
                                }
                                // If there are any Tasks still running
                                else if( RunningTasks.Count != 0 )
                                {
                                    // Prepare the INFO message describing the failure
                                    NewMessageText = "Job has failed! Job is closed while Tasks are still RUNNING";
                                    IsAStandardFailureCase = true;
                                }
                                // If any Task was reported a failure
                                else if( TaskFailureCount != 0 )
                                {
                                    // Prepare the INFO message describing the failure
                                    NewMessageText = "Job has failed! The task failure count is non-zero";
                                    IsAStandardFailureCase = true;
                                }

                                if( IsAStandardFailureCase )
                                {
                                    CurrentSuccessState = JobSuccessState.AGENT_JOB_FAILURE;
                                    if( OwnerIsInstigator )
                                    {
                                        SendJobCompletedMessage( new AgentInfoMessage( NewMessageText ) );
                                    }
                                }
                                else
                                {
                                    // This is the only way to mark an agent managed, Task-based Job a success
                                    CurrentSuccessState = JobSuccessState.AGENT_JOB_SUCCESS;
                                    if( OwnerIsInstigator )
                                    {
                                        // Log and send an INFO message describing the success
                                        NewMessageText = "Job is a success!";
                                        SendJobCompletedMessage( new AgentInfoMessage( NewMessageText ) );
                                    }
                                }
                            }
                            else
                            {
                                // Otherwise, the process should shut itself down now that the Job
                                // has been ended and any reservations have been sent out. If it
                                // fails to quit itself, it will be killed and we'll still get
                                // the exited process callback
                            }
                        }
                    }

                    // For each remote connection we have for this Job, end the Job.
                    // Ending the Job will eventually cause remote Job executables to
                    // be notified that the Job is closed.
                    foreach( RemoteConnection RemoteChild in Owner.RemoteChildren.Values )
                    {
                        // No input parameters are passed; the output table is not read afterwards
                        Hashtable CloseJobInParameters = null;
                        Hashtable CloseJobOutParameters = null;
                        RemoteChild.Interface.CloseJob( RemoteChild.Handle, CloseJobInParameters, ref CloseJobOutParameters );
                    }

                    // If this is the Instigator, perform additional post-job work
                    if( OwnerIsInstigator )
                    {
                        // Inform the visualizer that we've disconnected
                        AgentApplication.UpdateMachineState( Environment.MachineName, -1, EProgressionState.InstigatorDisconnected );

                        // If the job was a success, record the state for deterministic replay
                        if( CurrentSuccessState == JobSuccessState.AGENT_JOB_SUCCESS )
                        {
                            // We're done with the last run record now
                            Manager.LastSuccessfulJobRecord = null;

                            bool DeterministicModeAllowed = true;
                            AgentJobRecord NewJobRecord = new AgentJobRecord();
                            NewJobRecord.Specification = Specification;

                            // Sort all retired tasks by assign time to make sure the order is correct
                            List<AgentTask> ListOfRetiredTasks = new List<AgentTask>( RetiredTasks.ToArray() );
                            ListOfRetiredTasks.Sort( AgentTask.CompareTasksByAssignTime );

                            // Assign out the tasks based on where it was assigned and when
                            foreach( AgentTask NextTask in ListOfRetiredTasks )
                            {
                                // Add the task to the set of all tasks
                                NewJobRecord.AllTasks.Add( NextTask.Specification.TaskGuid, NextTask );

                                // Add the task to the agent-specific queue, creating an entry
                                // for the agent if we haven't seen it yet
                                Queue<AgentTask> TaskQueue = null;
                                string NameOfWorker = Manager.MachineNameFromConnection( NextTask.CurrentOwner );
                                string IPAddressOfWorker = Manager.MachineIPAddressFromConnection( NextTask.CurrentOwner );
                                if( !NewJobRecord.WorkerAgentNames.Contains( NameOfWorker ) )
                                {
                                    // Create a new task queue for the newly discovered agent
                                    TaskQueue = new Queue<AgentTask>();
                                    TaskQueue.Enqueue( NextTask );

                                    // Add this new agent to both the set of names and the task mapping sets
                                    NewJobRecord.WorkerAgentNames.Add( NameOfWorker );
                                    NewJobRecord.WorkerAgentIPAddresses.Add( IPAddressOfWorker );
                                    NewJobRecord.AgentToGoldenTaskQueueMapping.Add( NameOfWorker, TaskQueue );
                                }
                                else if( NewJobRecord.AgentToGoldenTaskQueueMapping.TryGetValue( NameOfWorker, out TaskQueue ) )
                                {
                                    // Queue up the next task
                                    TaskQueue.Enqueue( NextTask );
                                }
                                else
                                {
                                    // Error: the worker name is known but has no task queue, so the
                                    // record is inconsistent and we should fail the entire thing
                                    DeterministicModeAllowed = false;
                                    break;
                                }
                            }

                            // If allowed, assign it for the next run; otherwise the record
                            // stays cleared from the reset above
                            if( DeterministicModeAllowed )
                            {
                                Manager.LastSuccessfulJobRecord = NewJobRecord;
                            }
                        }

                        // Report some of the stats for the Job
                        // For each successfully completed task, log the time it took and the time/cost
                        foreach( AgentTask NextTask in RetiredTasks.ToArray() )
                        {
                            if( NextTask.CurrentState.TaskState == EJobTaskState.TASK_STATE_COMPLETE_SUCCESS )
                            {
                                TimeSpan ScheduledTime = NextTask.StartTime - NextTask.AssignTime;
                                TimeSpan RunningTime = NextTask.StopTime - NextTask.StartTime;
                                // NOTE(review): both operands of the final ratio are cast to double, so a
                                // zero Cost would log Infinity/NaN rather than throw - confirm Cost is never 0
                                string LogMessage = String.Format( "[CloseJob] Task {0} {1} - Scheduled(ms): {2}, Running(ms): {3}, Cost: {4}, Running(ms)/Cost: {5}",
                                    NextTask.Specification.TaskGuid,
                                    NextTask.Specification.Parameters,
                                    ScheduledTime.TotalMilliseconds,
                                    RunningTime.TotalMilliseconds,
                                    NextTask.Specification.Cost,
                                    ( double )RunningTime.TotalMilliseconds / ( double )NextTask.Specification.Cost );
                                Manager.Log( EVerbosityLevel.Verbose, ELogColour.Green, LogMessage );
                            }
                        }
                    }
                }
            }

            // Attempt to report the final stats to the DB
            // NOTE(review): this sits outside the already-closed check, so it runs on every call
            PostJobStatsToDB();

            return ( Constants.SUCCESS );
        }
예제 #3
0
        /// <summary>
        /// Handler for the exit notification of the Job's process. When the sender is the
        /// process tracked by this Job, stores its exit code, settles the Job success state
        /// if it is still incomplete, and posts the final stats to the DB.
        /// </summary>
        /// <param name="Sender">The object raising the event; compared against ProcessObject</param>
        /// <param name="Args">Event arguments (not used)</param>
        public void ExitedProcessEventHandler( Object Sender, EventArgs Args )
        {
            // Ignore notifications that are not for the process this Job is tracking
            if( ProcessObject != ( Sender as Process ) )
            {
                return;
            }

            Debug.Assert( ProcessObject.HasExited );

            // Keep the exit code around, since it is all we need from the ProcessObject
            int ExitCodeOfProcess = ProcessObject.ExitCode;
            ProcessObjectExitCode = ExitCodeOfProcess;

            lock( CurrentSuccessStateLock )
            {
                // A success state that was already decided elsewhere is left alone
                if( CurrentSuccessState == JobSuccessState.AGENT_JOB_INCOMPLETE )
                {
                    bool ExitedCleanly = ( ExitCodeOfProcess == 0 );
                    bool IsManuallyStarted = ( Specification.JobFlags & EJobTaskFlags.FLAG_MANUAL_START ) != 0;
                    bool IsTaskBasedAgentJob = !IsManuallyStarted && ( TaskCount > 0 );

                    // Decide the outcome first; a null message means the state stays undecided
                    string CompletionText = null;
                    bool JobSucceeded = false;

                    if( IsTaskBasedAgentJob )
                    {
                        // An unclean exit fails an agent managed, Task-based Job right away;
                        // a clean exit decides nothing here and is left for CloseJob to judge
                        if( !ExitedCleanly )
                        {
                            CompletionText = "Job has failed! Job executable didn't exit cleanly. Exit code: " + ExitCodeOfProcess.ToString();
                        }
                    }
                    else if( ExitedCleanly )
                    {
                        // Non-Task-based or manually managed Jobs are judged purely on the exit code
                        JobSucceeded = true;
                        CompletionText = "Job is a success!";
                    }
                    else
                    {
                        CompletionText = "Job has failed! Job executable has exited with a non-zero exit code";
                    }

                    // Apply the outcome and, for the Instigator, send the INFO message describing it
                    if( CompletionText != null )
                    {
                        CurrentSuccessState = JobSucceeded ? JobSuccessState.AGENT_JOB_SUCCESS : JobSuccessState.AGENT_JOB_FAILURE;
                        if( OwnerIsInstigator )
                        {
                            SendJobCompletedMessage( new AgentInfoMessage( CompletionText ) );
                        }
                    }
                }
            }

            // Attempt to report the final stats to the DB
            PostJobStatsToDB();
        }