示例#1
0
        /// <summary>
        /// Moves this process to the Running state, first waiting up to ten seconds
        /// for the AssignedToNode event to be signalled.
        /// </summary>
        /// <param name="state">Not referenced by the method body.</param>
        public void TransitionToRunning(object state)
        {
            DryadLogger.LogDebug("Change State", "Transitioning to Running with current state {0} for process {1}", this.m_currentState.ToString(), this.m_id);

            try
            {
                // On rare occasions (for example a cancelled duplicate) the GM closes its handle to
                // this process while the transition is in flight. Dispose then closes the
                // m_assignedToNodeEvent handle and the wait below throws; that case is only logged
                // because the GM no longer cares about this process.
                bool signalled = m_assignedToNodeEvent.WaitOne(TimeSpan.FromSeconds(10), false);
                if (!signalled)
                {
                    // The state change is still fired after a timeout, otherwise the process would
                    // become a zombie. The GM copes with the transition, possibly with a delay.
                    DryadLogger.LogWarning("Change State", "Timed out waiting for transition to AssignedToNode for process {0}", this.m_id);
                }
                else
                {
                    DryadLogger.LogDebug("Change State", "Successfully waited for transition to {0} for process {1}", this.m_currentState.ToString(), this.m_id);
                }

                ChangeState(ProcessState.Running);
            }
            catch (ObjectDisposedException disposedEx)
            {
                DryadLogger.LogError(0, disposedEx, "Process handle was closed while waiting for transition to assigned to node");
            }
        }
示例#2
0
        /// <summary>
        /// Asks the vertex service client for its status, reopening the connection and
        /// retrying (up to MaxRetries attempts) when the call throws.
        /// </summary>
        /// <returns>
        /// The status reported by the client, or a status with serviceIsAlive set to false
        /// after the faulted event has been raised.
        /// </returns>
        public VertexStatus CheckStatus()
        {
            for (int attempt = 0; attempt < MaxRetries; attempt++)
            {
                try
                {
                    if (Faulted)
                    {
                        // Already faulted: no point in calling the client.
                        break;
                    }

                    return this.m_client.CheckStatus();
                }
                catch (Exception ex)
                {
                    DryadLogger.LogError(0, ex, "node '{0}'", m_nodeName);
                    if (SafeOpenConnection())
                    {
                        continue;
                    }

                    // The connection could not be reopened, so stop retrying.
                    break;
                }
            }

            RaiseFaultedEvent();

            VertexStatus deadStatus = new VertexStatus();
            deadStatus.serviceIsAlive = false;
            return deadStatus;
        }
示例#3
0
        /// <summary>
        /// Signals the task monitor thread to stop (if it is flagged as running),
        /// finishes the application master, then joins the monitor thread.
        /// </summary>
        void ISchedulerHelper.StopTaskMonitorThread()
        {
            DryadLogger.LogMethodEntry();
            bool mustJoin = false;

            if (m_taskMonitorThreadRunning)
            {
                lock (m_lock)
                {
                    // Check the flag again now that the lock is held.
                    if (m_taskMonitorThreadRunning)
                    {
                        m_threadStopEvt.Set();
                        mustJoin = true;
                    }
                }
            }

            m_appMaster.Finish(true);

            if (!mustJoin)
            {
                DryadLogger.LogMethodExit();
                return;
            }

            try
            {
                m_taskMonitorThread.Join();
            }
            catch (Exception joinEx)
            {
                DryadLogger.LogError(0, joinEx, "Failed to wait for task monitor thread to stop.");
            }

            DryadLogger.LogMethodExit();
        }
示例#4
0
        /// <summary>
        /// Closes the wait handles held by this process. Does nothing once
        /// m_disposed has been set.
        /// </summary>
        /// <param name="disposing">
        /// When true, the assigned-to-node event and every registered state change
        /// waiter are closed; when false only the disposed flag is set.
        /// </param>
        private void Dispose(bool disposing)
        {
            if (m_disposed)
            {
                return;
            }

            if (disposing)
            {
                DryadLogger.LogInformation("Dispose Process", "Releasing resources for process id {0}", this.m_id);

                this.m_assignedToNodeEvent.Close();

                foreach (KeyValuePair<ProcessState, List<ManualResetEvent>> waitersForState in m_stateChangeWaiters)
                {
                    foreach (ManualResetEvent waiter in waitersForState.Value)
                    {
                        // A failure to close one handle is logged and must not stop the rest.
                        try
                        {
                            waiter.Close();
                        }
                        catch (Exception closeEx)
                        {
                            DryadLogger.LogError(0, closeEx);
                        }
                    }
                }
            }

            m_disposed = true;
        }
示例#5
0
        /// <summary>
        /// Service entry point for a property set / get request: looks up the vertex
        /// process by its Dryad id and forwards the request to it.
        /// </summary>
        /// <param name="replyEpr">callback URI</param>
        /// <param name="processId">vertex process id</param>
        /// <param name="infos">property information</param>
        /// <param name="blockOnLabel">property update label</param>
        /// <param name="blockOnVersion">property update version</param>
        /// <param name="maxBlockTime">maximum time to wait for update</param>
        /// <param name="getPropLabel">property to get</param>
        /// <param name="ProcessStatistics">vertex host process statistics</param>
        /// <returns>
        /// The result reported by the vertex process, or false when no process
        /// with the given id is found.
        /// </returns>
        /// <exception cref="FaultException{VertexServiceError}">
        /// Thrown in place of any exception raised while handling the request.
        /// </exception>
        bool IDryadVertexService.SetGetProps(string replyEpr, int processId, ProcessPropertyInfo[] infos, string blockOnLabel, ulong blockOnVersion, long maxBlockTime, string getPropLabel, bool ProcessStatistics)
        {
            DryadLogger.LogMethodEntry(replyEpr, processId);
            bool result = false;

            try
            {
                // Resolve the vertex process that owns this Dryad id
                VertexProcess target = FindByDryadId(processId);
                if (target == null)
                {
                    DryadLogger.LogError(0, null, "Failed to set / get process properties: Unknown process id {0}", processId);
                }
                else
                {
                    result = target.SetGetProps(replyEpr, infos, blockOnLabel, blockOnVersion, maxBlockTime, getPropLabel, ProcessStatistics);
                }
            }
            catch (Exception ex)
            {
                string details = ex.ToString();
                DryadLogger.LogWarning("Set Or Get Process Properties", "Operation threw exception: {0}", details);
                throw new FaultException<VertexServiceError>(new VertexServiceError("SetGetProps", details));
            }

            DryadLogger.LogMethodExit(result);
            return result;
        }
示例#6
0
        /// <summary>
        /// Forwards a property set / get request to the vertex service client, retrying
        /// (up to MaxRetries attempts) after timeouts and communication errors as long
        /// as the connection can be reopened.
        /// </summary>
        /// <returns>
        /// The client's result on success; false if the dispatcher is faulted or the
        /// call ultimately fails.
        /// </returns>
        /// <remarks>
        /// The faulted event is raised when the connection cannot be reopened or all
        /// attempts are used up. Service faults and unexpected exceptions are logged
        /// without faulting the dispatcher.
        /// </remarks>
        public bool SetGetProps(string replyUri, int processId, ProcessPropertyInfo[] infos, string blockOnLabel, ulong blockOnVersion, long maxBlockTime, string getPropLabel, bool ProcessStatistics)
        {
            // Stays true unless a handler decides the failure is not a dispatcher fault.
            bool raiseFault = true;

            for (int attempt = 0; attempt < MaxRetries; attempt++)
            {
                try
                {
                    if (Faulted)
                    {
                        return false;
                    }

                    return this.m_client.SetGetProps(replyUri, processId, infos, blockOnLabel, blockOnVersion, maxBlockTime, getPropLabel, ProcessStatistics);
                }
                catch (FaultException<UnknownProcessError>)
                {
                    DryadLogger.LogWarning("Set Get Process Properties", "Attempt to get or set properties for unknown process {0} on node {1}", processId, this.m_nodeName);
                    raiseFault = false;
                    break;
                }
                catch (FaultException<VertexServiceError> serviceFault)
                {
                    DryadLogger.LogWarning("Set Get Process Properties", "Error setting or getting properties for process {0} on node {1}: {2}", processId, this.m_nodeName, serviceFault.Reason);
                    raiseFault = false;
                    break;
                }
                catch (TimeoutException timeoutEx)
                {
                    DryadLogger.LogWarning("Set Get Process Properties", "Timeout communicating with vertex service for process {0} on node {1}: {2}", processId, this.m_nodeName, timeoutEx.ToString());
                    if (SafeOpenConnection())
                    {
                        continue;
                    }

                    // Reconnect failed: leave the loop with the fault flag still set.
                    break;
                }
                catch (CommunicationException commEx)
                {
                    DryadLogger.LogWarning("Set Get Process Properties", "Error communicating with vertex service for process {0} on node {1}: {2}", processId, this.m_nodeName, commEx.ToString());
                    if (SafeOpenConnection())
                    {
                        continue;
                    }

                    // Reconnect failed: leave the loop with the fault flag still set.
                    break;
                }
                catch (Exception ex)
                {
                    DryadLogger.LogError(0, ex, "Error calling SetGetProps for process {0} on node {1}", processId, m_nodeName);
                    raiseFault = false;
                    break;
                }
            }

            if (raiseFault)
            {
                RaiseFaultedEvent();
            }

            return false;
        }
示例#7
0
        /// <summary>
        /// Completes an asynchronous ScheduleProcess call on the vertex service client.
        /// </summary>
        /// <param name="asyncResult">Async result passed to the client's EndScheduleProcess</param>
        /// <returns>
        /// Success or Failure according to the client's answer; Failure when the dispatcher
        /// is faulted, the service reports an error or an unexpected exception occurs;
        /// Pending when a timeout or communication error led to the scheduling operation
        /// being started again; CommunicationError when the connection could not be
        /// reopened or ScheduleProcess declined the retry.
        /// </returns>
        public SchedulingResult EndScheduleProcess(IAsyncResult asyncResult)
        {
            // We don't want to retry the async end operation - if it fails retry
            // the whole scheduling operation

            try
            {
                if (Faulted)
                {
                    return SchedulingResult.Failure;
                }

                return this.m_client.EndScheduleProcess(asyncResult)
                    ? SchedulingResult.Success
                    : SchedulingResult.Failure;
            }
            catch (FaultException<VertexServiceError> vse)
            {
                DryadLogger.LogWarning("Schedule Process", "Error completing schedule process {0} on node {1}: {2}", this.m_currentProcess.Id, this.m_nodeName, vse.Reason);
                return SchedulingResult.Failure;
            }
            catch (TimeoutException te)
            {
                // Fall through to the retry logic below
                DryadLogger.LogWarning("Schedule Process", "Timeout communicating with vertex service for process {0} on node {1}: {2}", this.m_currentProcess.Id, this.m_nodeName, te.ToString());
            }
            catch (CommunicationException ce)
            {
                // Fall through to the retry logic below
                DryadLogger.LogWarning("Schedule Process", "Error communicating with vertex service for process {0} on node {1}: {2}", this.m_currentProcess.Id, this.m_nodeName, ce.ToString());
            }
            catch (Exception e)
            {
                // The node placeholder must be {1}; with {0} the process id was printed twice
                // and the node name never appeared in the log.
                DryadLogger.LogError(0, e, "Error calling EndScheduleProcess for process {0} on node {1}", this.m_currentProcess.Id, m_nodeName);
                return SchedulingResult.Failure;
            }

            // If we make it here, then we need to retry the scheduling operation
            if (SafeOpenConnection())
            {
                // ScheduleProcess manages the retry count and returns false if it is exceeded
                DryadLogger.LogDebug("Schedule Process", "Communication error: retrying process {0} on node {1}", this.m_currentProcess.Id, this.m_nodeName);
                if (ScheduleProcess(m_currentReplyUri, m_currentProcess, m_currentAsyncCallback))
                {
                    return SchedulingResult.Pending;
                }
            }

            // SafeOpenConnection failed or retry count exceeded - fault the dispatcher.
            DryadLogger.LogWarning("Schedule Process", "Connection failed to node {0}", this.m_nodeName);
            return SchedulingResult.CommunicationError;
        }
        /// <summary>
        /// Check whether current operation context should be allowed access
        /// </summary>
        /// <param name="operationContext">Current operation context</param>
        /// <returns>
        /// true only when the caller has an authenticated, non-anonymous Windows identity
        /// whose user matches the identity this service runs as; false otherwise
        /// </returns>
        protected override bool CheckAccessCore(OperationContext operationContext)
        {
            //TODO: Put logging information to appropriate channels when available.

            //
            // Fail if there is no security context at all. Without this check the
            // property accesses below would throw instead of denying access.
            //
            var securityContext = operationContext.ServiceSecurityContext;
            if (securityContext == null)
            {
                DryadLogger.LogError(0, null, "Vertex authentication failed : Service security context is null.");
                return false;
            }

            //
            // Fail if context is anonymous
            //
            if (securityContext.IsAnonymous)
            {
                DryadLogger.LogError(0, null, "Vertex authentication failed : Service security context is anonymous.");
                return false;
            }

            //
            // Get identity used in current context
            //
            WindowsIdentity callerIdentity = securityContext.WindowsIdentity;

            if (callerIdentity == null)
            {
                //
                // Fail if identity is not set
                //
                DryadLogger.LogError(0, null, "Vertex authentication failed : Caller identity is null.");
                return false;
            }

            if (callerIdentity.IsAnonymous)
            {
                //
                // Fail if identity is anonymous
                //
                DryadLogger.LogError(0, null, "Vertex authentication failed : Caller identity is anonymous.");
                return false;
            }

            if (!callerIdentity.IsAuthenticated)
            {
                //
                // Fail if identity is not authenticated
                //
                DryadLogger.LogError(0, null, "Vertex authentication failed : Caller identity is not authenticated.");
                return false;
            }

            //
            // If operation context has same user as vertex service, then allow, otherwise fail.
            //
            if (this.currentIdentity.User == callerIdentity.User)
            {
                return true;
            }

            DryadLogger.LogError(0, null, "Vertex authentication failed : Current identity is {0}, caller identity is {1}", this.currentIdentity.Name, callerIdentity.Name);
            return false;
        }
示例#9
0
        /// <summary>
        /// Set process state to cancelled and stop the vertex host process if possible
        /// </summary>
        /// <param name="suppressNotifications">
        /// When true, the Exited handler is detached before the system process is killed
        /// and Process_Exited is not invoked by this method.
        /// </param>
        public void Cancel(bool suppressNotifications)
        {
            DryadLogger.LogMethodEntry(this.DryadId);

            // The state check and the switch to Completed/cancelled happen under the lock
            lock (syncRoot)
            {
                if (state == ProcessState.Completed)
                {
                    // Process has already completed before cancelation made it here, do nothing
                    DryadLogger.LogInformation("Cancel process", "Process {0} has already exited", DryadId);
                    DryadLogger.LogMethodExit();
                    return;
                }
                DryadLogger.LogInformation("Cancel process", "Process {0} has not already exited", DryadId);
                state          = ProcessState.Completed;
                this.cancelled = true;
            }

            // If the process started, kill it
            if (systemProcess != null)
            {
                try
                {
                    // Killing the process will trigger Process_Exited
                    DryadLogger.LogInformation("Cancel process", "Killing system process for process id {0}", DryadId);

                    if (suppressNotifications)
                    {
                        // Remove the Exited event handler
                        systemProcess.Exited -= this.Process_Exited;
                    }
                    systemProcess.Kill();
                    DryadLogger.LogMethodExit();
                    // Kill succeeded: return here so Process_Exited is not also called manually below
                    return;
                }
                catch (Exception e)
                {
                    //
                    // Failed to kill process - log exception
                    //
                    DryadLogger.LogError(0, e, "Failed to kill system process for process id {0}", DryadId);
                }
            }
            else
            {
                DryadLogger.LogInformation("Cancel process", "Process {0} has not started yet", DryadId);
            }

            // Process was either not running or failed to die, trigger Process_Exited ourself
            if (!suppressNotifications)
            {
                Process_Exited(this, null);
            }
            DryadLogger.LogMethodExit();
        }
示例#10
0
 /// <summary>
 /// Passes a completed set / get properties notification on to the vertex scheduler.
 /// Any exception is logged and not rethrown.
 /// </summary>
 public void SetGetPropsComplete(int processId, ProcessInfo info, string[] propertyLabels, ulong[] propertyVersions)
 {
     try
     {
         vertexScheduler.SetGetPropsComplete(processId, info, propertyLabels, propertyVersions);
     }
     catch (Exception schedulerEx)
     {
         DryadLogger.LogError(0, schedulerEx, "Failed to complete set / get properties for process {0}", processId);
     }
 }
示例#11
0
 /// <summary>
 /// Passes a process exit notification on to the vertex scheduler.
 /// Any exception is logged and not rethrown.
 /// </summary>
 public void ProcessExited(int processId, int exitCode)
 {
     try
     {
         vertexScheduler.ProcessExit(processId, exitCode);
     }
     catch (Exception schedulerEx)
     {
         DryadLogger.LogError(0, schedulerEx, "Failed to execute process exit for process {0}", processId);
     }
 }
示例#12
0
 /// <summary>
 /// Passes a process state change notification on to the vertex scheduler.
 /// Any exception is logged and not rethrown.
 /// </summary>
 public void FireStateChange(int processId, ProcessState newState)
 {
     try
     {
         vertexScheduler.ProcessChangeState(processId, newState);
     }
     catch (Exception schedulerEx)
     {
         DryadLogger.LogError(0, schedulerEx, "Failed to change state to {0} for process {1}", newState.ToString(), processId);
     }
 }
示例#13
0
        /// <summary>
        /// Tells the vertex service that the Graph Manager no longer needs
        /// vertex process processId.
        /// </summary>
        /// <param name="processId">Process Id of the process to release</param>
        /// <remarks>
        /// Timeouts and communication errors are retried (up to MaxRetries attempts) while
        /// the connection can be reopened; otherwise the faulted event is raised. Any other
        /// exception is logged without faulting the dispatcher.
        /// </remarks>
        public void ReleaseProcess(int processId)
        {
            // Stays true unless a handler decides the failure is not a dispatcher fault.
            bool raiseFault = true;

            for (int attempt = 0; attempt < MaxRetries; attempt++)
            {
                try
                {
                    if (CurrentProcess == processId)
                    {
                        m_currentProcess = null;
                    }

                    if (!Faulted)
                    {
                        this.m_client.ReleaseProcess(processId);
                    }

                    return;
                }
                // ReleaseProcess is one-way
                catch (TimeoutException timeoutEx)
                {
                    DryadLogger.LogWarning("Release Process", "Timeout communicating with vertex service on node {0}: {1}", this.m_nodeName, timeoutEx.ToString());
                    if (SafeOpenConnection())
                    {
                        continue;
                    }

                    break;
                }
                catch (CommunicationException commEx)
                {
                    DryadLogger.LogWarning("Release Process", "Error communicating with vertex service on node {0}: {1}", this.m_nodeName, commEx.ToString());
                    if (SafeOpenConnection())
                    {
                        continue;
                    }

                    break;
                }
                catch (Exception ex)
                {
                    DryadLogger.LogError(0, ex, "Error calling ReleaseProcess for node {0}", m_nodeName);
                    raiseFault = false;
                    break;
                }
            }

            if (raiseFault)
            {
                RaiseFaultedEvent();
            }
        }
示例#14
0
        /// <summary>
        /// Sends the vertex endpoint addresses to the vertex service client.
        /// </summary>
        /// <param name="vertexEndpointAddresses">Addresses passed through to the client's Initialize call</param>
        /// <remarks>
        /// Timeouts and communication errors are retried (up to MaxRetries attempts) while
        /// the connection can be reopened; otherwise the faulted event is raised. Any other
        /// exception is logged without faulting the dispatcher.
        /// </remarks>
        public void Initialize(StringDictionary vertexEndpointAddresses)
        {
            // Stays true unless a handler decides the failure is not a dispatcher fault.
            bool raiseFault = true;

            for (int attempt = 0; attempt < MaxRetries; attempt++)
            {
                try
                {
                    if (!Faulted)
                    {
                        this.m_client.Initialize(vertexEndpointAddresses);
                    }

                    return;
                }
                // Initialize is one-way
                catch (TimeoutException timeoutEx)
                {
                    DryadLogger.LogWarning("Initialize", "Timeout communicating with vertex service on node {0}: {1}", this.m_nodeName, timeoutEx.ToString());
                    if (SafeOpenConnection())
                    {
                        continue;
                    }

                    break;
                }
                catch (CommunicationException commEx)
                {
                    DryadLogger.LogWarning("Initialize", "Error communicating with vertex service on node {0}: {1}", this.m_nodeName, commEx.ToString());
                    if (SafeOpenConnection())
                    {
                        continue;
                    }

                    break;
                }
                catch (Exception ex)
                {
                    DryadLogger.LogError(0, ex, "Error calling Initialize for node {0}", m_nodeName);
                    raiseFault = false;
                    break;
                }
            }

            if (raiseFault)
            {
                RaiseFaultedEvent();
            }
        }
示例#15
0
        /// <summary>
        /// Closes the process start event and all property wait handles, then marks
        /// the object as disposed. The work is done at most once, under syncRoot.
        /// </summary>
        /// <param name="disposing">
        /// When true the wait handles are closed; when false only the disposed flag is set.
        /// </param>
        private void Dispose(bool disposing)
        {
            DryadLogger.LogMethodEntry(disposing);
            if (!this.m_disposed)
            {
                lock (syncRoot)
                {
                    // Check the flag again now that the lock is held
                    if (!this.m_disposed)
                    {
                        if (disposing)
                        {
                            // Release the start event handle; a failure is only logged
                            try
                            {
                                processStartEvent.Close();
                            }
                            catch (Exception startCloseEx)
                            {
                                DryadLogger.LogError(0, startCloseEx);
                            }

                            // Release every get/set property wait handle, label by label
                            foreach (KeyValuePair<string, Dictionary<ulong, ManualResetEvent>> labelEntry in propertyWaitEvents)
                            {
                                foreach (KeyValuePair<ulong, ManualResetEvent> versionEntry in labelEntry.Value)
                                {
                                    try
                                    {
                                        versionEntry.Value.Close();
                                    }
                                    catch (Exception waitCloseEx)
                                    {
                                        DryadLogger.LogError(0, waitCloseEx);
                                    }
                                }
                            }

                            propertyWaitEvents.Clear();
                        }

                        m_disposed = true;
                    }
                }
            }

            DryadLogger.LogMethodExit();
        }
示例#16
0
        /// <summary>
        /// Cancels this process. A process not yet assigned to a node is marked cancelled
        /// and moved straight to Completed; an assigned process is cancelled through its
        /// dispatcher; a completed process is left alone.
        /// </summary>
        public void Cancel()
        {
            lock (SyncRoot)
            {
                // Not on a node yet: nothing to cancel remotely, just complete it here
                if (this.CurrentState < ProcessState.AssignedToNode)
                {
                    this.m_cancelled = true;
                    this.ExitCode    = 0x830A0003; // DrError_VertexReceivedTermination
                    DryadLogger.LogInformation("Cancel process", "Cancelation received for vertex {0}.{1} before it was assigned to a node", m_graphManagerId, m_graphManagerVersion);
                    ChangeState(ProcessState.Completed);
                    return;
                }

                // Already finished: nothing left to do
                if (this.CurrentState == ProcessState.Completed)
                {
                    DryadLogger.LogInformation("Cancel process", "Cancellation received for vertex {0}.{1} after it completed", m_graphManagerId, m_graphManagerVersion);
                    return;
                }

                // Assigned but without a dispatcher: unexpected, log and give up
                if (Dispatcher == null)
                {
                    DryadLogger.LogError(0, null, "Cancellation received for vertex {0}.{1} in state {2} with no dispatcher", m_graphManagerId, m_graphManagerVersion, CurrentState.ToString());
                    return;
                }

                // Assigned to a node: the cancellation has to be carried out at the node
                DryadLogger.LogInformation("Cancel process", "Cancellation received for vertex {0}.{1} after it was assigned to node {2}", m_graphManagerId, m_graphManagerVersion, Dispatcher.NodeName);
                if (Dispatcher != null)
                {
                    Dispatcher.CancelScheduleProcess(m_id);
                }
            }
        }
示例#17
0
        /// <summary>
        /// Logs the failure that triggered a retry, tries to reopen the callback client
        /// and then pauses for retrySleepTime before returning.
        /// </summary>
        /// <param name="replyUri">URI to respond to</param>
        /// <param name="e">Reason for retry</param>
        /// <returns>The reopened client, or null when reopening threw</returns>
        private static VertexCallbackServiceClient ReopenClientForRetry(string replyUri, Exception e)
        {
            DryadLogger.LogError(0, e);

            VertexCallbackServiceClient reopened = null;
            try
            {
                reopened = ReopenClient(replyUri);
            }
            catch (Exception reopenFailure)
            {
                DryadLogger.LogError(0, reopenFailure, "Unable to reopen client connection");
            }

            // The pause happens whether or not the reopen succeeded
            System.Threading.Thread.Sleep(retrySleepTime);

            return reopened;
        }
示例#18
0
        /// <summary>
        /// Marks the dispatcher as faulted and notifies FaultedEvent subscribers.
        /// When running inside the vertex host process the process exits instead.
        /// </summary>
        /// <param name="taskFailed">
        /// true when the underlying task failed; the event is then raised even if the
        /// dispatcher was already faulted.
        /// </param>
        public void RaiseFaultedEvent(bool taskFailed)
        {
            bool raiseEvent = false;

            // Read the process name inside a using block so the Process object,
            // which is disposable, is released straight away.
            string currentProcessName;
            using (Process currentProcess = Process.GetCurrentProcess())
            {
                currentProcessName = currentProcess.ProcessName;
            }

            // For SP3, we need to crash if this happens in the vertex host
            if (String.Compare(currentProcessName, "HpcQueryVertexHost", StringComparison.OrdinalIgnoreCase) == 0)
            {
                DryadLogger.LogCritical(0, null, "Vertex Host lost communication with Vertex Service while updating vertex status: Exiting vertex. Graph Manager will rerun a failed vertex up to six times.");
                Environment.Exit(unchecked ((int)Constants.DrError_VertexHostLostCommunication));
            }

            lock (SyncRoot)
            {
                // We always want to raise the faulted event if the
                // task failed, so that the dispatcher is disposed.

                // If the task did not fail, we want to ensure that
                // the event is only raised once for a given fault.
                raiseEvent = taskFailed || (!Faulted);

                // We never want to reset m_taskFailed once it's been set
                // to true, because the task isn't coming back.
                m_taskFailed = m_taskFailed || taskFailed;

                m_faulted = true;
            }

            if (raiseEvent)
            {
                DryadLogger.LogError(0, null, "Dispatcher for task {0} has faulted on node {1}, current process: {2}", m_taskId, m_nodeName, CurrentProcess == InvalidProcessId ? "<none>" : CurrentProcess.ToString());

                // Notice that this will keep any locks that are currently held, so refrain from calling this while enumerating the dispatchers
                // Copy the delegate first: invoking an event with no subscribers throws NullReferenceException.
                var handler = FaultedEvent;
                if (handler != null)
                {
                    handler(this, null);
                }
            }
        }
示例#19
0
        /// <summary>
        /// Asynchronously called on start command
        /// </summary>
        /// <param name="obj"></param>
        void StartProcessThreadProc(Object obj)
        {
            ManualResetEvent serviceInitializedEvent = obj as ManualResetEvent;
            bool             started = false;

            try
            {
                //
                // Wait for service initialization
                //
                serviceInitializedEvent.WaitOne();

                if (ExecutionHelper.InitializeForProcessExecution(dryadProcessId, Environment.GetEnvironmentVariable("XC_RESOURCEFILES")))
                {
                    //
                    // Vertex working directory configured successfully, start the vertex host
                    //
                    environment.Add(Constants.vertexSvcLocalAddrEnvVar, localAddress);

                    ProcessStartInfo startInfo = new ProcessStartInfo();
                    startInfo.CreateNoWindow   = true;
                    startInfo.UseShellExecute  = false;
                    startInfo.WorkingDirectory = ProcessPathHelper.ProcessPath(dryadProcessId);

                    //YARN Debugging
                    //var procEnvVarKeys = startInfo.EnvironmentVariables.Keys;
                    //foreach (string key in procEnvVarKeys)
                    //{
                    //    DryadLogger.LogInformation("StartProcess", "key: '{0}' value: '{1}'", key, startInfo.EnvironmentVariables[key]);
                    //}

                    string[] args = commandLine.Split(' ');
                    string   arg  = "";
                    for (int i = 1; i < args.Length; i++)
                    {
                        arg += args[i] + " ";
                    }

                    //
                    // Use either FQ path or path relative to job path
                    //
                    if (Path.IsPathRooted(args[0]))
                    {
                        startInfo.FileName = args[0];
                    }
                    else
                    {
                        startInfo.FileName = Path.Combine(ProcessPathHelper.JobPath, args[0]);
                    }
                    DryadLogger.LogInformation("StartProcess", "FileName: '{0}'", startInfo.FileName);

                    //
                    // Add environment variable to vertex host process
                    //
                    startInfo.Arguments = arg;
                    foreach (DictionaryEntry entry in environment)
                    {
                        string key = entry.Key.ToString();

                        if (key == null || startInfo.EnvironmentVariables.ContainsKey(key))
                        {
                            DryadLogger.LogInformation("StartProcess", "Attempting to add existing key '{0}' with value '{1}'",
                                                       entry.Key, entry.Value);
                        }
                        else
                        {
                            startInfo.EnvironmentVariables.Add(key, entry.Value.ToString());
                        }
                    }

                    lock (syncRoot)
                    {
                        //
                        // After taking lock, start the vertex host process and set up exited event handler
                        //
                        if (cancelled)
                        {
                            // If we've already been canceled, don't start the process
                            DryadLogger.LogInformation("Process start", "Not starting process {0} due to receipt of cancellation", DryadId);
                            return;
                        }
                        else
                        {
                            systemProcess                     = new Process();
                            systemProcess.StartInfo           = startInfo;
                            systemProcess.EnableRaisingEvents = true;
                            systemProcess.Exited             += new EventHandler(Process_Exited);
                            Console.WriteLine("Process start - Vertex host process starting");
                            started = systemProcess.Start();
                            Console.WriteLine("Process start - Vertex host process started");
                            if (started)
                            {
                                DryadLogger.LogInformation("Process start", "Vertex host process started");
                                state = ProcessState.Running;
                            }
                            else
                            {
                                DryadLogger.LogError(0, null, "Vertex host process failed to start");
                            }
                        }
                    }
                }
                else
                {
                    DryadLogger.LogError(0, null, "Initialization failed");
                }
            }
            catch (Exception e)
            {
                DryadLogger.LogError(0, e, "Error starting vertex");
            }

            if (started)
            {
                //
                // Notify Graph Manager that process started if successful
                //
                bool success = ReplyDispatcher.FireStateChange(this.graphManagerReplyUri, this.dryadProcessId, ProcessState.Running);
                if (!success)
                {
                    //
                    // Graph manager doesn't know we started and we have no way to tell it, so it's
                    // best to just fail the vertex service task and let the job manager inform the graph manager
                    //
                    VertexService.Surrender(new Exception("Unable to communicate with graph manager."));
                }
            }
            else
            {
                //
                // Otherwise, notify GM that process has failed
                //
                lock (syncRoot)
                {
                    // If we've already been canceled, we don't need to change state or record the initialization failure
                    if (!cancelled)
                    {
                        state       = ProcessState.Completed;
                        this.failed = true;
                        exitCode    = unchecked ((int)Constants.DrError_VertexInitialization); // DryadError_VertexInitialization
                    }
                }

                if (failed)  // This also means we weren't canceled
                {
                    // Notify the Graph Manager that the process failed to start
                    Process_Exited(this, null);
                }
            }

            //
            // Make sure process start event is set
            //
            processStartEvent.Set();
        }
示例#20
0
        /// <summary>
        /// Moves this process forward to <paramref name="newState"/> and notifies the listeners and
        /// waiters registered for that state or any earlier one.
        /// </summary>
        /// <remarks>
        /// Runs entirely under SyncRoot. A request that does not advance the state (new state not
        /// greater than the current one) is logged as a warning and otherwise ignored.
        /// </remarks>
        /// <param name="newState">The state to transition to.</param>
        public void ChangeState(ProcessState newState)
        {
            lock (SyncRoot)
            {
                // Only forward transitions are applied
                if (newState <= m_currentState)
                {
                    DryadLogger.LogWarning("Change State", "Unexpected state change attempted for process {0}: from {1} to {2}", this.m_id, this.m_currentState.ToString(), newState.ToString());
                    return;
                }

                DryadLogger.LogDebug("Change State", "Transition process {0} from state {1} to state {2}", m_id, m_currentState, newState);
                m_currentState = newState;

                List<ProcessState> notifiedListenerStates = new List<ProcessState>();
                List<ProcessState> signalledWaiterStates  = new List<ProcessState>();

                //
                // Listeners: anything registered for the new state or an earlier one is served, so that
                // registrations for a skipped state (e.g. process failed to start) are not left behind
                //
                foreach (ProcessState listenerState in m_stateChangeListeners.Keys)
                {
                    if (listenerState > m_currentState)
                    {
                        continue;
                    }

                    if (m_stateChangeListeners[listenerState] != null)
                    {
                        XComputeProcessStateChangeEventArgs args = new XComputeProcessStateChangeEventArgs(m_id, m_currentState, false);
                        m_stateChangeListeners[listenerState](this, args);

                        // The timer associated with this listener, if any, is no longer needed
                        if (m_stateChangeTimers.ContainsKey(m_stateChangeListeners[listenerState]))
                        {
                            m_stateChangeTimers[m_stateChangeListeners[listenerState]].Dispose();
                            m_stateChangeTimers.Remove(m_stateChangeListeners[listenerState]);
                        }
                    }

                    // Removal is deferred until the key enumeration has finished
                    notifiedListenerStates.Add(listenerState);
                }

                foreach (ProcessState listenerState in notifiedListenerStates)
                {
                    m_stateChangeListeners.Remove(listenerState);
                }

                //
                // Waiters: first signal every event registered for a state that has been reached ...
                //
                foreach (ProcessState waiterState in m_stateChangeWaiters.Keys)
                {
                    if (waiterState > m_currentState)
                    {
                        continue;
                    }

                    foreach (ManualResetEvent waiter in m_stateChangeWaiters[waiterState])
                    {
                        waiter.Set();
                    }
                    signalledWaiterStates.Add(waiterState);
                }

                //
                // ... then close those events and drop their registrations
                //
                foreach (ProcessState waiterState in signalledWaiterStates)
                {
                    foreach (ManualResetEvent waiter in m_stateChangeWaiters[waiterState])
                    {
                        try
                        {
                            waiter.Close();
                        }
                        catch (Exception closeError)
                        {
                            DryadLogger.LogError(0, closeError);
                        }
                    }
                    m_stateChangeWaiters.Remove(waiterState);
                }

                // TransitionToRunning waits on this event before moving the process to Running
                if (m_currentState == ProcessState.AssignedToNode)
                {
                    m_assignedToNodeEvent.Set();
                }
            }
        }
示例#21
0
        /// <summary>
        /// Thread procedure for the set/get property service operation. Waits for the process to start,
        /// records the properties carried by the request, optionally blocks for a property update, and
        /// reports the resulting process information back to the requester.
        /// </summary>
        /// <param name="obj">The PropertyRequest to service, passed as Object by the thread start call.</param>
        void SetGetPropThreadProc(Object obj)
        {
            DryadLogger.LogMethodEntry(DryadId);
            PropertyRequest r = obj as PropertyRequest;
            if (r == null)
            {
                //
                // Without a request there are no properties to record and no reply URI to answer,
                // so log the problem instead of failing later with a NullReferenceException
                //
                DryadLogger.LogError(0, null, "SetGetProp thread for process {0} was started without a valid property request", DryadId);
                DryadLogger.LogMethodExit();
                return;
            }

            ProcessInfo infoLocal = new ProcessInfo();

            ulong[]  propertyVersions = null;
            string[] propertyLabels   = null;

            //
            // Make sure process is started before continuing
            //
            if (this.State < ProcessState.Running)
            {
                try
                {
                    processStartEvent.WaitOne();
                }
                catch (ObjectDisposedException ex)
                {
                    // The process was cancelled and released before it started running, just return
                    if (exited)
                    {
                        DryadLogger.LogInformation("SetGetProp Thread", "Process {0} cancelled or exited before starting.", this.DryadId);
                    }
                    else
                    {
                        DryadLogger.LogError(0, ex);
                    }
                    DryadLogger.LogMethodExit();
                    return;
                }
            }

            //
            // Exit code reported to the requester:
            //   running   -> STATUS_PENDING
            //   failed    -> DrError_VertexError
            //   cancelled -> DrError_VertexReceivedTermination
            //   otherwise -> exit code of the vertex host process
            //
            infoLocal.processStatus = 0x103;  // WinNT.h STATUS_PENDING
            infoLocal.processState  = state;
            if (state == ProcessState.Running)
            {
                infoLocal.exitCode = 0x103; // WinNT.h STATUS_PENDING
            }
            else if (failed)
            {
                infoLocal.exitCode = Constants.DrError_VertexError;
            }
            else if (cancelled)
            {
                infoLocal.exitCode = Constants.DrError_VertexReceivedTermination;  // DryadError_VertexReceivedTermination
            }
            else
            {
                infoLocal.exitCode = (uint)systemProcess.ExitCode;
            }

            //
            // Record specified properties and update versions - wakes up anyone waiting for property changes
            //
            SetProperties(r.infos, out propertyLabels, out propertyVersions);

            //
            // Try to get property update
            //
            if (BlockOnProperty(r.blockOnLabel, r.blockOnVersion, r.maxBlockTime))
            {
                //
                // If property update was received, update the received property information
                // If received property marks vertex completed, record that
                //
                if (r.getPropLabel != null && r.getPropLabel.Length > 0)
                {
                    lock (syncRoot)
                    {
                        infoLocal.propertyInfos = new ProcessPropertyInfo[1];

                        int index;
                        if (TryGetProperty(r.getPropLabel, out infoLocal.propertyInfos[0], out index) == false)
                        {
                            DryadLogger.LogError(0, null, "Failed to get property for label {0}", r.getPropLabel);
                        }

                        //
                        // The lookup may have produced no property (logged above); only inspect the
                        // entry when there is one, rather than dereferencing a null slot
                        //
                        if (infoLocal.propertyInfos[0] != null &&
                            StatusMessageContainsDryadError_VertexCompleted(infoLocal.propertyInfos[0].propertyLabel))
                        {
                            CopyProp(infoLocal.propertyInfos[0], out latestVertexStatusSent);
                        }
                    }
                }

                //
                // If request asks for statistics on vertex process, get them
                //
                if (r.ProcessStatistics)
                {
                    if (GetStatistics(out infoLocal.processStatistics) == false)
                    {
                        DryadLogger.LogError(0, null, "Failed to get vertex statistics");
                    }
                }
            }

            //
            // Try to report property change, if unsuccessful, kill the running vertex host process
            //
            if (!ReplyDispatcher.SetGetPropsComplete(r.replyUri, systemProcess, dryadProcessId, infoLocal, propertyLabels, propertyVersions))
            {
                try
                {
                    systemProcess.Kill();
                }
                catch (InvalidOperationException /* unused ioe */)
                {
                    // The process has already exited
                    // -or-
                    // There is no process associated with this Process object.
                }
                catch (Exception eInner)
                {
                    //
                    // all other exceptions
                    //
                    DryadLogger.LogError(0, eInner, "Exception calling back to '{0}'", r.replyUri);
                }
            }

            //
            // If a property was handled from the graph manager, decrement the waiter count
            //
            if (ReplyDispatcher.IsGraphMrgUri(r.replyUri))
            {
                int n = Interlocked.Decrement(ref propertyWaiters);
                DryadLogger.LogInformation("SetGetProp Thread", "Process {0} propertyWaiters = {1}", DryadId, n);
            }

            lock (syncRoot)
            {
                //
                // Once the latest status recorded above carries the vertex-completed error code,
                // remember that the final status message has been sent
                //
                if (!finalStatusMessageSent)
                {
                    if (latestVertexStatusSent != null)
                    {
                        if (!String.IsNullOrEmpty(latestVertexStatusSent.propertyString))
                        {
                            if (latestVertexStatusSent.propertyString.Contains(string.Format(@"(0x{0:x8})", Constants.DrError_VertexCompleted)))
                            {
                                finalStatusMessageSent = true;
                            }
                        }
                    }
                }
            }
            DryadLogger.LogMethodExit();
        }
示例#22
0
        /// <summary>
        /// Starts an asynchronous ScheduleProcess call on the vertex service client, retrying up to
        /// MaxRetries times when a timeout or communication error occurs and the connection can be reopened.
        /// </summary>
        /// <param name="replyUri">Reply URI passed to the call and remembered as the current reply URI.</param>
        /// <param name="req">Request whose Id, CommandLine and Environment are passed to the call.</param>
        /// <param name="cb">Callback handed to BeginScheduleProcess and remembered as the current callback.</param>
        /// <returns>
        /// true if BeginScheduleProcess was issued; false if the dispatcher is faulted, the scheduling
        /// attempts are used up, or every try ended in an error.
        /// </returns>
        public bool ScheduleProcess(string replyUri, ScheduleProcessRequest req, AsyncCallback cb)
        {
            // Stays true unless a service fault or an unexpected exception ends the retry loop
            bool raiseFault = true;

            for (int attempt = 0; attempt < MaxRetries; attempt++)
            {
                try
                {
                    // TODO: Why are we taking the lock in this particular case again?
                    lock (SyncRoot)
                    {
                        if (Faulted || m_schedulingAttempts >= MaxRetries)
                        {
                            return false;
                        }

                        m_schedulingAttempts++;

                        // Remember what is being scheduled so that, if the dispatcher faults,
                        // we know which process to kill
                        m_currentProcess       = req;
                        m_currentReplyUri      = replyUri;
                        m_currentAsyncCallback = cb;

                        this.m_client.BeginScheduleProcess(replyUri, req.Id, req.CommandLine, req.Environment, cb, (object)this);
                        return true;
                    }
                }
                catch (FaultException <VertexServiceError> serviceFault)
                {
                    // The service reported an error: give up without faulting the dispatcher
                    DryadLogger.LogWarning("Schedule Process", "Error scheduling process {0} on node {1}: {2}", req.Id, this.m_nodeName, serviceFault.Reason);
                    raiseFault = false;
                    break;
                }
                catch (TimeoutException timeout)
                {
                    DryadLogger.LogWarning("Schedule Process", "Timeout communicating with vertex service scheduling process {0} on node {1}: {2}", req.Id, this.m_nodeName, timeout.ToString());
                    if (SafeOpenConnection())
                    {
                        // Connection reopened, try again
                        continue;
                    }
                    raiseFault = true;
                    break;
                }
                catch (CommunicationException commError)
                {
                    DryadLogger.LogWarning("Schedule Process", "Error communicating with vertex service scheduling process {0} on node {1}: {2}", req.Id, this.m_nodeName, commError.ToString());
                    if (SafeOpenConnection())
                    {
                        // Connection reopened, try again
                        continue;
                    }
                    raiseFault = true;
                    break;
                }
                catch (Exception unexpected)
                {
                    DryadLogger.LogError(0, unexpected, "Error calling ScheduleProcess for process {0} on node {1}", req.Id, m_nodeName);
                    raiseFault = false;
                    break;
                }
            }

            if (raiseFault)
            {
                RaiseFaultedEvent();
            }
            return false;
        }