/// <summary>
/// Drops the service's reference to a vertex process and disposes it.
/// Failures are logged as warnings and never propagated to the caller.
/// </summary>
/// <param name="processId">process id to forget</param>
void IDryadVertexService.ReleaseProcess(int processId)
{
    DryadLogger.LogMethodEntry(processId);

    try
    {
        VertexProcess target = FindByDryadId(processId);
        if (target == null)
        {
            DryadLogger.LogWarning("Release Process", "Unknown process id {0}", processId);
        }
        else
        {
            // Remove from the table first, then release the process resources
            vertexProcessTable.Remove(target);
            target.Dispose();
        }
    }
    catch (Exception failure)
    {
        DryadLogger.LogWarning("Release Process", "Operation threw exception: {0}", failure.ToString());
    }

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Forwards a property set/get request to the vertex process with the given id.
/// </summary>
/// <param name="replyEpr">callback URI</param>
/// <param name="processId">vertex process id</param>
/// <param name="infos">property information</param>
/// <param name="blockOnLabel">property update label</param>
/// <param name="blockOnVersion">property update version</param>
/// <param name="maxBlockTime">maximum time to wait for update</param>
/// <param name="getPropLabel">property to get</param>
/// <param name="ProcessStatistics">vertex host process statistics</param>
/// <returns>
/// Result reported by the vertex process; false when the process id is unknown.
/// Any exception is logged and rethrown as a VertexServiceError fault.
/// </returns>
bool IDryadVertexService.SetGetProps(string replyEpr, int processId, ProcessPropertyInfo[] infos, string blockOnLabel, ulong blockOnVersion, long maxBlockTime, string getPropLabel, bool ProcessStatistics)
{
    DryadLogger.LogMethodEntry(replyEpr, processId);

    bool updated = false;
    try
    {
        // Look up the vertex process that owns this id
        VertexProcess target = FindByDryadId(processId);
        if (target == null)
        {
            DryadLogger.LogError(0, null, "Failed to set / get process properties: Unknown process id {0}", processId);
        }
        else
        {
            updated = target.SetGetProps(replyEpr, infos, blockOnLabel, blockOnVersion, maxBlockTime, getPropLabel, ProcessStatistics);
        }
    }
    catch (Exception failure)
    {
        DryadLogger.LogWarning("Set Or Get Process Properties", "Operation threw exception: {0}", failure.ToString());
        throw new FaultException<VertexServiceError>(new VertexServiceError("SetGetProps", failure.ToString()));
    }

    DryadLogger.LogMethodExit(updated);
    return updated;
}
/// <summary>
/// Blocks until the number of running tasks reaches m_startNodes.
/// Wakes each time the task-change event is signalled and re-checks the counters.
/// </summary>
/// <returns>Always true; the method only returns once enough tasks are running</returns>
bool ISchedulerHelper.WaitForTasksReady()
{
    while (true)
    {
        // Signalled by the task monitor thread after it has processed a batch of changes
        m_taskChangeEvt.WaitOne();

        // Hold the lock so the counters cannot be updated while they are being compared
        lock (this)
        {
            DryadLogger.LogInformation("Wait for vertex tasks", "{0} tasks are running, waiting for at least {1} before starting", m_runningTasks, m_startNodes);

            bool enoughRunning = m_runningTasks >= m_startNodes;
            if (enoughRunning)
            {
                DryadLogger.LogDebug("Wait for vertex tasks", "Sufficient number of tasks transitioned to running to begin: {0} running tasks", m_runningTasks);
                return true;
            }
        }
    }
}
/// <summary>
/// Waits up to ten seconds for the assigned-to-node event, then changes state to Running.
/// The state change is fired even when the wait times out.
/// </summary>
/// <param name="state">Unused by this method</param>
public void TransitionToRunning(object state)
{
    DryadLogger.LogDebug("Change State", "Transitioning to Running with current state {0} for process {1}", this.m_currentState.ToString(), this.m_id);

    try
    {
        // In rare cases (such as a cancelled duplicate) the GM may close the process handle while
        // this transition is in flight. Dispose then closes the event handle, and the wait below
        // throws ObjectDisposedException, which is only logged because the GM is done with the process.
        bool assigned = m_assignedToNodeEvent.WaitOne(TimeSpan.FromSeconds(10), false);
        if (!assigned)
        {
            // Fire the state change anyway, otherwise the process becomes a zombie.
            // The GM will handle the transition; it may just be delayed.
            DryadLogger.LogWarning("Change State", "Timed out waiting for transition to AssignedToNode for process {0}", this.m_id);
        }
        else
        {
            DryadLogger.LogDebug("Change State", "Successfully waited for transition to {0} for process {1}", this.m_currentState.ToString(), this.m_id);
        }

        ChangeState(ProcessState.Running);
    }
    catch (ObjectDisposedException disposedError)
    {
        DryadLogger.LogError(0, disposedError, "Process handle was closed while waiting for transition to assigned to node");
    }
}
/// <summary>
/// Parses the graph manager id and version from a vertex command line.
/// The command line must split into exactly six tokens; tokens 4 and 5 (zero-based)
/// are parsed as integers. A warning is logged when parsing fails.
/// </summary>
/// <param name="commandLine">Vertex command line to parse</param>
public void SetIdAndVersion(string commandLine)
{
    string[] tokens = commandLine.Split(cmdLineSeparator, StringSplitOptions.RemoveEmptyEntries);

    bool ok = false;
    if (tokens.Length == 6)
    {
        lock (SyncRoot)
        {
            // Short-circuit: the version is only parsed when the id parsed successfully
            ok = Int32.TryParse(tokens[4], out m_graphManagerId)
                && Int32.TryParse(tokens[5], out m_graphManagerVersion);
        }
    }

    if (!ok)
    {
        DryadLogger.LogWarning("Set Vertex Id And Version", "Failed to parse vertex command line: {0}", commandLine);
    }
}
/// <summary>
/// Releases the wait handles owned by this process. Runs at most once.
/// </summary>
/// <param name="disposing">true to close the managed event handles</param>
private void Dispose(bool disposing)
{
    if (m_disposed)
    {
        return;
    }

    if (disposing)
    {
        DryadLogger.LogInformation("Dispose Process", "Releasing resources for process id {0}", this.m_id);
        this.m_assignedToNodeEvent.Close();

        foreach (KeyValuePair<ProcessState, List<ManualResetEvent>> waitersForState in m_stateChangeWaiters)
        {
            foreach (ManualResetEvent waiter in waitersForState.Value)
            {
                // One handle failing to close must not stop the others from being closed
                try
                {
                    waiter.Close();
                }
                catch (Exception closeError)
                {
                    DryadLogger.LogError(0, closeError);
                }
            }
        }
    }

    m_disposed = true;
}
/// <summary>
/// Queries the vertex service status, retrying up to MaxRetries times.
/// After each failed call the connection is reopened; if that fails, or the
/// dispatcher is already faulted, the faulted event is raised.
/// </summary>
/// <returns>
/// The status reported by the service, or a status with serviceIsAlive = false
/// when the service could not be reached.
/// </returns>
public VertexStatus CheckStatus()
{
    int attempt = 0;
    while (attempt < MaxRetries)
    {
        try
        {
            if (Faulted)
            {
                break;
            }
            return this.m_client.CheckStatus();
        }
        catch (Exception failure)
        {
            DryadLogger.LogError(0, failure, "node '{0}'", m_nodeName);

            bool reopened = SafeOpenConnection();
            if (!reopened)
            {
                break;
            }
        }

        attempt++;
    }

    RaiseFaultedEvent();

    VertexStatus unreachable = new VertexStatus();
    unreachable.serviceIsAlive = false;
    return unreachable;
}
/// <summary>
/// Cancels the vertex process with the provided id.
/// Unknown ids and exceptions are logged as warnings; nothing is propagated.
/// </summary>
/// <param name="processId">vertex process id</param>
void IDryadVertexService.CancelScheduleProcess(int processId)
{
    DryadLogger.LogMethodEntry(processId);

    try
    {
        VertexProcess target = FindByDryadId(processId);
        if (target == null)
        {
            DryadLogger.LogWarning("Cancel Process", "Unknown process id {0}", processId);
        }
        else
        {
            // false: do not suppress notifications
            target.Cancel(false);
        }
    }
    catch (Exception failure)
    {
        DryadLogger.LogWarning("Cancel Process", "Operation threw exception: {0}", failure.ToString());
    }

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Constructor - called when service first hosted.
/// Creates the process table and queues InitializationThreadProc on the thread pool.
/// </summary>
public VertexService()
{
    DryadLogger.LogMethodEntry();

    this.vertexProcessTable = new SynchronizedCollection<VertexProcess>();

    // Initialization runs on a pool thread, so the constructor does not wait for it
    WaitCallback initializer = new WaitCallback(InitializationThreadProc);
    System.Threading.ThreadPool.QueueUserWorkItem(initializer);

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Signals the task monitor thread to stop (if it is running), finishes the
/// application master, then waits for the monitor thread to exit.
/// </summary>
void ISchedulerHelper.StopTaskMonitorThread()
{
    DryadLogger.LogMethodEntry();

    bool mustJoin = false;
    if (m_taskMonitorThreadRunning)
    {
        lock (m_lock)
        {
            // Re-check under the lock before signalling
            if (m_taskMonitorThreadRunning)
            {
                m_threadStopEvt.Set();
                mustJoin = true;
            }
        }
    }

    // Finish is called whether or not the monitor thread was running
    m_appMaster.Finish(true);

    if (mustJoin)
    {
        try
        {
            m_taskMonitorThread.Join();
        }
        catch (Exception joinError)
        {
            DryadLogger.LogError(0, joinError, "Failed to wait for task monitor thread to stop.");
        }
    }

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Notifies registered property listeners that a set/get request completed.
/// A listener fires when its registered version is at or below the reported version,
/// or when it was registered with ulong.MaxValue. Fired listeners are then removed.
/// </summary>
/// <param name="info">Process information; its propertyInfos drive the "get" notifications</param>
/// <param name="propertyLabels">Labels of the properties that were set (may be null)</param>
/// <param name="propertyVersions">Versions matching propertyLabels by index (may be null)</param>
public void SetGetPropsComplete(ProcessInfo info, string[] propertyLabels, ulong[] propertyVersions)
{
    lock (SyncRoot)
    {
        // Set half: one pass per label that was written
        if (propertyLabels != null && propertyVersions != null)
        {
            for (int slot = 0; slot < propertyLabels.Length; slot++)
            {
                string label = propertyLabels[slot];
                if (!m_propertyListeners.ContainsKey(label))
                {
                    continue;
                }

                // Collect fired versions first; the listener map cannot be modified while enumerating it
                List<ulong> fired = new List<ulong>();
                foreach (KeyValuePair<ulong, GetSetPropertyEventHandler> listener in m_propertyListeners[label])
                {
                    if (listener.Key <= propertyVersions[slot] || listener.Key == ulong.MaxValue)
                    {
                        DryadLogger.LogDebug("SetGetProsComplete", "Set complete - m_id: {0} state: {1}, label: {2}", m_id, info.processState, label);

                        XComputeProcessGetSetPropertyEventArgs args = new XComputeProcessGetSetPropertyEventArgs(m_id, info, propertyVersions);
                        listener.Value(this, args);
                        fired.Add(listener.Key);
                    }
                }

                foreach (ulong firedVersion in fired)
                {
                    m_propertyListeners[label].Remove(firedVersion);
                }
            }
        }

        // Get half: one pass per property returned in the process info
        if (info != null && info.propertyInfos != null)
        {
            foreach (ProcessPropertyInfo returned in info.propertyInfos)
            {
                if (!m_propertyListeners.ContainsKey(returned.propertyLabel))
                {
                    continue;
                }

                List<ulong> fired = new List<ulong>();
                foreach (KeyValuePair<ulong, GetSetPropertyEventHandler> listener in m_propertyListeners[returned.propertyLabel])
                {
                    if (listener.Key <= returned.propertyVersion || listener.Key == ulong.MaxValue)
                    {
                        DryadLogger.LogDebug("SetGetProsComplete", "Get complete - m_id: {0} state: {1}, label: {2}", m_id, info.processState, returned.propertyLabel);

                        XComputeProcessGetSetPropertyEventArgs args = new XComputeProcessGetSetPropertyEventArgs(m_id, info, propertyVersions);
                        listener.Value(this, args);
                        fired.Add(listener.Key);
                    }
                }

                foreach (ulong firedVersion in fired)
                {
                    m_propertyListeners[returned.propertyLabel].Remove(firedVersion);
                }
            }
        }
    }
}
/// <summary>
/// Starts the task monitor thread if it is not already running.
/// Subscribes OnVertexChangeHandler to OnVertexChange before the thread starts;
/// if the thread cannot be started, that subscription is removed again so a later
/// retry does not register the handler twice.
/// </summary>
/// <returns>true if the thread is running (or was already running), false if it failed to start</returns>
bool ISchedulerHelper.StartTaskMonitorThread()
{
    // Only one monitor thread should run, in case this is called more than once
    if (m_taskMonitorThreadRunning)
    {
        return true;
    }

    lock (m_lock)
    {
        // Re-check under the lock
        if (m_taskMonitorThreadRunning)
        {
            return true;
        }

        VertexChangeEventHandler handler = new VertexChangeEventHandler(OnVertexChangeHandler);
        ((ISchedulerHelper)this).OnVertexChange += handler;

        try
        {
            m_taskMonitorThread = new Thread(new ThreadStart(TaskMonitorThread));
            m_taskMonitorThread.Start();
            m_taskMonitorThreadRunning = true;
            return true;
        }
        catch (Exception e)
        {
            // Undo the subscription made above so a failed start leaves no trace
            ((ISchedulerHelper)this).OnVertexChange -= handler;

            DryadLogger.LogCritical(0, e, "Failed to start task monitoring thread");
            return false;
        }
    }
}
/// <summary>
/// Creates the scheduler helper. When the job manager environment variable is empty,
/// node limits are read from the environment and the Java GM is initialized;
/// otherwise only the vertex table is allocated.
/// </summary>
public YarnSchedulerHelper()
{
    // Start the DryadLogger, just to make sure
    DryadLogger.Start("xcompute.log");
    m_taskUpdateQueue = new BlockingCollection<VertexTask>();

    string jobManagerSetting = Environment.GetEnvironmentVariable(Constants.jobManager);
    bool hasJobManager = !String.IsNullOrEmpty(jobManagerSetting);

    if (hasJobManager)
    {
        // The job manager variable is set: skip GM initialization
        m_vertices = new VertexTask[JobMaxNodes + 2];
        DryadLogger.LogInformation("YarnSchedulerHelper()", "Not initializing JAVA GM");
        return;
    }

    // Not running in a vertex: read the node limits and initialize the GM
    string minimumSetting = Environment.GetEnvironmentVariable("MINIMUM_COMPUTE_NODES");
    m_minNodes = int.Parse(minimumSetting);
    string maximumSetting = Environment.GetEnvironmentVariable("MAXIMUM_COMPUTE_NODES");
    m_maxNodes = int.Parse(maximumSetting);
    m_startNodes = m_minNodes;

    m_vertices = new VertexTask[JobMaxNodes + 2];

    DryadLogger.LogInformation("YarnSchedulerHelper()", "Initializing JAVA GM");
    DryadLogger.LogInformation("YarnSchedulerHelper()", "m_maxNodes: {0}", m_maxNodes);

    // Register the callback and the change handler before the application master is created
    AMInstance.RegisterGMCallback(new UpdateProcessState(QueueYarnUpdate));
    ((ISchedulerHelper)this).OnVertexChange += new VertexChangeEventHandler(OnVertexChangeHandler);
    m_appMaster = new AMInstance();
}
/// <summary>
/// Turns a task update into a vertex change event: fills in the new and (if known)
/// previous node, state and requeue count, stores the task as the current state for
/// its id, and raises the vertex change event.
/// </summary>
/// <param name="v">Updated task; its Id is used as the index into m_vertices</param>
public void ProcessYarnUpdate(VertexTask v)
{
    DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} on node {1} is in state {2}", v.Id, v.Node, v.State);

    VertexChangeEventArgs e = new VertexChangeEventArgs(v.Id);
    e.NewNode = v.Node;
    e.NewState = YarnTaskStateToVertexTaskState(v.State);
    e.NewRequeueCount = v.RequeueCount;

    // The Old* values are only filled in when a previous update exists for this id
    if (m_vertices[v.Id] != null)
    {
        e.OldNode = m_vertices[v.Id].Node;
        e.OldState = YarnTaskStateToVertexTaskState(m_vertices[v.Id].State);
        e.OldRequeueCount = m_vertices[v.Id].RequeueCount;
    }

    if (e.NewRequeueCount != e.OldRequeueCount)
    {
        DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} requeue count changed from {1} to {2}", v.Id, e.OldRequeueCount, e.NewRequeueCount);
    }

    // Update current vertex state
    m_vertices[v.Id] = v;

    // Copy the delegate to a local before invoking it: with no subscribers the field is
    // null, and invoking it directly would throw NullReferenceException on this thread.
    var handler = m_vertexChangeEvent;
    if (handler != null)
    {
        handler(this, e);
    }
}
/// <summary>
/// Sends a set/get properties request to the vertex service, retrying up to MaxRetries
/// times after timeouts and communication errors (the connection is reopened between attempts).
/// Service-reported faults and unexpected exceptions end the attempt without faulting the dispatcher.
/// </summary>
/// <returns>
/// The service's result on success; false when the dispatcher is faulted or the call
/// could not be completed.
/// </returns>
public bool SetGetProps(string replyUri, int processId, ProcessPropertyInfo[] infos, string blockOnLabel, ulong blockOnVersion, long maxBlockTime, string getPropLabel, bool ProcessStatistics)
{
    // Stays true unless the failure is one that should not fault the dispatcher
    bool raiseFault = true;

    for (int attempt = 0; attempt < MaxRetries; attempt++)
    {
        try
        {
            if (Faulted)
            {
                return false;
            }
            return this.m_client.SetGetProps(replyUri, processId, infos, blockOnLabel, blockOnVersion, maxBlockTime, getPropLabel, ProcessStatistics);
        }
        catch (FaultException<UnknownProcessError>)
        {
            DryadLogger.LogWarning("Set Get Process Properties", "Attempt to get or set properties for unknown process {0} on node {1}", processId, this.m_nodeName);
            raiseFault = false;
            break;
        }
        catch (FaultException<VertexServiceError> serviceError)
        {
            DryadLogger.LogWarning("Set Get Process Properties", "Error setting or getting properties for process {0} on node {1}: {2}", processId, this.m_nodeName, serviceError.Reason);
            raiseFault = false;
            break;
        }
        catch (TimeoutException timeout)
        {
            DryadLogger.LogWarning("Set Get Process Properties", "Timeout communicating with vertex service for process {0} on node {1}: {2}", processId, this.m_nodeName, timeout.ToString());
            if (!SafeOpenConnection())
            {
                // Could not reconnect: give up with raiseFault still true
                break;
            }
        }
        catch (CommunicationException commError)
        {
            DryadLogger.LogWarning("Set Get Process Properties", "Error communicating with vertex service for process {0} on node {1}: {2}", processId, this.m_nodeName, commError.ToString());
            if (!SafeOpenConnection())
            {
                // Could not reconnect: give up with raiseFault still true
                break;
            }
        }
        catch (Exception unexpected)
        {
            DryadLogger.LogError(0, unexpected, "Error calling SetGetProps for process {0} on node {1}", processId, m_nodeName);
            raiseFault = false;
            break;
        }
    }

    if (raiseFault)
    {
        RaiseFaultedEvent();
    }
    return false;
}
/// <summary>
/// Asynchronously starts the vertex process by queueing StartProcessThreadProc
/// on the thread pool.
/// </summary>
/// <param name="serviceInitializedEvent">Passed to the queued work item as its state</param>
/// <returns>The value returned by ThreadPool.QueueUserWorkItem</returns>
public bool Start(ManualResetEvent serviceInitializedEvent)
{
    DryadLogger.LogMethodEntry(this.DryadId);

    WaitCallback worker = new WaitCallback(StartProcessThreadProc);
    bool queued = ThreadPool.QueueUserWorkItem(worker, serviceInitializedEvent);

    DryadLogger.LogMethodExit(queued);
    return queued;
}
/// <summary>
/// Check whether current operation context should be allowed access.
/// Access is granted only to an authenticated, non-anonymous Windows identity
/// whose user matches the identity the vertex service runs as.
/// </summary>
/// <param name="operationContext">Current operation context</param>
/// <returns>true = allowed</returns>
protected override bool CheckAccessCore(OperationContext operationContext)
{
    //TODO: Put logging information to appropriate channels when available.

    // Reject an anonymous security context
    if (operationContext.ServiceSecurityContext.IsAnonymous)
    {
        DryadLogger.LogError(0, null, "Vertex authentication failed : Service security context is anonymous.");
        return false;
    }

    // Identity used in the current context
    WindowsIdentity callerIdentity = operationContext.ServiceSecurityContext.WindowsIdentity;

    // Reject when no identity is set
    if (callerIdentity == null)
    {
        DryadLogger.LogError(0, null, "Vertex authentication failed : Caller identity is null.");
        return false;
    }

    // Reject an anonymous identity
    if (callerIdentity.IsAnonymous)
    {
        DryadLogger.LogError(0, null, "Vertex authentication failed : Caller identity is anonymous.");
        return false;
    }

    // Reject an unauthenticated identity
    if (!callerIdentity.IsAuthenticated)
    {
        DryadLogger.LogError(0, null, "Vertex authentication failed : Caller identity is not authenticated.");
        return false;
    }

    // Allow only when the caller is the same user as the vertex service
    if (this.currentIdentity.User != callerIdentity.User)
    {
        DryadLogger.LogError(0, null, "Vertex authentication failed : Current identity is {0}, caller identity is {1}", this.currentIdentity.Name, callerIdentity.Name);
        return false;
    }

    return true;
}
/// <summary>
/// Completes an asynchronous ScheduleProcess call. The end operation itself is never
/// retried; after a timeout or communication error the connection is reopened and the
/// whole scheduling operation is resubmitted.
/// </summary>
/// <param name="asyncResult">Result handle of the pending ScheduleProcess call</param>
/// <returns>
/// Success or Failure for a completed call, Pending when the operation was resubmitted,
/// CommunicationError when the connection could not be restored or the retry was refused.
/// </returns>
public SchedulingResult EndScheduleProcess(IAsyncResult asyncResult)
{
    try
    {
        if (Faulted)
        {
            return SchedulingResult.Failure;
        }

        return this.m_client.EndScheduleProcess(asyncResult)
            ? SchedulingResult.Success
            : SchedulingResult.Failure;
    }
    catch (FaultException<VertexServiceError> vse)
    {
        DryadLogger.LogWarning("Schedule Process", "Error completing schedule process {0} on node {1}: {2}", this.m_currentProcess.Id, this.m_nodeName, vse.Reason);
        return SchedulingResult.Failure;
    }
    catch (TimeoutException te)
    {
        // Falls through to the retry below
        DryadLogger.LogWarning("Schedule Process", "Timeout communicating with vertex service for process {0} on node {1}: {2}", this.m_currentProcess.Id, this.m_nodeName, te.ToString());
    }
    catch (CommunicationException ce)
    {
        // Falls through to the retry below
        DryadLogger.LogWarning("Schedule Process", "Error communicating with vertex service for process {0} on node {1}: {2}", this.m_currentProcess.Id, this.m_nodeName, ce.ToString());
    }
    catch (Exception e)
    {
        // Fixed: the node placeholder was {0}, which printed the process id twice
        DryadLogger.LogError(0, e, "Error calling EndScheduleProcess for process {0} on node {1}", this.m_currentProcess.Id, m_nodeName);
        return SchedulingResult.Failure;
    }

    // Only timeouts and communication errors reach this point: retry the scheduling operation
    if (SafeOpenConnection())
    {
        // ScheduleProcess manages the retry count and returns false if it is exceeded
        DryadLogger.LogDebug("Schedule Process", "Communication error: retrying process {0} on node {1}", this.m_currentProcess.Id, this.m_nodeName);
        if (ScheduleProcess(m_currentReplyUri, m_currentProcess, m_currentAsyncCallback))
        {
            return SchedulingResult.Pending;
        }
    }

    // SafeOpenConnection failed or retry count exceeded - fault the dispatcher.
    DryadLogger.LogWarning("Schedule Process", "Connection failed to node {0}", this.m_nodeName);
    return SchedulingResult.CommunicationError;
}
/// <summary>
/// Fail the vertex service task: marks the reply dispatcher and the service as
/// shutting down, records the reason, then signals the shutdown event.
/// </summary>
/// <param name="ex">Exception stored as the shutdown reason</param>
internal static void Surrender(Exception ex)
{
    DryadLogger.LogMethodEntry();

    // The flags and the reason are set before the event is signalled
    ReplyDispatcher.ShuttingDown = true;
    VertexService.internalShutdown = true;
    VertexService.ShutdownReason = ex;

    VertexService.shutdownEvent.Set();

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Releases this object's resources via Dispose(true) and suppresses finalization.
/// </summary>
public void Dispose()
{
    DryadLogger.LogMethodEntry(this.DryadId);

    this.Dispose(true);

    // Resources are already released, so the finalizer does not need to run
    GC.SuppressFinalize(this);

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Body of the task monitor thread. Each iteration drains the task update queue for
/// about one poll interval, signals m_taskChangeEvt once if anything was processed,
/// then waits up to one poll interval for the stop event. The poll interval starts
/// at 1 second and doubles each iteration up to 16 seconds.
/// </summary>
private void TaskMonitorThread()
{
    TimeSpan pollInterval = TimeSpan.FromSeconds(1);
    TimeSpan maxPollInterval = TimeSpan.FromSeconds(16);

    // The main loop. Each iteration polls for task changes.
    while (true)
    {
        bool foundUpdate = false;

        // A Stopwatch is monotonic. Measuring the window with DateTime.Now let a local
        // clock change (e.g. a DST shift) end the window early or stretch it by an hour.
        System.Diagnostics.Stopwatch drainTimer = System.Diagnostics.Stopwatch.StartNew();

        // Process change results from the blocking queue
        do
        {
            VertexTask v = null;
            if (m_taskUpdateQueue.TryTake(out v, pollInterval))
            {
                foundUpdate = true;
                ProcessYarnUpdate(v);
            }
        } while (drainTimer.Elapsed < pollInterval);

        if (foundUpdate)
        {
            // Notify WaitForTasksReady once per polling cycle so that it
            // sees all the changes in one batch
            m_taskChangeEvt.Set();
        }

        // Check to see if we've been told to stop. Timeout after pollInterval.
        // TODO: For better shutdown perf, we may want to check this at other places
        // or just kill the thread - but this provides a more graceful exit.
        if (m_threadStopEvt.WaitOne(pollInterval, true))
        {
            m_taskMonitorThreadRunning = false;
            DryadLogger.LogInformation("Task Monitoring Thread", "Received shutdown event");
            return;
        }

        // Double the polling interval each iteration, capped at maxPollInterval
        if (pollInterval < maxPollInterval)
        {
            double doubledSeconds = 2 * pollInterval.TotalSeconds;
            pollInterval = TimeSpan.FromSeconds(Math.Min(doubledSeconds, maxPollInterval.TotalSeconds));
        }
    }
}
/// <summary>
/// Attempt to call SetGetPropsComplete on the specified WCF service, retrying up to
/// numRetries times with a reopened client after each failure.
/// </summary>
/// <param name="replyUri">Service endpoint</param>
/// <param name="systemProcess">Process whose HasExited flag is checked when a call fails</param>
/// <param name="processId">Process id forwarded to the service</param>
/// <param name="info">Process info forwarded to the service</param>
/// <param name="propertyLabels">Property labels forwarded to the service</param>
/// <param name="propertyVersions">Property versions forwarded to the service</param>
/// <returns>
/// true when the call succeeded, or when it failed while shutting down or while talking
/// to a non-GM endpoint whose process has exited; false when all retries failed.
/// </returns>
public static bool SetGetPropsComplete(string replyUri, Process systemProcess, int processId, ProcessInfo info, string[] propertyLabels, ulong[] propertyVersions)
{
    DryadLogger.LogMethodEntry(replyUri, processId);

    bool delivered = false;
    VertexCallbackServiceClient client = GetClient(replyUri);

    int attempt = 0;
    while (!delivered && attempt < numRetries)
    {
        try
        {
            // No client available: try to create one
            if (client == null)
            {
                client = CreateClient(replyUri);
            }

            client.SetGetPropsComplete(processId, info, propertyLabels, propertyVersions);
            delivered = true;
        }
        catch (Exception failure)
        {
            bool giveUp = (IsGraphMrgUri(replyUri) == false && systemProcess.HasExited) || shuttingDown;
            if (!giveUp)
            {
                // Talking to the GM or a running vertex process: reopen the client and call again
                client = ReopenClientForRetry(replyUri, failure);
            }
            else
            {
                // Vertex is no longer running or the job is shutting down:
                // don't retry, report success
                DisposeClient(ref client);
                return true;
            }
        }

        attempt++;
    }

    // delivered is still false here when every attempt failed
    DryadLogger.LogMethodExit(delivered);
    return delivered;
}
/// <summary>
/// Set process state to cancelled and stop the vertex host process if possible.
/// Does nothing when the process has already completed.
/// </summary>
/// <param name="suppressNotifications">
/// true to detach the Exited handler before killing and to skip the manual
/// Process_Exited call
/// </param>
public void Cancel(bool suppressNotifications)
{
    DryadLogger.LogMethodEntry(this.DryadId);

    lock (syncRoot)
    {
        if (state == ProcessState.Completed)
        {
            // Process completed before the cancellation got here, nothing to do
            DryadLogger.LogInformation("Cancel process", "Process {0} has already exited", DryadId);
            DryadLogger.LogMethodExit();
            return;
        }

        DryadLogger.LogInformation("Cancel process", "Process {0} has not already exited", DryadId);
        state = ProcessState.Completed;
        this.cancelled = true;
    }

    if (systemProcess == null)
    {
        DryadLogger.LogInformation("Cancel process", "Process {0} has not started yet", DryadId);
    }
    else
    {
        // The process was started: kill it
        try
        {
            // Killing the process will trigger Process_Exited
            DryadLogger.LogInformation("Cancel process", "Killing system process for process id {0}", DryadId);
            if (suppressNotifications)
            {
                // Detach the Exited handler so the kill is not reported
                systemProcess.Exited -= this.Process_Exited;
            }

            systemProcess.Kill();
            DryadLogger.LogMethodExit();
            return;
        }
        catch (Exception killError)
        {
            // Kill failed: log it and fall through to the manual notification
            DryadLogger.LogError(0, killError, "Failed to kill system process for process id {0}", DryadId);
        }
    }

    // Process was either not running or failed to die: trigger Process_Exited ourselves
    if (!suppressNotifications)
    {
        Process_Exited(this, null);
    }

    DryadLogger.LogMethodExit();
}
/// <summary>
/// Forwards a process state change to the vertex scheduler.
/// Exceptions are logged and not propagated.
/// </summary>
/// <param name="processId">Id of the process whose state changed</param>
/// <param name="newState">State the process changed to</param>
public void FireStateChange(int processId, ProcessState newState)
{
    try
    {
        vertexScheduler.ProcessChangeState(processId, newState);
    }
    catch (Exception failure)
    {
        DryadLogger.LogError(0, failure, "Failed to change state to {0} for process {1}", newState.ToString(), processId);
    }
}
/// <summary>
/// Forwards a set/get properties completion to the vertex scheduler.
/// Exceptions are logged and not propagated.
/// </summary>
/// <param name="processId">Id of the process the completion belongs to</param>
/// <param name="info">Process info forwarded to the scheduler</param>
/// <param name="propertyLabels">Property labels forwarded to the scheduler</param>
/// <param name="propertyVersions">Property versions forwarded to the scheduler</param>
public void SetGetPropsComplete(int processId, ProcessInfo info, string[] propertyLabels, ulong[] propertyVersions)
{
    try
    {
        vertexScheduler.SetGetPropsComplete(processId, info, propertyLabels, propertyVersions);
    }
    catch (Exception failure)
    {
        DryadLogger.LogError(0, failure, "Failed to complete set / get properties for process {0}", processId);
    }
}
/// <summary>
/// Schedule a vertex host process using the provided parameters.
/// A repeated request for an id already in the table returns success without creating
/// a second process. Any other process still at or before the Running state is cancelled.
/// </summary>
/// <param name="replyUri">callback URI</param>
/// <param name="processId">vertex process id</param>
/// <param name="commandLine">vertex host command line</param>
/// <param name="environment">vertex host environment variables</param>
/// <returns>Success/Failure of starting vertex process thread</returns>
bool IDryadVertexService.ScheduleProcess(string replyUri, int processId, string commandLine, StringDictionary environment)
{
    DryadLogger.LogMethodEntry(processId, commandLine);
    bool startSuccess = false;

    // Fixed: the message had an unbalanced quote around the command line and a typo
    Console.WriteLine("Starting process id {0} with commandLine: '{1}'", processId, commandLine);

    try
    {
        VertexProcess newProcess = null;
        lock (vertexProcessTable.SyncRoot)
        {
            foreach (VertexProcess vp in vertexProcessTable)
            {
                if (vp.DryadId == processId)
                {
                    // This means a previous call to Schedule process partially succeeded:
                    // the call made it to the service but something went wrong with the response
                    // so the GM's xcompute machinery retried the call. We can just return success
                    // for this case rather than tearing down the process and creating a new one.
                    DryadLogger.LogMethodExit(true);
                    return true;
                }

                if (vp.State <= ProcessState.Running)
                {
                    // There should be no other processes running.
                    // If there are, it means a previous communication error
                    // caused the GM to give up on this node for a while.
                    // Kill anything that's still hanging around.
                    vp.Cancel(true);
                }
            }

            newProcess = new VertexProcess(
                replyUri,
                processId,
                commandLine,
                environment,
                OperationContext.Current.Channel.LocalAddress.Uri.ToString()
                );
            this.vertexProcessTable.Add(newProcess);
        }

        // Start outside the lock; the process is already registered in the table
        startSuccess = newProcess.Start(initializedEvent);
    }
    catch (Exception e)
    {
        DryadLogger.LogWarning("Schedule Process", "Operation threw exception: {0}", e.ToString());

        // Fixed: the fault was reported under the operation name "ReleaseProcess"
        throw new FaultException<VertexServiceError>(new VertexServiceError("ScheduleProcess", e.ToString()));
    }

    DryadLogger.LogMethodExit(startSuccess);
    return startSuccess;
}
/// <summary>
/// Wraps a task state notification in a VertexTask and adds it to the task update queue.
/// </summary>
/// <param name="taskId">Id of the task that changed</param>
/// <param name="taskState">New state, as the integer value of a YarnTaskState</param>
/// <param name="nodeName">Node the task is on</param>
public void QueueYarnUpdate(int taskId, int taskState, string nodeName)
{
    // Fixed: the placeholders were {0}, {2}, {3} for three arguments. Index 3 is out of
    // range (a FormatException if the logger uses composite formatting - TODO confirm)
    // and the node and state values landed in the wrong slots.
    DryadLogger.LogInformation("QueueYarnUpdate", "Task {0} on node {1} is in state {2}", taskId, nodeName, taskState);

    // Build the queued task; the requeue count is passed as int.MaxValue
    YarnTaskState yTaskState = (YarnTaskState)taskState;
    VertexTask v = new VertexTask(taskId, nodeName, yTaskState, int.MaxValue, DateTime.UtcNow);
    m_taskUpdateQueue.Add(v);
}
/// <summary>
/// Notify GM that vertex host process exited, retrying up to numRetries times
/// with a reopened client after each failure.
/// </summary>
/// <param name="replyUri">GM address</param>
/// <param name="processId">vertex process id</param>
/// <param name="exitCode">reason for vertex host exit</param>
/// <returns>
/// true when the notification was delivered, or when a call failed during shutdown;
/// false when all retries failed
/// </returns>
public static bool ProcessExited(string replyUri, int processId, int exitCode)
{
    DryadLogger.LogMethodEntry(replyUri, processId, exitCode);

    bool delivered = false;
    VertexCallbackServiceClient client = GetClient(replyUri);

    int attempt = 0;
    while (!delivered && attempt < numRetries)
    {
        try
        {
            // No client available: try to create one
            if (client == null)
            {
                client = CreateClient(replyUri);
            }

            client.ProcessExited(processId, exitCode);
            delivered = true;
        }
        catch (Exception failure)
        {
            if (!shuttingDown)
            {
                // Call failed: reopen the WCF client and try again
                client = ReopenClientForRetry(replyUri, failure);
            }
            else
            {
                // Shutting down: stop retrying and report success
                DisposeClient(ref client);
                return true;
            }
        }

        attempt++;
    }

    // delivered is still false here when every attempt failed
    DryadLogger.LogMethodExit(delivered);
    return delivered;
}
/// <summary>
/// Forwards a process exit notification to the vertex scheduler.
/// Exceptions are logged and not propagated.
/// </summary>
/// <param name="processId">Id of the process that exited</param>
/// <param name="exitCode">Exit code forwarded to the scheduler</param>
public void ProcessExited(int processId, int exitCode)
{
    try
    {
        vertexScheduler.ProcessExit(processId, exitCode);
    }
    catch (Exception failure)
    {
        DryadLogger.LogError(0, failure, "Failed to execute process exit for process {0}", processId);
    }
}
/// <summary>
/// Notify vertex service that the Graph Manager is done
/// with vertex process processId. Retries up to MaxRetries times after timeouts
/// and communication errors, reopening the connection between attempts.
/// </summary>
/// <param name="processId">Process Id of the process to release</param>
public void ReleaseProcess(int processId)
{
    // Stays true unless the failure is one that should not fault the dispatcher
    bool raiseFault = true;

    for (int attempt = 0; attempt < MaxRetries; attempt++)
    {
        try
        {
            // Forget the current process if it is the one being released
            if (CurrentProcess == processId)
            {
                m_currentProcess = null;
            }

            if (!Faulted)
            {
                this.m_client.ReleaseProcess(processId);
            }
            return;
        }
        // ReleaseProcess is one-way
        catch (TimeoutException timeout)
        {
            DryadLogger.LogWarning("Release Process", "Timeout communicating with vertex service on node {0}: {1}", this.m_nodeName, timeout.ToString());
            if (!SafeOpenConnection())
            {
                // Could not reconnect: give up with raiseFault still true
                break;
            }
        }
        catch (CommunicationException commError)
        {
            DryadLogger.LogWarning("Release Process", "Error communicating with vertex service on node {0}: {1}", this.m_nodeName, commError.ToString());
            if (!SafeOpenConnection())
            {
                // Could not reconnect: give up with raiseFault still true
                break;
            }
        }
        catch (Exception unexpected)
        {
            DryadLogger.LogError(0, unexpected, "Error calling ReleaseProcess for node {0}", m_nodeName);
            raiseFault = false;
            break;
        }
    }

    if (raiseFault)
    {
        RaiseFaultedEvent();
    }
}