/// <summary>
/// Keeps the running / finished / start-node counters in step with a vertex
/// task's state transition. Notifications whose old and new state are equal
/// leave the counters untouched.
/// </summary>
/// <param name="sender">Not used</param>
/// <param name="ve">Describes the old and new state of the task</param>
private void OnVertexChangeHandler(object sender, VertexChangeEventArgs ve)
{
    // Only genuine state transitions affect the counters.
    if (ve.OldState == ve.NewState)
    {
        return;
    }

    // Don't want to update counts while WaitForTasksReady is checking them
    lock (this)
    {
        // Step 1: remove the task from the bucket it is leaving.
        bool leavingRunning = ve.OldState == VertexTaskState.Running;
        bool leavingCompleted = !leavingRunning && ve.OldState > VertexTaskState.Running;

        if (leavingRunning)
        {
            m_runningTasks--;
        }

        if (leavingCompleted)
        {
            m_finishedTasks--;

            // The task is no longer in a completed state, so it counts again
            // towards the number of tasks to wait for at startup.
            m_startNodes++;
        }

        // Step 2: add the task to the bucket it is entering.
        bool enteringRunning = ve.NewState == VertexTaskState.Running;
        bool enteringCompleted = !enteringRunning && ve.NewState > VertexTaskState.Running;

        if (enteringRunning)
        {
            m_runningTasks++;
        }

        if (enteringCompleted)
        {
            m_finishedTasks++;

            // The task reached a completed state, so there is one fewer task
            // to wait for at startup.
            m_startNodes--;
        }
    }
}
/// <summary>
/// Records the most recently reported state of a vertex task and raises the
/// vertex change event describing the transition from the previously recorded
/// entry (if any) to the new one.
/// </summary>
/// <param name="v">The task as just reported; it replaces the entry stored in m_vertices under its Id.</param>
public void ProcessYarnUpdate(VertexTask v)
{
    DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} on node {1} is in state {2}", v.Id, v.Node, v.State);

    VertexChangeEventArgs e = new VertexChangeEventArgs(v.Id);
    e.NewNode = v.Node;
    e.NewState = YarnTaskStateToVertexTaskState(v.State);
    e.NewRequeueCount = v.RequeueCount;

    // Read the previously recorded entry exactly once so that the null check and
    // the "old" values below are guaranteed to come from the same object.
    var previous = m_vertices[v.Id];
    if (previous != null)
    {
        e.OldNode = previous.Node;
        e.OldState = YarnTaskStateToVertexTaskState(previous.State);
        e.OldRequeueCount = previous.RequeueCount;
    }
    // Otherwise the Old* members keep whatever VertexChangeEventArgs initialised them to.

    if (e.NewRequeueCount != e.OldRequeueCount)
    {
        DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} requeue count changed from {1} to {2}", v.Id, e.OldRequeueCount, e.NewRequeueCount);
    }

    // Update current vertex state before notifying, so handlers observe the new entry.
    m_vertices[v.Id] = v;

    // Snapshot the delegate: invoking it directly throws NullReferenceException
    // when there are no subscribers, and the snapshot also avoids a race with a
    // concurrent unsubscribe between the check and the call.
    var handler = m_vertexChangeEvent;
    if (handler != null)
    {
        handler(this, e);
    }
}
/// <summary>
/// This event handler is called from ISchedulerHelper task monitoring thread in response
/// to an HPC Task state change. It decides, under dispatcherChangeLock, whether the
/// dispatcher already associated with the task must be faulted and whether a new
/// dispatcher must be created, then carries out those actions after releasing the lock.
/// </summary>
/// <param name="sender">Not used</param>
/// <param name="e">Information about the task state transition</param>
private void OnVertexChanged(object sender, VertexChangeEventArgs e)
{
    const string LogArea = "Vertex Task State Change";

    Dispatcher existing = null;
    bool mustFaultExisting = false;
    bool mustAddNew = false;

    lock (dispatcherChangeLock)
    {
        // Look in the active pool first; failing that, check whether the dispatcher
        // was already faulted due to a communication error.
        bool haveDispatcher = dispatcherPool.GetByTaskId(e.Id, out existing)
            || badDispatcherPool.GetByTaskId(e.Id, out existing);

        bool nodeChanged = !String.Equals(e.OldNode, e.NewNode, StringComparison.OrdinalIgnoreCase);

        if (e.OldState != e.NewState)
        {
            // ---- Task state change ----
            if (e.NewState < VertexTaskState.Running)
            {
                // Transitioning to, e.g., queued
                DryadLogger.LogInformation(LogArea, "Task {0} transitioned to waiting", e.Id);

                // A dispatcher for the task means it was running before. It no longer is,
                // so that dispatcher has to be faulted.
                if (haveDispatcher)
                {
                    DryadLogger.LogWarning(LogArea, "Previously running task {0} transitioned to waiting", e.Id);
                    mustFaultExisting = true;
                }
            }
            else if (e.NewState == VertexTaskState.Running)
            {
                // Transition to running
                if (haveDispatcher && !nodeChanged)
                {
                    // Dispatcher found, task is on same node
                    DryadLogger.LogWarning(LogArea, "Change notification for running task {0}, but state and node are unchanged in notification", e.Id);
                }
                else if (haveDispatcher)
                {
                    // Dispatcher found, but the task is now on a new node:
                    // fault the old dispatcher and add one for the new node.
                    DryadLogger.LogInformation(LogArea, "Running task {0} assigned to new node", e.Id);
                    mustFaultExisting = true;
                    mustAddNew = true;
                }
                else
                {
                    // No dispatcher for task, add a new one
                    DryadLogger.LogInformation(LogArea, "Task {0} transitioned to running", e.Id);
                    mustAddNew = true;
                }
            }
            else if (e.NewState == VertexTaskState.Finished)
            {
                // Job is exiting, nothing to do
                DryadLogger.LogDebug(LogArea, "Task {0} transitioned to finished", e.Id);
            }
            else
            {
                // Failed or Cancelled
                DryadLogger.LogWarning(LogArea, "Task {0} transitioned to failed or cancelled", e.Id);

                // Fault dispatcher if there is one
                mustFaultExisting = haveDispatcher;
            }
        }
        else if (nodeChanged)
        {
            // ---- Same state, different node ----
            if (e.NewState == VertexTaskState.Running)
            {
                DryadLogger.LogDebug(LogArea, "Task {0} moved from node {1} to node {2}", e.Id, e.OldNode, e.NewNode);

                if (haveDispatcher)
                {
                    mustFaultExisting = true;
                    mustAddNew = true;
                }
            }
        }
        else if (e.OldRequeueCount < e.NewRequeueCount)
        {
            // ---- Same state and node, but requeued: Running -> Queued -> Running, e.g. ----
            DryadLogger.LogDebug(LogArea, "Task {0} node {1} state {2} unchanged from previous state: likely missed a state change notification.", e.Id, e.NewNode, e.NewState.ToString());

            // Was task running previously? If so, fault the old dispatcher.
            mustFaultExisting = haveDispatcher;

            // Is task running now? If so, create a new dispatcher to re-establish connection.
            mustAddNew = e.NewState == VertexTaskState.Running;
        }
    }

    // The actions below run outside the lock, in the same order as decided above.
    if (mustFaultExisting)
    {
        existing.RaiseFaultedEvent(true);
    }

    if (mustAddNew)
    {
        Dispatcher created = AddDispatcher(e.Id, e.NewNode, e.NewState);
        if (created == null)
        {
            DryadLogger.LogError(0, null, "Failed to add new dispatcher for node {0}", e.NewNode);
        }
        else
        {
            // Look for new request for node
            ThreadPool.QueueUserWorkItem(new WaitCallback(FindRequestForNodeThreadFunc), created);
        }
    }

    if (mustFaultExisting)
    {
        // Check to see if we have any dispatchers left. If not, we need to fail
        // everything in the request pool.
        CheckForOutOfDispatchers();
    }
}
/// <summary>
/// Keeps the running / finished / start-node counters in step with a vertex
/// task's state transition. Notifications whose old and new state are equal
/// leave the counters untouched.
/// </summary>
/// <param name="sender">Not used</param>
/// <param name="ve">Describes the old and new state of the task</param>
private void OnVertexChangeHandler(object sender, VertexChangeEventArgs ve)
{
    // Only genuine state transitions affect the counters.
    if (ve.OldState == ve.NewState)
    {
        return;
    }

    // Don't want to update counts while WaitForTasksReady is checking them
    lock (this)
    {
        // Step 1: remove the task from the bucket it is leaving.
        bool leavingRunning = ve.OldState == VertexTaskState.Running;
        bool leavingCompleted = !leavingRunning && ve.OldState > VertexTaskState.Running;

        if (leavingRunning)
        {
            m_runningTasks--;
        }

        if (leavingCompleted)
        {
            m_finishedTasks--;

            // The task is no longer in a completed state, so it counts again
            // towards the number of tasks to wait for at startup.
            m_startNodes++;
        }

        // Step 2: add the task to the bucket it is entering.
        bool enteringRunning = ve.NewState == VertexTaskState.Running;
        bool enteringCompleted = !enteringRunning && ve.NewState > VertexTaskState.Running;

        if (enteringRunning)
        {
            m_runningTasks++;
        }

        if (enteringCompleted)
        {
            m_finishedTasks++;

            // The task reached a completed state, so there is one fewer task
            // to wait for at startup.
            m_startNodes--;
        }
    }
}
/// <summary>
/// Records the most recently reported state of a vertex task and raises the
/// vertex change event describing the transition from the previously recorded
/// entry (if any) to the new one.
/// </summary>
/// <param name="v">The task as just reported; it replaces the entry stored in m_vertices under its Id.</param>
public void ProcessYarnUpdate(VertexTask v)
{
    DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} on node {1} is in state {2}", v.Id, v.Node, v.State);

    VertexChangeEventArgs e = new VertexChangeEventArgs(v.Id);
    e.NewNode = v.Node;
    e.NewState = YarnTaskStateToVertexTaskState(v.State);
    e.NewRequeueCount = v.RequeueCount;

    // Read the previously recorded entry exactly once so that the null check and
    // the "old" values below are guaranteed to come from the same object.
    var previous = m_vertices[v.Id];
    if (previous != null)
    {
        e.OldNode = previous.Node;
        e.OldState = YarnTaskStateToVertexTaskState(previous.State);
        e.OldRequeueCount = previous.RequeueCount;
    }
    // Otherwise the Old* members keep whatever VertexChangeEventArgs initialised them to.

    if (e.NewRequeueCount != e.OldRequeueCount)
    {
        DryadLogger.LogInformation("ProcessYarnUpdate", "Task {0} requeue count changed from {1} to {2}", v.Id, e.OldRequeueCount, e.NewRequeueCount);
    }

    // Update current vertex state before notifying, so handlers observe the new entry.
    m_vertices[v.Id] = v;

    // Snapshot the delegate: invoking it directly throws NullReferenceException
    // when there are no subscribers, and the snapshot also avoids a race with a
    // concurrent unsubscribe between the check and the call.
    var handler = m_vertexChangeEvent;
    if (handler != null)
    {
        handler(this, e);
    }
}