/// <summary>
/// Checks the state of the node manager's containers immediately before the node
/// status updater is rebooted, then delegates to the base implementation.
/// </summary>
/// <remarks>
/// When <c>containersShouldBePreserved</c> is set on the enclosing instance, the
/// container map must be non-empty, must contain <c>existingCid</c>, and that
/// container must be in <c>ContainerState.Running</c>. Otherwise, if the map is
/// non-empty, the container for <c>existingCid</c> must be in
/// <c>ContainerState.Complete</c>.
/// Any failure during the checks or the base call is printed and recorded in
/// <c>assertionFailedInThread</c>; the sync barrier is awaited in all cases.
/// </remarks>
protected internal override void RebootNodeStatusUpdaterAndRegisterWithRM()
{
    ConcurrentMap<ContainerId, Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container> containers =
        this._enclosing.GetNMContext().GetContainers();
    try
    {
        try
        {
            if (this._enclosing.containersShouldBePreserved)
            {
                NUnit.Framework.Assert.IsFalse(containers.IsEmpty());
                NUnit.Framework.Assert.IsTrue(containers.Contains(this._enclosing.existingCid));
                NUnit.Framework.Assert.AreEqual(
                    ContainerState.Running,
                    containers[this._enclosing.existingCid].CloneAndGetContainerStatus().GetState());
            }
            else
            {
                // ensure that containers are empty or are completed before
                // restart nodeStatusUpdater
                if (!containers.IsEmpty())
                {
                    NUnit.Framework.Assert.AreEqual(
                        ContainerState.Complete,
                        containers[this._enclosing.existingCid].CloneAndGetContainerStatus().GetState());
                }
            }
            base.RebootNodeStatusUpdaterAndRegisterWithRM();
        }
        catch (Exception checkFailure)
        {
            Sharpen.Runtime.PrintStackTrace(checkFailure);
            this._enclosing._enclosing.assertionFailedInThread.Set(true);
        }
        finally
        {
            // Always reach the barrier, even when a check above failed.
            this._enclosing._enclosing.syncBarrier.Await();
        }
    }
    catch (BrokenBarrierException)
    {
        // Deliberately ignored: a broken barrier is not treated as a failure here.
    }
    catch (System.Threading.ThreadInterruptedException)
    {
        // Deliberately ignored: interruption while waiting is not a failure here.
    }
    catch (Exception barrierFailure)
    {
        // Specific handlers must precede this one; a leading catch-all would make
        // them unreachable and hide every failure raised while awaiting the barrier.
        Sharpen.Runtime.PrintStackTrace(barrierFailure);
        this._enclosing._enclosing.assertionFailedInThread.Set(true);
    }
}
/// <summary>
/// Checks that every container known to the node manager is in
/// <c>ContainerState.Complete</c> before the node status updater is rebooted,
/// delegates to the base implementation, and then waits on the sync barrier.
/// </summary>
/// <remarks>
/// A failed check or any other unexpected exception is printed and recorded in
/// <c>assertionFailedInThread</c>. A broken barrier or a thread interruption is
/// ignored.
/// </remarks>
protected internal override void RebootNodeStatusUpdaterAndRegisterWithRM()
{
    ConcurrentMap<ContainerId, Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container> containers =
        this._enclosing.GetNMContext().GetContainers();
    try
    {
        // ensure that containers are empty before restart nodeStatusUpdater
        if (!containers.IsEmpty())
        {
            foreach (Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container container in containers.Values)
            {
                NUnit.Framework.Assert.AreEqual(
                    ContainerState.Complete,
                    container.CloneAndGetContainerStatus().GetState());
            }
        }
        base.RebootNodeStatusUpdaterAndRegisterWithRM();
        // After this point new containers are free to be launched, except
        // containers from previous RM
        // Wait here so as to sync with the main test thread.
        this._enclosing._enclosing.syncBarrier.Await();
    }
    catch (BrokenBarrierException)
    {
        // Deliberately ignored: a broken barrier is not treated as a failure here.
    }
    catch (System.Threading.ThreadInterruptedException)
    {
        // Deliberately ignored: interruption while waiting is not a failure here.
    }
    catch (Exception failure)
    {
        // Must come last: a leading catch-all would swallow assertion failures
        // silently and this flag would never be set.
        Sharpen.Runtime.PrintStackTrace(failure);
        this._enclosing._enclosing.assertionFailedInThread.Set(true);
    }
}
/// <summary>
/// Handles one heartbeat from a node manager and produces the response to send back.
/// </summary>
/// <remarks>
/// Processing order:
/// 1. a node whose host is rejected by the nodes list manager gets the shared
///    <c>shutDown</c> response;
/// 2. a node that is not in the RM node map gets the shared <c>resync</c> response;
/// 3. the liveliness monitor is pinged;
/// 4. a heartbeat whose response id is exactly one behind the last response is a
///    duplicate and gets the last response again; one that is further behind
///    triggers a <c>Rebooting</c> node event and gets the <c>resync</c> response;
/// 5. otherwise a new response is built, populated, dispatched to the RM node
///    together with the reported status, and returned.
/// </remarks>
/// <param name="request">The heartbeat request carrying the node's status.</param>
/// <returns>The heartbeat response for the node.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    NodeStatus reportedStatus = request.GetNodeStatus();
    NodeId sourceNodeId = reportedStatus.GetNodeId();

    // 1. Check if it's a valid (i.e. not excluded) node
    if (!this.nodesListManager.IsValidNode(sourceNodeId.GetHost()))
    {
        string disallowedMessage = "Disallowed NodeManager nodeId: " + sourceNodeId + " hostname: " + sourceNodeId.GetHost();
        Log.Info(disallowedMessage);
        shutDown.SetDiagnosticsMessage(disallowedMessage);
        return shutDown;
    }

    // 2. Check if it's a registered node
    RMNode knownNode = this.rmContext.GetRMNodes()[sourceNodeId];
    if (knownNode == null)
    {
        /* node does not exist */
        string unknownNodeMessage = "Node not found resyncing " + reportedStatus.GetNodeId();
        Log.Info(unknownNodeMessage);
        resync.SetDiagnosticsMessage(unknownNodeMessage);
        return resync;
    }

    // Send ping
    this.nmLivelinessMonitor.ReceivedPing(sourceNodeId);

    // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
    NodeHeartbeatResponse previousResponse = knownNode.GetLastNodeHeartBeatResponse();
    if (reportedStatus.GetResponseId() + 1 == previousResponse.GetResponseId())
    {
        Log.Info("Received duplicate heartbeat from node " + knownNode.GetNodeAddress()
            + " responseId=" + reportedStatus.GetResponseId());
        return previousResponse;
    }

    if (reportedStatus.GetResponseId() + 1 < previousResponse.GetResponseId())
    {
        string behindMessage = "Too far behind rm response id:" + previousResponse.GetResponseId()
            + " nm response id:" + reportedStatus.GetResponseId();
        Log.Info(behindMessage);
        resync.SetDiagnosticsMessage(behindMessage);
        // TODO: Just sending reboot is not enough. Think more.
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMNodeEvent(sourceNodeId, RMNodeEventType.Rebooting));
        return resync;
    }

    // Heartbeat response
    NodeHeartbeatResponse freshResponse = YarnServerBuilderUtils.NewNodeHeartbeatResponse(
        previousResponse.GetResponseId() + 1,
        NodeAction.Normal,
        null,
        null,
        null,
        null,
        nextHeartBeatInterval);
    knownNode.UpdateNodeHeartbeatResponseForCleanup(freshResponse);
    PopulateKeys(request, freshResponse);

    ConcurrentMap<ApplicationId, ByteBuffer> appCredentials = rmContext.GetSystemCredentialsForApps();
    if (!appCredentials.IsEmpty())
    {
        freshResponse.SetSystemCredentialsForApps(appCredentials);
    }

    // 4. Send status to RMNode, saving the latest response.
    this.rmContext.GetDispatcher().GetEventHandler().Handle(
        new RMNodeStatusEvent(
            sourceNodeId,
            reportedStatus.GetNodeHealthStatus(),
            reportedStatus.GetContainersStatuses(),
            reportedStatus.GetKeepAliveApplications(),
            freshResponse));
    return freshResponse;
}