/// <summary>
/// Handles a single heartbeat sent by a NodeManager and produces the reply for it.
/// </summary>
/// <remarks>
/// The heartbeat is processed in this order:
/// 1. The node's host is checked with <c>nodesListManager.IsValidNode</c>; a rejected
///    host gets the <c>shutDown</c> response.
/// 2. The node is looked up in <c>rmContext.GetRMNodes()</c>; an unknown node gets the
///    <c>resync</c> response.
/// 3. The ping is recorded, then the response id reported by the node is compared with
///    the id of the last response kept for that node to detect a duplicate or a
///    too-old heartbeat.
/// 4. A new response is built, and the node status is handed to the dispatcher's event
///    handler together with that response.
/// NOTE(review): <c>shutDown</c> and <c>resync</c> are declared outside this method and
/// have their diagnostics message overwritten here on every rejection; confirm whether
/// concurrent heartbeats can observe each other's message.
/// </remarks>
/// <param name="request">Heartbeat request; its node status supplies the node id,
/// response id, health status, container statuses and keep-alive applications.</param>
/// <returns>
/// <c>shutDown</c> for a disallowed host, <c>resync</c> for an unregistered or too-far-behind
/// node, the previously stored response for a duplicate heartbeat, and otherwise a newly
/// built response whose id is the last response id plus one.
/// </returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    NodeStatus reportedStatus = request.GetNodeStatus();
    NodeId nodeId = reportedStatus.GetNodeId();

    // 1. Check if it's a valid (i.e. not excluded) node
    bool hostAllowed = this.nodesListManager.IsValidNode(nodeId.GetHost());
    if (!hostAllowed)
    {
        string rejection = "Disallowed NodeManager nodeId: " + nodeId + " hostname: " + nodeId.GetHost();
        Log.Info(rejection);
        shutDown.SetDiagnosticsMessage(rejection);
        return shutDown;
    }

    // 2. Check if it's a registered node
    // NOTE(review): relies on the indexer yielding null for an unknown key - confirm
    // against the collection type returned by GetRMNodes().
    RMNode registeredNode = this.rmContext.GetRMNodes()[nodeId];
    if (registeredNode == null)
    {
        // The node does not exist on the RM side: ask it to resync.
        string notFound = "Node not found resyncing " + reportedStatus.GetNodeId();
        Log.Info(notFound);
        resync.SetDiagnosticsMessage(notFound);
        return resync;
    }

    // Send ping
    this.nmLivelinessMonitor.ReceivedPing(nodeId);

    // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
    NodeHeartbeatResponse previousResponse = registeredNode.GetLastNodeHeartBeatResponse();

    if (reportedStatus.GetResponseId() + 1 == previousResponse.GetResponseId())
    {
        // Same heartbeat seen again: replay the response that was already produced for it.
        Log.Info("Received duplicate heartbeat from node " + registeredNode.GetNodeAddress()
            + " responseId=" + reportedStatus.GetResponseId());
        return previousResponse;
    }

    if (reportedStatus.GetResponseId() + 1 < previousResponse.GetResponseId())
    {
        string tooOld = "Too far behind rm response id:" + previousResponse.GetResponseId()
            + " nm response id:" + reportedStatus.GetResponseId();
        Log.Info(tooOld);
        resync.SetDiagnosticsMessage(tooOld);

        // TODO: Just sending reboot is not enough. Think more.
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMNodeEvent(nodeId, RMNodeEventType.Rebooting));
        return resync;
    }

    // Heartbeat response
    NodeHeartbeatResponse freshResponse = YarnServerBuilderUtils.NewNodeHeartbeatResponse(
        previousResponse.GetResponseId() + 1,
        NodeAction.Normal,
        null,
        null,
        null,
        null,
        nextHeartBeatInterval);
    registeredNode.UpdateNodeHeartbeatResponseForCleanup(freshResponse);
    PopulateKeys(request, freshResponse);

    // Attach the system credentials only when there is at least one entry.
    ConcurrentMap<ApplicationId, ByteBuffer> appCredentials = rmContext.GetSystemCredentialsForApps();
    if (!appCredentials.IsEmpty())
    {
        freshResponse.SetSystemCredentialsForApps(appCredentials);
    }

    // 4. Send status to RMNode, saving the latest response.
    this.rmContext.GetDispatcher().GetEventHandler().Handle(
        new RMNodeStatusEvent(
            nodeId,
            reportedStatus.GetNodeHealthStatus(),
            reportedStatus.GetContainersStatuses(),
            reportedStatus.GetKeepAliveApplications(),
            freshResponse));

    return freshResponse;
}