예제 #1
0
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
        {
            NodeStatus remoteNodeStatus = request.GetNodeStatus();
            NodeId     nodeId           = remoteNodeStatus.GetNodeId();

            // 1. Check if it's a valid (i.e. not excluded) node
            if (!this.nodesListManager.IsValidNode(nodeId.GetHost()))
            {
                string message = "Disallowed NodeManager nodeId: " + nodeId + " hostname: " + nodeId
                                 .GetHost();
                Log.Info(message);
                shutDown.SetDiagnosticsMessage(message);
                return(shutDown);
            }
            // 2. Check if it's a registered node
            RMNode rmNode = this.rmContext.GetRMNodes()[nodeId];

            if (rmNode == null)
            {
                /* node does not exist */
                string message = "Node not found resyncing " + remoteNodeStatus.GetNodeId();
                Log.Info(message);
                resync.SetDiagnosticsMessage(message);
                return(resync);
            }
            // Send ping
            this.nmLivelinessMonitor.ReceivedPing(nodeId);
            // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
            NodeHeartbeatResponse lastNodeHeartbeatResponse = rmNode.GetLastNodeHeartBeatResponse
                                                                  ();

            if (remoteNodeStatus.GetResponseId() + 1 == lastNodeHeartbeatResponse.GetResponseId
                    ())
            {
                Log.Info("Received duplicate heartbeat from node " + rmNode.GetNodeAddress() + " responseId="
                         + remoteNodeStatus.GetResponseId());
                return(lastNodeHeartbeatResponse);
            }
            else
            {
                if (remoteNodeStatus.GetResponseId() + 1 < lastNodeHeartbeatResponse.GetResponseId
                        ())
                {
                    string message = "Too far behind rm response id:" + lastNodeHeartbeatResponse.GetResponseId
                                         () + " nm response id:" + remoteNodeStatus.GetResponseId();
                    Log.Info(message);
                    resync.SetDiagnosticsMessage(message);
                    // TODO: Just sending reboot is not enough. Think more.
                    this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMNodeEvent(nodeId, RMNodeEventType
                                                                                            .Rebooting));
                    return(resync);
                }
            }
            // Heartbeat response
            NodeHeartbeatResponse nodeHeartBeatResponse = YarnServerBuilderUtils.NewNodeHeartbeatResponse
                                                              (lastNodeHeartbeatResponse.GetResponseId() + 1, NodeAction.Normal, null, null, null
                                                              , null, nextHeartBeatInterval);

            rmNode.UpdateNodeHeartbeatResponseForCleanup(nodeHeartBeatResponse);
            PopulateKeys(request, nodeHeartBeatResponse);
            ConcurrentMap <ApplicationId, ByteBuffer> systemCredentials = rmContext.GetSystemCredentialsForApps
                                                                              ();

            if (!systemCredentials.IsEmpty())
            {
                nodeHeartBeatResponse.SetSystemCredentialsForApps(systemCredentials);
            }
            // 4. Send status to RMNode, saving the latest response.
            this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMNodeStatusEvent(nodeId
                                                                                          , remoteNodeStatus.GetNodeHealthStatus(), remoteNodeStatus.GetContainersStatuses
                                                                                              (), remoteNodeStatus.GetKeepAliveApplications(), nodeHeartBeatResponse));
            return(nodeHeartBeatResponse);
        }