示例#1
0
 /// <summary>
 /// Heartbeat override that calls <c>ResetStartFailoverFlag(true)</c> on the
 /// enclosing instance, asserts that <c>WaittingForFailOver()</c> reports true,
 /// and then delegates to the base implementation.
 /// </summary>
 /// <param name="request">Heartbeat request, forwarded unchanged to the base class.</param>
 /// <returns>Whatever the base implementation returns for <paramref name="request"/>.</returns>
 /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
 /// <exception cref="System.IO.IOException"/>
 public override NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
 {
     this._enclosing.ResetStartFailoverFlag(true);

     // The call may only be forwarded once failover has been triggered.
     bool failoverTriggered = this._enclosing.WaittingForFailOver();
     NUnit.Framework.Assert.IsTrue(failoverTriggered);

     NodeHeartbeatResponse forwarded = base.NodeHeartbeat(request);
     return forwarded;
 }
示例#2
0
            /// <summary>
            /// Heartbeat loop: while the enclosing instance's <c>stopT</c> flag is not
            /// set, builds a healthy node status for the node id of <c>request3</c>,
            /// sends it as a heartbeat, remembers the returned response id and then
            /// calls <c>Sharpen.Thread.Sleep(1000)</c>. Any exception is logged and
            /// the loop continues.
            /// </summary>
            public override void Run()
            {
                int responseId = 0;

                while (!this._enclosing.stopT)
                {
                    try
                    {
                        // Node status echoing the most recent response id.
                        NodeStatus status = TestNMExpiry.recordFactory.NewRecordInstance<NodeStatus>();
                        status.SetNodeId(this._enclosing.request3.GetNodeId());
                        status.SetResponseId(responseId);
                        status.SetNodeHealthStatus(TestNMExpiry.recordFactory.NewRecordInstance<NodeHealthStatus>());
                        status.GetNodeHealthStatus().SetIsNodeHealthy(true);

                        NodeHeartbeatRequest heartbeat = TestNMExpiry.recordFactory.NewRecordInstance<NodeHeartbeatRequest>();
                        heartbeat.SetNodeStatus(status);

                        // The id handed back is used for the next iteration.
                        responseId = this._enclosing.resourceTrackerService.NodeHeartbeat(heartbeat).GetResponseId();

                        Sharpen.Thread.Sleep(1000);
                    }
                    catch (Exception failure)
                    {
                        TestNMExpiry.Log.Info("failed to heartbeat ", failure);
                    }
                }
            }
示例#3
0
        /// <summary>
        /// Returns a newly created heartbeat response from the record factory; the
        /// request is not inspected.
        /// </summary>
        /// <param name="request">Incoming heartbeat (unused).</param>
        /// <returns>A fresh <c>NodeHeartbeatResponse</c> record.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
        {
            return recordFactory.NewRecordInstance<NodeHeartbeatResponse>();
        }
 /// <summary>
 /// Returns a fresh heartbeat response, unless the <c>exception</c> flag is set,
 /// in which case a <c>YarnException</c> with the message "testMessage" is thrown.
 /// </summary>
 /// <param name="request">Incoming heartbeat (unused).</param>
 /// <returns>A new <c>NodeHeartbeatResponse</c> record when the flag is clear.</returns>
 /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
 /// <exception cref="System.IO.IOException"/>
 public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
 {
     if (!exception)
     {
         return recordFactory.NewRecordInstance<NodeHeartbeatResponse>();
     }

     throw new YarnException("testMessage");
 }
            /// <summary>
            /// Logs the current heartbeat counter, writes it into the request's node
            /// status as the response id, increments the counter and returns a response
            /// built with the incremented value.
            /// </summary>
            /// <param name="request">Heartbeat whose node status receives the response id.</param>
            /// <returns>A response created by <c>YarnServerBuilderUtils.NewNodeHeartbeatResponse</c>.</returns>
            /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
            /// <exception cref="System.IO.IOException"/>
            public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
            {
                NodeStatus reportedStatus = request.GetNodeStatus();

                Log.Info("Got heartbeat number " + heartBeatID);

                // Post-increment: the status gets the old value, the response below the new one.
                reportedStatus.SetResponseId(heartBeatID++);

                return YarnServerBuilderUtils.NewNodeHeartbeatResponse(
                    heartBeatID, null, null, null, null, null, 1000L);
            }
示例#6
0
 /// <summary>
 /// Client-side heartbeat: extracts the protobuf message from the request
 /// (which must be a <c>NodeHeartbeatRequestPBImpl</c>), sends it through
 /// <c>proxy</c> and wraps the reply in a <c>NodeHeartbeatResponsePBImpl</c>.
 /// </summary>
 /// <param name="request">Heartbeat request; cast to <c>NodeHeartbeatRequestPBImpl</c>.</param>
 /// <returns>The wrapped reply, or <c>null</c> if the exception helper returns normally.</returns>
 /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
 /// <exception cref="System.IO.IOException"/>
 public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
 {
     NodeHeartbeatRequestPBImpl pbRequest = (NodeHeartbeatRequestPBImpl)request;
     YarnServerCommonServiceProtos.NodeHeartbeatRequestProto wireRequest = pbRequest.GetProto();

     try
     {
         return new NodeHeartbeatResponsePBImpl(proxy.NodeHeartbeat(null, wireRequest));
     }
     catch (ServiceException rpcFailure)
     {
         RPCUtil.UnwrapAndThrowException(rpcFailure);
         // Only reached if the helper above returns instead of throwing.
         return null;
     }
 }
示例#7
0
        /// <summary>
        /// Sends one heartbeat for this node: builds a node status from the current
        /// containers, stamps it with <c>responseID</c>, submits it to
        /// <c>resourceTrackerService</c> and stores the response id that comes back.
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        public virtual void Heartbeat()
        {
            NodeStatus status = Org.Apache.Hadoop.Yarn.Server.Resourcemanager.NodeManager.CreateNodeStatus(
                nodeId, GetContainerStatuses(containers));
            status.SetResponseId(responseID);

            NodeHeartbeatRequest heartbeat = recordFactory.NewRecordInstance<NodeHeartbeatRequest>();
            heartbeat.SetNodeStatus(status);

            // Remember the id from the reply for the next heartbeat.
            responseID = resourceTrackerService.NodeHeartbeat(heartbeat).GetResponseId();
        }
示例#8
0
                    /// <summary>
                    /// Forwards the heartbeat to <c>rt</c>. A <c>YarnException</c> is logged
                    /// together with the sending node's id and then rethrown unchanged.
                    /// </summary>
                    /// <param name="request">Heartbeat request passed through to <c>rt</c>.</param>
                    /// <returns>The response returned by <c>rt.NodeHeartbeat</c>.</returns>
                    /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
                    /// <exception cref="System.IO.IOException"/>
                    public NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
                    {
                        try
                        {
                            return rt.NodeHeartbeat(request);
                        }
                        catch (YarnException failure)
                        {
                            MiniYARNCluster.Log.Info(
                                "Exception in heartbeat from node " + request.GetNodeStatus().GetNodeId(),
                                failure);
                            // Rethrow with the original stack trace.
                            throw;
                        }
                    }
        /// <summary>
        /// Registers a node and checks the response ids returned for a sequence of
        /// heartbeats: id 0 is answered with id 1, echoing id 1 is answered with
        /// id 2, repeating id 1 is again answered with id 2, and going back to id 0
        /// yields a <c>Resync</c> action with a "Too far behind" diagnostic message.
        /// </summary>
        public virtual void TestRPCResponseId()
        {
            string   node       = "localhost";
            Resource capability = BuilderUtils.NewResource(1024, 1);

            nodeId = NodeId.NewInstance(node, 1234);

            // Register the node manager that sends the heartbeats below.
            RegisterNodeManagerRequest request1 = recordFactory.NewRecordInstance<RegisterNodeManagerRequest>();

            request1.SetNodeId(nodeId);
            request1.SetHttpPort(0);
            request1.SetResource(capability);
            resourceTrackerService.RegisterNodeManager(request1);

            NodeStatus nodeStatus = recordFactory.NewRecordInstance<NodeStatus>();

            nodeStatus.SetNodeId(nodeId);
            NodeHealthStatus nodeHealthStatus = recordFactory.NewRecordInstance<NodeHealthStatus>();

            nodeHealthStatus.SetIsNodeHealthy(true);
            nodeStatus.SetNodeHealthStatus(nodeHealthStatus);
            NodeHeartbeatRequest nodeHeartBeatRequest = recordFactory.NewRecordInstance<NodeHeartbeatRequest>();

            nodeHeartBeatRequest.SetNodeStatus(nodeStatus);

            // First heartbeat carries id 0.
            nodeStatus.SetResponseId(0);
            NodeHeartbeatResponse response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);

            // AreEqual (rather than IsTrue(a == b)) reports the actual id on failure.
            NUnit.Framework.Assert.AreEqual(1, response.GetResponseId());

            // Echo the id just received.
            nodeStatus.SetResponseId(response.GetResponseId());
            response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());

            /* try calling with less response id */
            response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());

            // An id far behind the resource manager's must trigger a resync.
            nodeStatus.SetResponseId(0);
            response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, response.GetNodeAction());
            NUnit.Framework.Assert.AreEqual("Too far behind rm response id:2 nm response id:0"
                                            , response.GetDiagnosticsMessage());
        }
示例#10
0
        /// <summary>
        /// Checks that the protobuf record factory creates a
        /// <c>NodeHeartbeatRequestPBImpl</c> when asked for a
        /// <c>NodeHeartbeatRequest</c>; a <c>YarnRuntimeException</c> during
        /// creation fails the test.
        /// </summary>
        public virtual void TestPbRecordFactory()
        {
            RecordFactory factory = RecordFactoryPBImpl.Get();

            try
            {
                NodeHeartbeatRequest created = factory.NewRecordInstance<NodeHeartbeatRequest>();
                System.Type actualType = created.GetType();

                NUnit.Framework.Assert.AreEqual(typeof(NodeHeartbeatRequestPBImpl), actualType);
            }
            catch (YarnRuntimeException creationFailure)
            {
                // Print the cause before failing the test.
                Sharpen.Runtime.PrintStackTrace(creationFailure);
                NUnit.Framework.Assert.Fail("Failed to crete record");
            }
        }
示例#11
0
                    /// <summary>
                    /// Heartbeat override that expects the request to report exactly one
                    /// container status, with the id of <c>testCompleteContainer</c>. A
                    /// failed check is printed and recorded in <c>assertionFailedInThread</c>
                    /// instead of propagating. The reply always carries <c>NodeAction.Resync</c>.
                    /// </summary>
                    /// <param name="request">Heartbeat whose container statuses are checked.</param>
                    /// <returns>A response with id 1 and action <c>Resync</c>.</returns>
                    public override NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
                    {
                        IList<ContainerStatus> reported = request.GetNodeStatus().GetContainersStatuses();

                        try
                        {
                            NUnit.Framework.Assert.AreEqual(1, reported.Count);
                            NUnit.Framework.Assert.AreEqual(
                                testCompleteContainer.GetContainerId(),
                                reported[0].GetContainerId());
                        }
                        catch (Exception checkFailure)
                        {
                            // Record the failure in the shared flag rather than letting it propagate.
                            Sharpen.Runtime.PrintStackTrace(checkFailure);
                            this._enclosing._enclosing._enclosing.assertionFailedInThread.Set(true);
                        }

                        return YarnServerBuilderUtils.NewNodeHeartbeatResponse(
                            1, NodeAction.Resync, null, null, null, null, 1000L);
                    }
示例#12
0
        /// <summary>
        /// Exercises the resource tracker across failover: registers a node manager,
        /// waits for it to connect, starts a new failover thread and then sends a
        /// heartbeat for the same host and port.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestResourceTrackerOnHA()
        {
            NodeId   nmId       = NodeId.NewInstance("localhost", 0);
            Resource nmCapacity = Resource.NewInstance(2048, 4);

            // make sure registerNodeManager works when failover happens
            RegisterNodeManagerRequest registration = RegisterNodeManagerRequest.NewInstance(
                nmId, 0, nmCapacity, YarnVersionInfo.GetVersion(), null, null);
            resourceTracker.RegisterNodeManager(registration);
            NUnit.Framework.Assert.IsTrue(WaitForNodeManagerToConnect(10000, nmId));

            // restart the failover thread, and make sure nodeHeartbeat works
            failoverThread = CreateAndStartFailoverThread();

            NodeStatus heartbeatStatus = NodeStatus.NewInstance(
                NodeId.NewInstance("localhost", 0), 0, null, null, null);
            NodeHeartbeatRequest heartbeat = NodeHeartbeatRequest.NewInstance(heartbeatStatus, null, null);
            resourceTracker.NodeHeartbeat(heartbeat);
        }
示例#13
0
        /// <summary>
        /// Builds and sends a heartbeat for this node, then adopts any master keys
        /// in the response whose key id differs from the currently held ones.
        /// </summary>
        /// <param name="conts">
        /// Container statuses per application; <c>SetContainersStatuses</c> is called
        /// once for every entry, in enumeration order.
        /// </param>
        /// <param name="isHealthy">Health flag placed into the node's health status.</param>
        /// <param name="resId">Response id placed into the node status.</param>
        /// <returns>The response returned by <c>resourceTracker.NodeHeartbeat</c>.</returns>
        /// <exception cref="System.Exception"/>
        public virtual NodeHeartbeatResponse NodeHeartbeat(
            IDictionary<ApplicationId, IList<Org.Apache.Hadoop.Yarn.Api.Records.ContainerStatus>> conts,
            bool isHealthy,
            int resId)
        {
            NodeHeartbeatRequest heartbeat = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NodeHeartbeatRequest>();
            NodeStatus nodeStatus = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NodeStatus>();

            nodeStatus.SetResponseId(resId);
            nodeStatus.SetNodeId(nodeId);

            foreach (KeyValuePair<ApplicationId, IList<Org.Apache.Hadoop.Yarn.Api.Records.ContainerStatus>> appContainers in conts)
            {
                Org.Mortbay.Log.Log.Info("entry.getValue() " + appContainers.Value);
                nodeStatus.SetContainersStatuses(appContainers.Value);
            }

            // Health report: empty text, caller-supplied flag, fixed report time of 1.
            NodeHealthStatus health = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NodeHealthStatus>();
            health.SetHealthReport(string.Empty);
            health.SetIsNodeHealthy(isHealthy);
            health.SetLastHealthReportTime(1);
            nodeStatus.SetNodeHealthStatus(health);

            heartbeat.SetNodeStatus(nodeStatus);
            heartbeat.SetLastKnownContainerTokenMasterKey(this.currentContainerTokenMasterKey);
            heartbeat.SetLastKnownNMTokenMasterKey(this.currentNMTokenMasterKey);

            NodeHeartbeatResponse reply = resourceTracker.NodeHeartbeat(heartbeat);

            // Adopt a container-token master key with a different key id.
            MasterKey containerKey = reply.GetContainerTokenMasterKey();
            if (containerKey != null
                && containerKey.GetKeyId() != this.currentContainerTokenMasterKey.GetKeyId())
            {
                this.currentContainerTokenMasterKey = containerKey;
            }

            // Same for the NM-token master key.
            MasterKey nmKey = reply.GetNMTokenMasterKey();
            if (nmKey != null
                && nmKey.GetKeyId() != this.currentNMTokenMasterKey.GetKeyId())
            {
                this.currentNMTokenMasterKey = nmKey;
            }

            return reply;
        }
        /// <summary>
        /// Checks that the client returns a non-null response for a heartbeat, and
        /// that once the test implementation's <c>exception</c> flag is set the call
        /// fails with a <c>YarnException</c> whose message starts with "testMessage".
        /// </summary>
        public virtual void TestNodeHeartbeat()
        {
            NodeHeartbeatRequest request = recordFactory.NewRecordInstance<NodeHeartbeatRequest>();

            NUnit.Framework.Assert.IsNotNull(client.NodeHeartbeat(request));
            TestResourceTrackerPBClientImpl.ResourceTrackerTestImpl.exception = true;
            try
            {
                client.NodeHeartbeat(request);
                NUnit.Framework.Assert.Fail("there  should be YarnException");
            }
            catch (YarnException e)
            {
                // Ordinal comparison: the prefix is a fixed identifier, so the check
                // must not depend on the current culture.
                NUnit.Framework.Assert.IsTrue(
                    e.Message.StartsWith("testMessage", System.StringComparison.Ordinal));
            }
            finally
            {
                // Always clear the flag again so it does not stay set after this test.
                TestResourceTrackerPBClientImpl.ResourceTrackerTestImpl.exception = false;
            }
        }
        /// <summary>
        /// Adds the next container-token and NM-token master keys to the heartbeat
        /// response when the corresponding secret manager has a next key and the
        /// node's last known key is missing or has a different key id.
        /// </summary>
        /// <param name="request">Heartbeat carrying the node's last known master keys (either may be null).</param>
        /// <param name="nodeHeartBeatResponse">Response that receives the keys to send to the node.</param>
        private void PopulateKeys(NodeHeartbeatRequest request, NodeHeartbeatResponse nodeHeartBeatResponse)
        {
            // Check if node's masterKey needs to be updated and if the currentKey has
            // rolled over, send it across

            // ContainerTokenMasterKey
            MasterKey nextMasterKeyForNode = this.containerTokenSecretManager.GetNextKey();

            if (nextMasterKeyForNode != null)
            {
                MasterKey lastKnownKey = request.GetLastKnownContainerTokenMasterKey();

                // A heartbeat without a last known key is treated as out of date
                // instead of failing with a null reference.
                if (lastKnownKey == null || lastKnownKey.GetKeyId() != nextMasterKeyForNode.GetKeyId())
                {
                    nodeHeartBeatResponse.SetContainerTokenMasterKey(nextMasterKeyForNode);
                }
            }

            // NMTokenMasterKey
            nextMasterKeyForNode = this.nmTokenSecretManager.GetNextKey();
            if (nextMasterKeyForNode != null)
            {
                MasterKey lastKnownKey = request.GetLastKnownNMTokenMasterKey();

                if (lastKnownKey == null || lastKnownKey.GetKeyId() != nextMasterKeyForNode.GetKeyId())
                {
                    nodeHeartBeatResponse.SetNMTokenMasterKey(nextMasterKeyForNode);
                }
            }
        }
示例#16
0
 /// <summary>
 /// Placeholder implementation: ignores the request and returns <c>null</c>.
 /// </summary>
 /// <param name="request">Incoming heartbeat (unused).</param>
 /// <returns>Always <c>null</c>.</returns>
 /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
 /// <exception cref="System.IO.IOException"/>
 public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
 {
     // TODO: auto-generated stub, no behaviour implemented yet.
     return null;
 }
示例#17
0
            /// <summary>
            /// Status-updater loop. Until the enclosing instance's <c>isStopped</c> flag
            /// is set, each iteration sends a heartbeat built from the current node
            /// status and the current container-token / NM-token keys, then acts on the
            /// response: it stores the next heartbeat interval, updates master keys,
            /// leaves the loop on a <c>Shutdown</c> or <c>Resync</c> action, and
            /// otherwise dispatches container/application cleanup events and stores
            /// system credentials. The <c>finally</c> block waits on
            /// <c>heartbeatMonitor</c> for the heartbeat interval on every pass.
            /// </summary>
            public void Run()
            {
                // Response id of the last successful heartbeat; passed to GetNodeStatus.
                int lastHeartBeatID = 0;

                while (!this._enclosing.isStopped)
                {
                    try
                    {
                        NodeHeartbeatResponse response   = null;
                        NodeStatus            nodeStatus = this._enclosing.GetNodeStatus(lastHeartBeatID);
                        NodeHeartbeatRequest  request    = NodeHeartbeatRequest.NewInstance(nodeStatus, this.
                                                                                            _enclosing.context.GetContainerTokenSecretManager().GetCurrentKey(), this._enclosing
                                                                                            .context.GetNMTokenSecretManager().GetCurrentKey());
                        response = this._enclosing.resourceTracker.NodeHeartbeat(request);
                        // The interval from the response is what the finally block waits for.
                        this._enclosing.nextHeartBeatInterval = response.GetNextHeartBeatInterval();
                        this.UpdateMasterKeys(response);
                        // Shutdown action: mark the context decommissioned, raise a Shutdown
                        // event and leave the loop.
                        if (response.GetNodeAction() == NodeAction.Shutdown)
                        {
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat,"
                                                                                                     + " hence shutting down.");
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Message from ResourceManager: "
                                                                                                     + response.GetDiagnosticsMessage());
                            this._enclosing.context.SetDecommissioned(true);
                            this._enclosing.dispatcher.GetEventHandler().Handle(new NodeManagerEvent(NodeManagerEventType
                                                                                                     .Shutdown));
                            break;
                        }
                        // Resync action: invalidate the RM identifier, raise a Resync event,
                        // drop the pending completed containers and leave the loop.
                        if (response.GetNodeAction() == NodeAction.Resync)
                        {
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Node is out of sync with ResourceManager,"
                                                                                                     + " hence resyncing.");
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Message from ResourceManager: "
                                                                                                     + response.GetDiagnosticsMessage());
                            this._enclosing.rmIdentifier = ResourceManagerConstants.RmInvalidIdentifier;
                            this._enclosing.dispatcher.GetEventHandler().Handle(new NodeManagerEvent(NodeManagerEventType
                                                                                                     .Resync));
                            this._enclosing.pendingCompletedContainers.Clear();
                            break;
                        }
                        // Any other action: process the rest of the response.
                        this._enclosing.RemoveOrTrackCompletedContainersFromContext(response.GetContainersToBeRemovedFromNM
                                                                                        ());
                        // Only updated here, i.e. not when the heartbeat failed or the loop was left above.
                        lastHeartBeatID = response.GetResponseId();
                        IList <ContainerId> containersToCleanup = response.GetContainersToCleanup();
                        if (!containersToCleanup.IsEmpty())
                        {
                            this._enclosing.dispatcher.GetEventHandler().Handle(new CMgrCompletedContainersEvent
                                                                                    (containersToCleanup, CMgrCompletedContainersEvent.Reason.ByResourcemanager));
                        }
                        IList <ApplicationId> appsToCleanup = response.GetApplicationsToCleanup();
                        // TrackAppsForKeepAlive is called even when the list is empty.
                        this._enclosing.TrackAppsForKeepAlive(appsToCleanup);
                        if (!appsToCleanup.IsEmpty())
                        {
                            this._enclosing.dispatcher.GetEventHandler().Handle(new CMgrCompletedAppsEvent(appsToCleanup
                                                                                                           , CMgrCompletedAppsEvent.Reason.ByResourcemanager));
                        }
                        IDictionary <ApplicationId, ByteBuffer> systemCredentials = response.GetSystemCredentialsForApps
                                                                                        ();
                        // Credentials are optional: only parsed and stored when present and non-empty.
                        if (systemCredentials != null && !systemCredentials.IsEmpty())
                        {
                            ((NodeManager.NMContext) this._enclosing.context).SetSystemCrendentialsForApps(Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl
                                                                                                           .ParseCredentials(systemCredentials));
                        }
                    }
                    catch (ConnectException e)
                    {
                        // Connection failure: raise a Shutdown event and abort the loop by
                        // rethrowing wrapped in YarnRuntimeException.
                        this._enclosing.dispatcher.GetEventHandler().Handle(new NodeManagerEvent(NodeManagerEventType
                                                                                                 .Shutdown));
                        throw new YarnRuntimeException(e);
                    }
                    catch (Exception e)
                    {
                        // Any other failure is logged; the loop continues with the next iteration.
                        Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Error("Caught exception in status-updater"
                                                                                                  , e);
                    }
                    finally
                    {
                        // Runs on every exit from the try block, including the break statements
                        // and the rethrow above.
                        lock (this._enclosing.heartbeatMonitor)
                        {
                            // A non-positive interval is replaced by the configured default.
                            this._enclosing.nextHeartBeatInterval = this._enclosing.nextHeartBeatInterval <=
                                                                    0 ? YarnConfiguration.DefaultRmNmHeartbeatIntervalMs : this._enclosing.nextHeartBeatInterval;
                            try
                            {
                                Sharpen.Runtime.Wait(this._enclosing.heartbeatMonitor, this._enclosing.nextHeartBeatInterval
                                                     );
                            }
                            catch (Exception)
                            {
                                // NOTE(review): every exception from the wait is swallowed here;
                                // presumably meant only for interruption of the wait - confirm.
                            }
                        }
                    }
                }
            }
        /// <summary>
        /// Processes one heartbeat from a node manager. In order: answers with the
        /// <c>shutDown</c> response if the node's host is not valid, answers with
        /// the <c>resync</c> response if the node is not registered, pings the
        /// liveliness monitor, returns the previous response again for a duplicate
        /// heartbeat, requests a resync if the node's response id is too far
        /// behind, and otherwise builds the next response (id incremented by one),
        /// adds cleanup information, master keys and system credentials, and hands
        /// the node's status to the dispatcher.
        /// </summary>
        /// <param name="request">Heartbeat carrying the node status and last known master keys.</param>
        /// <returns>The response to send back to the node manager.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
        {
            NodeStatus remoteNodeStatus = request.GetNodeStatus();
            NodeId     nodeId           = remoteNodeStatus.GetNodeId();

            // 1. Check if it's a valid (i.e. not excluded) node
            if (!this.nodesListManager.IsValidNode(nodeId.GetHost()))
            {
                string message = "Disallowed NodeManager nodeId: " + nodeId + " hostname: " + nodeId
                                 .GetHost();
                Log.Info(message);
                // NOTE(review): shutDown is not declared in this method; if it is a shared
                // instance, setting its message here is visible to other callers - confirm.
                shutDown.SetDiagnosticsMessage(message);
                return shutDown;
            }

            // 2. Check if it's a registered node.
            // TryGetValue instead of the indexer: a standard dictionary indexer throws
            // KeyNotFoundException for an unknown key, which would bypass the
            // "node not found" branch below.
            RMNode rmNode;
            this.rmContext.GetRMNodes().TryGetValue(nodeId, out rmNode);

            if (rmNode == null)
            {
                /* node does not exist */
                string message = "Node not found resyncing " + remoteNodeStatus.GetNodeId();
                Log.Info(message);
                resync.SetDiagnosticsMessage(message);
                return resync;
            }

            // Send ping
            this.nmLivelinessMonitor.ReceivedPing(nodeId);

            // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
            NodeHeartbeatResponse lastNodeHeartbeatResponse = rmNode.GetLastNodeHeartBeatResponse();

            if (remoteNodeStatus.GetResponseId() + 1 == lastNodeHeartbeatResponse.GetResponseId())
            {
                // The node repeated its previous heartbeat: send the previous answer again.
                Log.Info("Received duplicate heartbeat from node " + rmNode.GetNodeAddress() + " responseId="
                         + remoteNodeStatus.GetResponseId());
                return lastNodeHeartbeatResponse;
            }
            else if (remoteNodeStatus.GetResponseId() + 1 < lastNodeHeartbeatResponse.GetResponseId())
            {
                string message = "Too far behind rm response id:" + lastNodeHeartbeatResponse.GetResponseId()
                                 + " nm response id:" + remoteNodeStatus.GetResponseId();
                Log.Info(message);
                resync.SetDiagnosticsMessage(message);
                // TODO: Just sending reboot is not enough. Think more.
                this.rmContext.GetDispatcher().GetEventHandler().Handle(
                    new RMNodeEvent(nodeId, RMNodeEventType.Rebooting));
                return resync;
            }

            // Heartbeat response
            NodeHeartbeatResponse nodeHeartBeatResponse = YarnServerBuilderUtils.NewNodeHeartbeatResponse(
                lastNodeHeartbeatResponse.GetResponseId() + 1, NodeAction.Normal, null, null, null,
                null, nextHeartBeatInterval);

            rmNode.UpdateNodeHeartbeatResponseForCleanup(nodeHeartBeatResponse);
            PopulateKeys(request, nodeHeartBeatResponse);

            ConcurrentMap<ApplicationId, ByteBuffer> systemCredentials = rmContext.GetSystemCredentialsForApps();

            if (!systemCredentials.IsEmpty())
            {
                nodeHeartBeatResponse.SetSystemCredentialsForApps(systemCredentials);
            }

            // 4. Send status to RMNode, saving the latest response.
            this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMNodeStatusEvent(
                nodeId,
                remoteNodeStatus.GetNodeHealthStatus(),
                remoteNodeStatus.GetContainersStatuses(),
                remoteNodeStatus.GetKeepAliveApplications(),
                nodeHeartBeatResponse));
            return nodeHeartBeatResponse;
        }