Ejemplo n.º 1
0
        /// <summary>
        /// Checks the response-id sequencing of <c>resourceTrackerService.NodeHeartbeat</c>
        /// for a freshly registered node: the id advances by one per accepted heartbeat,
        /// a heartbeat that is one id behind gets the same id back, and a heartbeat that
        /// is further behind is answered with a <c>Resync</c> action and a diagnostic message.
        /// </summary>
        public virtual void TestRPCResponseId()
        {
            string   node       = "localhost";
            Resource capability = BuilderUtils.NewResource(1024, 1);

            nodeId = NodeId.NewInstance(node, 1234);

            // Register the node. (The original built a second, identical request that
            // was never sent anywhere; that dead local has been dropped.)
            RegisterNodeManagerRequest registration = recordFactory.NewRecordInstance<RegisterNodeManagerRequest>();
            registration.SetNodeId(nodeId);
            registration.SetHttpPort(0);
            registration.SetResource(capability);
            resourceTrackerService.RegisterNodeManager(registration);

            // Build a healthy node status; the same status object is reused (and mutated)
            // by every heartbeat below.
            NodeHealthStatus nodeHealthStatus = recordFactory.NewRecordInstance<NodeHealthStatus>();
            nodeHealthStatus.SetIsNodeHealthy(true);

            NodeStatus nodeStatus = recordFactory.NewRecordInstance<NodeStatus>();
            nodeStatus.SetNodeId(nodeId);
            nodeStatus.SetNodeHealthStatus(nodeHealthStatus);

            NodeHeartbeatRequest nodeHeartBeatRequest = recordFactory.NewRecordInstance<NodeHeartbeatRequest>();
            nodeHeartBeatRequest.SetNodeStatus(nodeStatus);

            // First heartbeat carries response id 0 and is expected to be answered with 1.
            nodeStatus.SetResponseId(0);
            NodeHeartbeatResponse response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(1, response.GetResponseId());

            // Echoing the received id back is expected to advance the sequence to 2.
            nodeStatus.SetResponseId(response.GetResponseId());
            response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());

            /* try calling with less response id: the status still carries id 1,
             * so the same heartbeat is replayed and id 2 is expected again */
            response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());

            // Falling back to id 0 is expected to trigger a resync with a diagnostic message.
            nodeStatus.SetResponseId(0);
            response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, response.GetNodeAction());
            NUnit.Framework.Assert.AreEqual("Too far behind rm response id:2 nm response id:0"
                                            , response.GetDiagnosticsMessage());
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Starts a <c>MockRM</c>, registers two mock nodes and checks that a normal
        /// heartbeat yields <c>NodeAction.Normal</c>, while a heartbeat sent with
        /// response id -100 yields <c>NodeAction.Resync</c>, the matching diagnostic
        /// message, and a rebooted-NM count one higher than before.
        /// </summary>
        public virtual void TestReboot()
        {
            rm = new MockRM(new Configuration());
            rm.Start();

            MockNM healthyNode = rm.RegisterNode("host1:1234", 5120);
            MockNM staleNode   = rm.RegisterNode("host2:1234", 2048);

            // Snapshot the metric after registration and before any heartbeat is sent.
            int rebootedBefore = ClusterMetrics.GetMetrics().GetNumRebootedNMs();

            // A regular heartbeat from the first node.
            NodeHeartbeatResponse normalResponse = healthyNode.NodeHeartbeat(true);
            bool actionIsNormal = NodeAction.Normal.Equals(normalResponse.GetNodeAction());
            NUnit.Framework.Assert.IsTrue(actionIsNormal);

            // A heartbeat from the second node with no container statuses and response id -100.
            Dictionary<ApplicationId, IList<ContainerStatus>> noContainerStatuses =
                new Dictionary<ApplicationId, IList<ContainerStatus>>();
            NodeHeartbeatResponse resyncResponse = staleNode.NodeHeartbeat(noContainerStatuses, true, -100);
            bool actionIsResync = NodeAction.Resync.Equals(resyncResponse.GetNodeAction());
            NUnit.Framework.Assert.IsTrue(actionIsResync);
            NUnit.Framework.Assert.AreEqual(
                "Too far behind rm response id:0 nm response id:-100",
                resyncResponse.GetDiagnosticsMessage());

            CheckRebootedNMCount(rm, rebootedBefore + 1);
        }
Ejemplo n.º 3
0
            /// <summary>
            /// Heartbeat loop of the enclosing node status updater. Until
            /// <c>_enclosing.isStopped</c> is set, each iteration sends the current node
            /// status to the resource tracker, reacts to the returned node action
            /// (shutdown / resync / normal processing) and then waits on
            /// <c>_enclosing.heartbeatMonitor</c> for the next heartbeat interval.
            /// </summary>
            /// <exception cref="YarnRuntimeException">
            /// Thrown (wrapping the original exception) when a <c>ConnectException</c>
            /// is caught, after a Shutdown event has been dispatched.
            /// </exception>
            public void Run()
            {
                // Response id of the last heartbeat that was processed on the normal path;
                // it is passed to GetNodeStatus when building the next heartbeat.
                int lastHeartBeatID = 0;

                while (!this._enclosing.isStopped)
                {
                    try
                    {
                        NodeHeartbeatResponse response   = null;
                        NodeStatus            nodeStatus = this._enclosing.GetNodeStatus(lastHeartBeatID);
                        // The request carries the node status plus the current keys of the
                        // container-token and NM-token secret managers.
                        NodeHeartbeatRequest  request    = NodeHeartbeatRequest.NewInstance(nodeStatus, this.
                                                                                            _enclosing.context.GetContainerTokenSecretManager().GetCurrentKey(), this._enclosing
                                                                                            .context.GetNMTokenSecretManager().GetCurrentKey());
                        response = this._enclosing.resourceTracker.NodeHeartbeat(request);
                        // Take over the interval from the response; non-positive values are
                        // replaced by the default in the finally block below.
                        this._enclosing.nextHeartBeatInterval = response.GetNextHeartBeatInterval();
                        this.UpdateMasterKeys(response);
                        // Shutdown: log, mark the context as decommissioned, dispatch a
                        // Shutdown event and leave the loop.
                        if (response.GetNodeAction() == NodeAction.Shutdown)
                        {
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat,"
                                                                                                     + " hence shutting down.");
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Message from ResourceManager: "
                                                                                                     + response.GetDiagnosticsMessage());
                            this._enclosing.context.SetDecommissioned(true);
                            this._enclosing.dispatcher.GetEventHandler().Handle(new NodeManagerEvent(NodeManagerEventType
                                                                                                     .Shutdown));
                            break;
                        }
                        // Resync: log, reset the RM identifier to the invalid marker, dispatch
                        // a Resync event, drop the pending completed containers and leave the loop.
                        if (response.GetNodeAction() == NodeAction.Resync)
                        {
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Node is out of sync with ResourceManager,"
                                                                                                     + " hence resyncing.");
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Message from ResourceManager: "
                                                                                                     + response.GetDiagnosticsMessage());
                            this._enclosing.rmIdentifier = ResourceManagerConstants.RmInvalidIdentifier;
                            this._enclosing.dispatcher.GetEventHandler().Handle(new NodeManagerEvent(NodeManagerEventType
                                                                                                     .Resync));
                            this._enclosing.pendingCompletedContainers.Clear();
                            break;
                        }
                        // Normal path from here on.
                        this._enclosing.RemoveOrTrackCompletedContainersFromContext(response.GetContainersToBeRemovedFromNM
                                                                                        ());
                        // The id only advances on the normal path, and only if the call above
                        // did not throw.
                        lastHeartBeatID = response.GetResponseId();
                        // Forward containers the resource manager wants cleaned up, if any.
                        IList <ContainerId> containersToCleanup = response.GetContainersToCleanup();
                        if (!containersToCleanup.IsEmpty())
                        {
                            this._enclosing.dispatcher.GetEventHandler().Handle(new CMgrCompletedContainersEvent
                                                                                    (containersToCleanup, CMgrCompletedContainersEvent.Reason.ByResourcemanager));
                        }
                        // Applications to clean up are always handed to TrackAppsForKeepAlive
                        // (even when the list is empty); the event is only sent for a non-empty list.
                        IList <ApplicationId> appsToCleanup = response.GetApplicationsToCleanup();
                        this._enclosing.TrackAppsForKeepAlive(appsToCleanup);
                        if (!appsToCleanup.IsEmpty())
                        {
                            this._enclosing.dispatcher.GetEventHandler().Handle(new CMgrCompletedAppsEvent(appsToCleanup
                                                                                                           , CMgrCompletedAppsEvent.Reason.ByResourcemanager));
                        }
                        // System credentials are optional: both null and empty are skipped.
                        IDictionary <ApplicationId, ByteBuffer> systemCredentials = response.GetSystemCredentialsForApps
                                                                                        ();
                        if (systemCredentials != null && !systemCredentials.IsEmpty())
                        {
                            ((NodeManager.NMContext) this._enclosing.context).SetSystemCrendentialsForApps(Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl
                                                                                                           .ParseCredentials(systemCredentials));
                        }
                    }
                    catch (ConnectException e)
                    {
                        // A connection failure is treated as fatal: dispatch Shutdown and
                        // rethrow wrapped, which propagates out of Run.
                        this._enclosing.dispatcher.GetEventHandler().Handle(new NodeManagerEvent(NodeManagerEventType
                                                                                                 .Shutdown));
                        throw new YarnRuntimeException(e);
                    }
                    catch (Exception e)
                    {
                        // Any other failure is logged and the loop continues with the next
                        // iteration; lastHeartBeatID keeps whatever value it had when the
                        // exception was thrown.
                        Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Error("Caught exception in status-updater"
                                                                                                  , e);
                    }
                    finally
                    {
                        // Runs at the end of every iteration, including the ones that leave
                        // the loop via break above, so one more wait happens before exiting.
                        // NOTE(review): confirm that waiting after Shutdown/Resync is intended.
                        lock (this._enclosing.heartbeatMonitor)
                        {
                            // Fall back to the configured default when the interval is not positive.
                            this._enclosing.nextHeartBeatInterval = this._enclosing.nextHeartBeatInterval <=
                                                                    0 ? YarnConfiguration.DefaultRmNmHeartbeatIntervalMs : this._enclosing.nextHeartBeatInterval;
                            try
                            {
                                // Presumably the Sharpen port of Java's Object.wait(timeout), i.e. a
                                // timed wait that a notify on heartbeatMonitor can cut short — TODO confirm.
                                Sharpen.Runtime.Wait(this._enclosing.heartbeatMonitor, this._enclosing.nextHeartBeatInterval
                                                     );
                            }
                            catch (Exception)
                            {
                                // Every exception from the wait is swallowed silently.
                                // NOTE(review): presumably only thread interruption was meant to
                                // be ignored here — confirm whether this catch should be narrower.
                            }
                        }
                    }
                }
            }