Пример #1
0
        /// <summary>
        /// Sends a reconnect event to a running node and checks that every cluster
        /// node counter is unchanged, that the node is still Running, and that the
        /// captured <c>nodesListManagerEvent</c> is non-null with type NodeUsable.
        /// </summary>
        public virtual void TestReconnect()
        {
            RMNodeImpl     node                  = GetRunningNode();
            ClusterMetrics cm                    = ClusterMetrics.GetMetrics();
            int            initialActive         = cm.GetNumActiveNMs();
            int            initialLost           = cm.GetNumLostNMs();
            int            initialUnhealthy      = cm.GetUnhealthyNMs();
            int            initialDecommissioned = cm.GetNumDecommisionedNMs();
            int            initialRebooted       = cm.GetNumRebootedNMs();

            // The node reconnects as itself, so no counter should move.
            node.Handle(new RMNodeReconnectEvent(node.GetNodeID(), node, null, null));

            // NUnit's AreEqual takes (expected, actual, message): the message goes last.
            NUnit.Framework.Assert.AreEqual(initialActive, cm.GetNumActiveNMs(), "Active Nodes");
            NUnit.Framework.Assert.AreEqual(initialLost, cm.GetNumLostNMs(), "Lost Nodes");
            NUnit.Framework.Assert.AreEqual(initialUnhealthy, cm.GetUnhealthyNMs(), "Unhealthy Nodes");
            NUnit.Framework.Assert.AreEqual(initialDecommissioned, cm.GetNumDecommisionedNMs(),
                                            "Decommissioned Nodes");
            NUnit.Framework.Assert.AreEqual(initialRebooted, cm.GetNumRebootedNMs(), "Rebooted Nodes");
            NUnit.Framework.Assert.AreEqual(NodeState.Running, node.GetState());
            NUnit.Framework.Assert.IsNotNull(nodesListManagerEvent);
            NUnit.Framework.Assert.AreEqual(NodesListManagerEventType.NodeUsable, nodesListManagerEvent
                                            .GetType());
        }
Пример #2
0
        /// <summary>
        /// Queues one container and one application for cleanup on a running node,
        /// then checks that a status event leaves both queues intact while
        /// <c>UpdateNodeHeartbeatResponseForCleanup</c> drains them into the
        /// heartbeat response.
        /// </summary>
        public virtual void TestUpdateHeartbeatResponseForCleanup()
        {
            RMNodeImpl runningNode = GetRunningNode();
            NodeId     runningId   = runningNode.GetNodeID();

            // Queue a single container for cleanup.
            ContainerId expiredContainer = BuilderUtils.NewContainerId(
                BuilderUtils.NewApplicationAttemptId(BuilderUtils.NewApplicationId(0, 0), 0), 0);
            runningNode.Handle(new RMNodeCleanContainerEvent(runningId, expiredContainer));
            NUnit.Framework.Assert.AreEqual(1, runningNode.GetContainersToCleanUp().Count);

            // Queue a single application for cleanup.
            ApplicationId doneApp = BuilderUtils.NewApplicationId(0, 1);
            runningNode.Handle(new RMNodeCleanAppEvent(runningId, doneApp));
            NUnit.Framework.Assert.AreEqual(1, runningNode.GetAppsToCleanup().Count);

            // A status event must leave both cleanup queues as they are.
            RMNodeStatusEvent plainStatus = GetMockRMNodeStatusEvent();
            runningNode.Handle(plainStatus);
            NUnit.Framework.Assert.AreEqual(1, runningNode.GetContainersToCleanUp().Count);
            NUnit.Framework.Assert.AreEqual(1, runningNode.GetAppsToCleanup().Count);

            // Filling the heartbeat response is what empties the queues.
            NodeHeartbeatResponse heartbeat =
                Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NodeHeartbeatResponse>();
            runningNode.UpdateNodeHeartbeatResponseForCleanup(heartbeat);
            NUnit.Framework.Assert.AreEqual(0, runningNode.GetContainersToCleanUp().Count);
            NUnit.Framework.Assert.AreEqual(0, runningNode.GetAppsToCleanup().Count);

            // The drained entries must now be carried by the response.
            NUnit.Framework.Assert.AreEqual(1, heartbeat.GetContainersToCleanup().Count);
            NUnit.Framework.Assert.AreEqual(expiredContainer, heartbeat.GetContainersToCleanup()[0]);
            NUnit.Framework.Assert.AreEqual(1, heartbeat.GetApplicationsToCleanup().Count);
            NUnit.Framework.Assert.AreEqual(doneApp, heartbeat.GetApplicationsToCleanup()[0]);
        }
Пример #3
0
        /// <summary>
        /// Creates an <c>RMNodeImpl</c> for node id "localhost":0 bound to the
        /// shared <c>rmContext</c>; no event is sent to the node.
        /// </summary>
        /// <returns>The newly constructed node.</returns>
        private RMNodeImpl GetNewNode()
        {
            NodeId localId = BuilderUtils.NewNodeId("localhost", 0);

            return new RMNodeImpl(localId, rmContext, null, 0, 0, null, null, null);
        }
Пример #4
0
        /// <summary>
        /// Takes a running node, feeds it a status event whose health report is
        /// flagged unhealthy ("sick"), and asserts the node ends up Unhealthy.
        /// </summary>
        /// <returns>The node after the unhealthy status has been handled.</returns>
        private RMNodeImpl GetUnhealthyNode()
        {
            RMNodeImpl sickNode = GetRunningNode();

            NodeHealthStatus unhealthyReport = NodeHealthStatus.NewInstance(
                false, "sick", Runtime.CurrentTimeMillis());
            RMNodeStatusEvent unhealthyEvent = new RMNodeStatusEvent(
                sickNode.GetNodeID(), unhealthyReport, new AList<ContainerStatus>(), null, null);

            sickNode.Handle(unhealthyEvent);
            NUnit.Framework.Assert.AreEqual(NodeState.Unhealthy, sickNode.GetState());

            return sickNode;
        }
Пример #5
0
        /// <summary>
        /// Builds a node for "localhost":0 with a (4096, 4) resource capability and
        /// the given NodeManager version, sends it a started event, and asserts it
        /// reaches the Running state.
        /// </summary>
        /// <param name="nmVersion">NodeManager version string passed to the node constructor.</param>
        /// <returns>The node after the started event has been handled.</returns>
        private RMNodeImpl GetRunningNode(string nmVersion)
        {
            NodeId   localId      = BuilderUtils.NewNodeId("localhost", 0);
            Resource nodeCapacity = Resource.NewInstance(4096, 4);

            RMNodeImpl startedNode = new RMNodeImpl(
                localId, rmContext, null, 0, 0, null, nodeCapacity, nmVersion);
            RMNodeStartedEvent startEvent = new RMNodeStartedEvent(startedNode.GetNodeID(), null, null);

            startedNode.Handle(startEvent);
            NUnit.Framework.Assert.AreEqual(NodeState.Running, startedNode.GetState());

            return startedNode;
        }
Пример #6
0
        /// <summary>
        /// Expires an unhealthy node and checks that the number of
        /// <c>NodeRemovedSchedulerEvent</c> deliveries seen by the scheduler mock is
        /// two both before and after the expiry, and that the node becomes Lost.
        /// </summary>
        public virtual void TestUnhealthyExpireForSchedulerRemove()
        {
            RMNodeImpl unhealthyNode = GetUnhealthyNode();

            // Baseline: exactly two node-removed deliveries before the expiry.
            Org.Mockito.Mockito.Verify(scheduler, Org.Mockito.Mockito.Times(2))
                .Handle(Matchers.Any<NodeRemovedSchedulerEvent>());

            unhealthyNode.Handle(new RMNodeEvent(unhealthyNode.GetNodeID(), RMNodeEventType.Expire));

            // The expiry must not add another node-removed delivery.
            Org.Mockito.Mockito.Verify(scheduler, Org.Mockito.Mockito.Times(2))
                .Handle(Matchers.Any<NodeRemovedSchedulerEvent>());
            NUnit.Framework.Assert.AreEqual(NodeState.Lost, unhealthyNode.GetState());
        }
Пример #7
0
        /// <summary>
        /// Starts the shared node and a second node, feeds them mocked status
        /// events that each carry one container status, and checks which container
        /// ids show up in <c>completedContainers</c>.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestContainerUpdate()
        {
            // Start the shared node.
            node.Handle(new RMNodeStartedEvent(null, null, null));

            // Create and start a second node.
            NodeId     secondNodeId = BuilderUtils.NewNodeId("localhost:1", 1);
            RMNodeImpl secondNode   = new RMNodeImpl(secondNodeId, rmContext, null, 0, 0, null, null, null);
            secondNode.Handle(new RMNodeStartedEvent(null, null, null));

            // One container id for the first node, two for the second.
            ContainerId firstNodeContainer = BuilderUtils.NewContainerId(
                BuilderUtils.NewApplicationAttemptId(BuilderUtils.NewApplicationId(0, 0), 0), 0);
            ContainerId secondNodeContainerA = BuilderUtils.NewContainerId(
                BuilderUtils.NewApplicationAttemptId(BuilderUtils.NewApplicationId(1, 1), 1), 1);
            ContainerId secondNodeContainerB = BuilderUtils.NewContainerId(
                BuilderUtils.NewApplicationAttemptId(BuilderUtils.NewApplicationId(1, 1), 1), 2);

            RMNodeStatusEvent firstNodeEvent   = GetMockRMNodeStatusEvent();
            RMNodeStatusEvent secondNodeEventA = GetMockRMNodeStatusEvent();
            RMNodeStatusEvent secondNodeEventB = GetMockRMNodeStatusEvent();

            ContainerStatus firstNodeStatus   = Org.Mockito.Mockito.Mock<ContainerStatus>();
            ContainerStatus secondNodeStatusA = Org.Mockito.Mockito.Mock<ContainerStatus>();
            ContainerStatus secondNodeStatusB = Org.Mockito.Mockito.Mock<ContainerStatus>();

            // First node: a single status event carrying a single container status.
            Org.Mockito.Mockito.DoReturn(firstNodeContainer)
                .When(firstNodeStatus).GetContainerId();
            Org.Mockito.Mockito.DoReturn(Sharpen.Collections.SingletonList(firstNodeStatus))
                .When(firstNodeEvent).GetContainers();
            node.Handle(firstNodeEvent);
            NUnit.Framework.Assert.AreEqual(1, completedContainers.Count);
            NUnit.Framework.Assert.AreEqual(firstNodeContainer, completedContainers[0].GetContainerId());
            completedContainers.Clear();

            // Second node: two status events, one container status each.
            Org.Mockito.Mockito.DoReturn(secondNodeContainerA)
                .When(secondNodeStatusA).GetContainerId();
            Org.Mockito.Mockito.DoReturn(Sharpen.Collections.SingletonList(secondNodeStatusA))
                .When(secondNodeEventA).GetContainers();
            Org.Mockito.Mockito.DoReturn(secondNodeContainerB)
                .When(secondNodeStatusB).GetContainerId();
            Org.Mockito.Mockito.DoReturn(Sharpen.Collections.SingletonList(secondNodeStatusB))
                .When(secondNodeEventB).GetContainers();

            // Next-heartbeat is switched off for the first event and on for the
            // second; both containers are expected to be reported afterwards, in order.
            secondNode.SetNextHeartBeat(false);
            secondNode.Handle(secondNodeEventA);
            secondNode.SetNextHeartBeat(true);
            secondNode.Handle(secondNodeEventB);
            NUnit.Framework.Assert.AreEqual(2, completedContainers.Count);
            NUnit.Framework.Assert.AreEqual(secondNodeContainerA, completedContainers[0].GetContainerId());
            NUnit.Framework.Assert.AreEqual(secondNodeContainerB, completedContainers[1].GetContainerId());
        }
Пример #8
0
        /// <summary>
        /// Checks that handling a reconnect event that carries a node with a
        /// different NodeManager version updates the version reported by the
        /// original node.
        /// </summary>
        public virtual void TestReconnnectUpdate()
        {
            string originalVersion = "nm version 1";
            string updatedVersion  = "nm version 2";

            RMNodeImpl registered = GetRunningNode(originalVersion);
            NUnit.Framework.Assert.AreEqual(originalVersion, registered.GetNodeManagerVersion());

            RMNodeImpl replacement = GetRunningNode(updatedVersion);
            RMNodeReconnectEvent reconnect = new RMNodeReconnectEvent(
                registered.GetNodeID(), replacement, null, null);

            registered.Handle(reconnect);
            NUnit.Framework.Assert.AreEqual(updatedVersion, registered.GetNodeManagerVersion());
        }
Пример #9
0
        /// <summary>
        /// Checks that a resource-update event on a rebooted node changes its total
        /// capability from (4096, 4) to (2048, 2) while the node stays Rebooted.
        /// </summary>
        public virtual void TestResourceUpdateOnRebootedNode()
        {
            RMNodeImpl node        = GetRebootedNode();
            Resource   oldCapacity = node.GetTotalCapability();

            // NUnit's AreEqual takes (expected, actual, message): the literal is the
            // expected value and the message goes last.
            NUnit.Framework.Assert.AreEqual(4096, oldCapacity.GetMemory(),
                                            "Memory resource is not match.");
            NUnit.Framework.Assert.AreEqual(4, oldCapacity.GetVirtualCores(),
                                            "CPU resource is not match.");

            node.Handle(new RMNodeResourceUpdateEvent(node.GetNodeID(), ResourceOption.NewInstance
                                                          (Resource.NewInstance(2048, 2), RMNode.OverCommitTimeoutMillisDefault)));
            Resource newCapacity = node.GetTotalCapability();

            NUnit.Framework.Assert.AreEqual(2048, newCapacity.GetMemory(),
                                            "Memory resource is not match.");
            NUnit.Framework.Assert.AreEqual(2, newCapacity.GetVirtualCores(),
                                            "CPU resource is not match.");
            NUnit.Framework.Assert.AreEqual(NodeState.Rebooted, node.GetState());
        }
Пример #10
0
        /// <summary>
        /// Registers three mock NodeManagers (nm1 Running, nm2 left in New, nm3
        /// driven to Unhealthy) and checks that the ws/v1/cluster/nodes JSON
        /// listing contains all three nodes.
        /// </summary>
        public virtual void TestNodesDefaultWithUnHealthyNode()
        {
            WebResource r   = Resource();
            MockNM      nm1 = rm.RegisterNode("h1:1234", 5120);
            MockNM      nm2 = rm.RegisterNode("h2:1235", 5121);

            rm.SendNodeStarted(nm1);
            rm.NMwaitForState(nm1.GetNodeId(), NodeState.Running);
            rm.NMwaitForState(nm2.GetNodeId(), NodeState.New);
            MockNM nm3 = rm.RegisterNode("h3:1236", 5122);

            rm.NMwaitForState(nm3.GetNodeId(), NodeState.New);
            rm.SendNodeStarted(nm3);
            rm.NMwaitForState(nm3.GetNodeId(), NodeState.Running);

            // Push an unhealthy status straight into nm3's RMNode.
            RMNodeImpl       node       = (RMNodeImpl)rm.GetRMContext().GetRMNodes()[nm3.GetNodeId()];
            NodeHealthStatus nodeHealth = NodeHealthStatus.NewInstance(false, "test health report"
                                                                       , Runtime.CurrentTimeMillis());

            node.Handle(new RMNodeStatusEvent(nm3.GetNodeId(), nodeHealth, new AList <ContainerStatus
                                                                                      >(), null, null));
            rm.NMwaitForState(nm3.GetNodeId(), NodeState.Unhealthy);
            ClientResponse response = r.Path("ws").Path("v1").Path("cluster").Path("nodes").Accept
                                          (MediaType.ApplicationJson).Get <ClientResponse>();

            NUnit.Framework.Assert.AreEqual(MediaType.ApplicationJsonType, response.GetType()
                                            );
            JSONObject json = response.GetEntity <JSONObject>();

            // NUnit's AreEqual takes (expected, actual, message): the message goes last.
            NUnit.Framework.Assert.AreEqual(1, json.Length(), "incorrect number of elements");
            JSONObject nodes = json.GetJSONObject("nodes");

            NUnit.Framework.Assert.AreEqual(1, nodes.Length(), "incorrect number of elements");
            JSONArray nodeArray = nodes.GetJSONArray("node");

            // 3 nodes, including the unhealthy node and the new node.
            NUnit.Framework.Assert.AreEqual(3, nodeArray.Length(), "incorrect number of elements");
        }
Пример #11
0
        /// <summary>
        /// Sends a Rebooting event to an unhealthy node and checks that the
        /// unhealthy counter drops by one, the rebooted counter rises by one, the
        /// other counters are unchanged, and the node ends up Rebooted.
        /// </summary>
        public virtual void TestUnhealthyRebooting()
        {
            RMNodeImpl     node                  = GetUnhealthyNode();
            ClusterMetrics cm                    = ClusterMetrics.GetMetrics();
            int            initialActive         = cm.GetNumActiveNMs();
            int            initialLost           = cm.GetNumLostNMs();
            int            initialUnhealthy      = cm.GetUnhealthyNMs();
            int            initialDecommissioned = cm.GetNumDecommisionedNMs();
            int            initialRebooted       = cm.GetNumRebootedNMs();

            node.Handle(new RMNodeEvent(node.GetNodeID(), RMNodeEventType.Rebooting));

            // NUnit's AreEqual takes (expected, actual, message): the message goes last.
            NUnit.Framework.Assert.AreEqual(initialActive, cm.GetNumActiveNMs(), "Active Nodes");
            NUnit.Framework.Assert.AreEqual(initialLost, cm.GetNumLostNMs(), "Lost Nodes");
            NUnit.Framework.Assert.AreEqual(initialUnhealthy - 1, cm.GetUnhealthyNMs(),
                                            "Unhealthy Nodes");
            NUnit.Framework.Assert.AreEqual(initialDecommissioned, cm.GetNumDecommisionedNMs(),
                                            "Decommissioned Nodes");
            NUnit.Framework.Assert.AreEqual(initialRebooted + 1, cm.GetNumRebootedNMs(),
                                            "Rebooted Nodes");
            NUnit.Framework.Assert.AreEqual(NodeState.Rebooted, node.GetState());
        }
Пример #12
0
        /// <summary>
        /// Test fixture setup: builds the RM context around an inline dispatcher,
        /// installs a mocked nodes-list manager and scheduler, registers the test
        /// event dispatchers, creates the shared node, and clears
        /// <c>nodesListManagerEvent</c>.
        /// </summary>
        public virtual void SetUp()
        {
            InlineDispatcher inlineDispatcher = new InlineDispatcher();

            rmContext = new RMContextImpl(
                inlineDispatcher, null, null, null,
                Org.Mockito.Mockito.Mock<DelegationTokenRenewer>(),
                null, null, null, null, null);

            // The mocked nodes-list manager hands out a mocked hosts reader.
            NodesListManager mockedListManager = Org.Mockito.Mockito.Mock<NodesListManager>();
            HostsFileReader  mockedHostsReader = Org.Mockito.Mockito.Mock<HostsFileReader>();
            Org.Mockito.Mockito.When(mockedListManager.GetHostsReader()).ThenReturn(mockedHostsReader);
            ((RMContextImpl)rmContext).SetNodesListManager(mockedListManager);

            // Scheduler mock whose Handle call is answered by the test's _Answer_115.
            scheduler = Org.Mockito.Mockito.Mock<YarnScheduler>();
            Org.Mockito.Mockito.DoAnswer(new _Answer_115(this))
                .When(scheduler).Handle(Matchers.Any<SchedulerEvent>());

            // Route scheduler and nodes-list-manager events to the test dispatchers.
            rmDispatcherRegister:
            inlineDispatcher.Register(
                typeof(SchedulerEventType),
                new TestRMNodeTransitions.TestSchedulerEventDispatcher(this));
            inlineDispatcher.Register(
                typeof(NodesListManagerEventType),
                new TestRMNodeTransitions.TestNodeListManagerEventDispatcher(this));

            NodeId localId = BuilderUtils.NewNodeId("localhost", 0);
            node = new RMNodeImpl(localId, rmContext, null, 0, 0, null, null, null);
            nodesListManagerEvent = null;
        }
Пример #13
0
        /// <summary>
        /// Handles a NodeManager registration request.
        /// The request is answered with a Shutdown action when the reported
        /// NodeManager version is below the configured minimum, when the host is not
        /// accepted by the nodes-list manager, or when the offered capability is
        /// below the minimum allocation. Otherwise the node is added to the RM
        /// context (or treated as a reconnect if it is already present), registered
        /// with the liveliness monitor, and answered with a Normal action.
        /// </summary>
        /// <param name="request">Registration request sent by the NodeManager.</param>
        /// <returns>The response carrying the node action and, on success, the master keys,
        /// RM identifier and RM version.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual RegisterNodeManagerResponse RegisterNodeManager(RegisterNodeManagerRequest
                                                                       request)
        {
            NodeId   registeringId   = request.GetNodeId();
            string   hostName        = registeringId.GetHost();
            int      commandPort     = registeringId.GetPort();
            int      webPort         = request.GetHttpPort();
            Resource offered         = request.GetResource();
            string   reportedVersion = request.GetNMVersion();
            RegisterNodeManagerResponse reply =
                recordFactory.NewRecordInstance<RegisterNodeManagerResponse>();

            // Version gate, skipped entirely when the minimum is "NONE".
            if (!minimumNodeManagerVersion.Equals("NONE"))
            {
                // "EqualToRM" is replaced by the RM's own version.
                if (minimumNodeManagerVersion.Equals("EqualToRM"))
                {
                    minimumNodeManagerVersion = YarnVersionInfo.GetVersion();
                }

                bool belowMinimum = (reportedVersion == null)
                                    || VersionUtil.CompareVersions(reportedVersion, minimumNodeManagerVersion) < 0;
                if (belowMinimum)
                {
                    string versionRejection = "Disallowed NodeManager Version " + reportedVersion
                                              + ", is less than the minimum version "
                                              + minimumNodeManagerVersion + " sending SHUTDOWN signal to " + "NodeManager.";
                    Log.Info(versionRejection);
                    reply.SetDiagnosticsMessage(versionRejection);
                    reply.SetNodeAction(NodeAction.Shutdown);
                    return reply;
                }
            }

            // Host gate: the nodes-list manager must accept this host.
            if (!this.nodesListManager.IsValidNode(hostName))
            {
                string hostRejection = "Disallowed NodeManager from  " + hostName
                                       + ", Sending SHUTDOWN signal to the NodeManager.";
                Log.Info(hostRejection);
                reply.SetDiagnosticsMessage(hostRejection);
                reply.SetNodeAction(NodeAction.Shutdown);
                return reply;
            }

            // Capacity gate: the node must offer at least the minimum allocation.
            if (offered.GetMemory() < minAllocMb || offered.GetVirtualCores() < minAllocVcores)
            {
                string capacityRejection = "NodeManager from  " + hostName
                                           + " doesn't satisfy minimum allocations, Sending SHUTDOWN"
                                           + " signal to the NodeManager.";
                Log.Info(capacityRejection);
                reply.SetDiagnosticsMessage(capacityRejection);
                reply.SetNodeAction(NodeAction.Shutdown);
                return reply;
            }

            reply.SetContainerTokenMasterKey(containerTokenSecretManager.GetCurrentKey());
            reply.SetNMTokenMasterKey(nmTokenSecretManager.GetCurrentKey());

            RMNode candidate = new RMNodeImpl(registeringId, rmContext, hostName, commandPort, webPort,
                                              Resolve(hostName), offered, reportedVersion);
            RMNode existing = this.rmContext.GetRMNodes().PutIfAbsent(registeringId, candidate);

            if (existing != null)
            {
                // The node id is already known: treat this as a reconnect.
                Log.Info("Reconnect from the node at: " + hostName);
                this.nmLivelinessMonitor.Unregister(registeringId);
                // Reset heartbeat ID since node just restarted.
                existing.ResetLastNodeHeartBeatResponse();
                this.rmContext.GetDispatcher().GetEventHandler().Handle(
                    new RMNodeReconnectEvent(registeringId, candidate,
                                             request.GetRunningApplications(),
                                             request.GetNMContainerStatuses()));
            }
            else
            {
                // First registration for this node id.
                this.rmContext.GetDispatcher().GetEventHandler().Handle(
                    new RMNodeStartedEvent(registeringId,
                                           request.GetNMContainerStatuses(),
                                           request.GetRunningApplications()));
            }

            // On every node manager register we will be clearing NMToken keys if
            // present for any running application.
            this.nmTokenSecretManager.RemoveNodeKey(registeringId);
            this.nmLivelinessMonitor.Register(registeringId);

            // Handle received container status, this should be processed after new
            // RMNode inserted
            if (!rmContext.IsWorkPreservingRecoveryEnabled()
                && !request.GetNMContainerStatuses().IsEmpty())
            {
                Log.Info("received container statuses on node manager register :"
                         + request.GetNMContainerStatuses());
                foreach (NMContainerStatus reported in request.GetNMContainerStatuses())
                {
                    HandleNMContainerStatus(reported, registeringId);
                }
            }

            string registeredNotice = "NodeManager from node " + hostName + "(cmPort: " + commandPort
                                      + " httpPort: " + webPort + ") " + "registered with capability: "
                                      + offered + ", assigned nodeId " + registeringId;
            Log.Info(registeredNotice);

            reply.SetNodeAction(NodeAction.Normal);
            reply.SetRMIdentifier(ResourceManager.GetClusterTimeStamp());
            reply.SetRMVersion(YarnVersionInfo.GetVersion());
            return reply;
        }
Пример #14
0
        /// <summary>
        /// Looks up the RM node registered under the mock NodeManager's id and
        /// delivers an Expire event to it.
        /// </summary>
        /// <param name="nm">Mock NodeManager whose RM node should receive the event.</param>
        /// <exception cref="System.Exception"/>
        public virtual void SendNodeLost(MockNM nm)
        {
            RMNodeImpl target = (RMNodeImpl)GetRMContext().GetRMNodes()[nm.GetNodeId()];
            RMNodeEvent expireEvent = new RMNodeEvent(nm.GetNodeId(), RMNodeEventType.Expire);

            target.Handle(expireEvent);
        }
Пример #15
0
        /// <summary>
        /// Looks up the RM node registered under the mock NodeManager's id and
        /// delivers a started event (with no container statuses or applications) to it.
        /// </summary>
        /// <param name="nm">Mock NodeManager whose RM node should receive the event.</param>
        /// <exception cref="System.Exception"/>
        public virtual void SendNodeStarted(MockNM nm)
        {
            RMNodeImpl target = (RMNodeImpl)GetRMContext().GetRMNodes()[nm.GetNodeId()];
            RMNodeStartedEvent startEvent = new RMNodeStartedEvent(nm.GetNodeId(), null, null);

            target.Handle(startEvent);
        }