/// <summary>
/// Sends a reconnect event to a running node and checks that none of the
/// cluster node counters change, that the node is still in the Running state,
/// and that a nodes-list-manager event was recorded.
/// </summary>
public virtual void TestReconnect()
{
    RMNodeImpl node = GetRunningNode();
    ClusterMetrics cm = ClusterMetrics.GetMetrics();

    // Snapshot every counter before the event so the assertions below compare deltas.
    int initialActive = cm.GetNumActiveNMs();
    int initialLost = cm.GetNumLostNMs();
    int initialUnhealthy = cm.GetUnhealthyNMs();
    int initialDecommissioned = cm.GetNumDecommisionedNMs();
    int initialRebooted = cm.GetNumRebootedNMs();

    node.Handle(new RMNodeReconnectEvent(node.GetNodeID(), node, null, null));

    // NUnit's argument order is (expected, actual, message); the message goes last.
    NUnit.Framework.Assert.AreEqual(initialActive, cm.GetNumActiveNMs(), "Active Nodes");
    NUnit.Framework.Assert.AreEqual(initialLost, cm.GetNumLostNMs(), "Lost Nodes");
    NUnit.Framework.Assert.AreEqual(initialUnhealthy, cm.GetUnhealthyNMs(), "Unhealthy Nodes");
    NUnit.Framework.Assert.AreEqual(initialDecommissioned, cm.GetNumDecommisionedNMs(), "Decommissioned Nodes");
    NUnit.Framework.Assert.AreEqual(initialRebooted, cm.GetNumRebootedNMs(), "Rebooted Nodes");
    NUnit.Framework.Assert.AreEqual(NodeState.Running, node.GetState());

    NUnit.Framework.Assert.IsNotNull(nodesListManagerEvent);
    // NOTE(review): if GetType() binds to System.Object.GetType() here, comparing it
    // with an enum value can never succeed - confirm which member the event exposes.
    NUnit.Framework.Assert.AreEqual(NodesListManagerEventType.NodeUsable, nodesListManagerEvent.GetType());
}
/// <summary>
/// Queues one container and one application for cleanup on a running node,
/// checks that a status event leaves both queues untouched, and then checks
/// that UpdateNodeHeartbeatResponseForCleanup moves both entries into the
/// heartbeat response and empties the node-side queues.
/// </summary>
public virtual void TestUpdateHeartbeatResponseForCleanup()
{
    RMNodeImpl runningNode = GetRunningNode();
    NodeId runningNodeId = runningNode.GetNodeID();

    // Queue a container for cleanup.
    ContainerId containerToClean = BuilderUtils.NewContainerId(
        BuilderUtils.NewApplicationAttemptId(BuilderUtils.NewApplicationId(0, 0), 0), 0);
    runningNode.Handle(new RMNodeCleanContainerEvent(runningNodeId, containerToClean));
    NUnit.Framework.Assert.AreEqual(1, runningNode.GetContainersToCleanUp().Count);

    // Queue an application for cleanup.
    ApplicationId appToClean = BuilderUtils.NewApplicationId(0, 1);
    runningNode.Handle(new RMNodeCleanAppEvent(runningNodeId, appToClean));
    NUnit.Framework.Assert.AreEqual(1, runningNode.GetAppsToCleanup().Count);

    // A plain status update must leave both cleanup queues as they are.
    RMNodeStatusEvent heartbeat = GetMockRMNodeStatusEvent();
    runningNode.Handle(heartbeat);
    NUnit.Framework.Assert.AreEqual(1, runningNode.GetContainersToCleanUp().Count);
    NUnit.Framework.Assert.AreEqual(1, runningNode.GetAppsToCleanup().Count);

    // Filling the heartbeat response is what drains the queues.
    NodeHeartbeatResponse cleanupResponse =
        Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NodeHeartbeatResponse>();
    runningNode.UpdateNodeHeartbeatResponseForCleanup(cleanupResponse);
    NUnit.Framework.Assert.AreEqual(0, runningNode.GetContainersToCleanUp().Count);
    NUnit.Framework.Assert.AreEqual(0, runningNode.GetAppsToCleanup().Count);

    NUnit.Framework.Assert.AreEqual(1, cleanupResponse.GetContainersToCleanup().Count);
    NUnit.Framework.Assert.AreEqual(containerToClean, cleanupResponse.GetContainersToCleanup()[0]);
    NUnit.Framework.Assert.AreEqual(1, cleanupResponse.GetApplicationsToCleanup().Count);
    NUnit.Framework.Assert.AreEqual(appToClean, cleanupResponse.GetApplicationsToCleanup()[0]);
}
/// <summary>
/// Builds an RMNodeImpl for "localhost" port 0 against the shared rmContext
/// without sending it any event.
/// </summary>
/// <returns>The freshly constructed node.</returns>
private RMNodeImpl GetNewNode()
{
    NodeId localNodeId = BuilderUtils.NewNodeId("localhost", 0);
    return new RMNodeImpl(localNodeId, rmContext, null, 0, 0, null, null, null);
}
/// <summary>
/// Takes a running node, feeds it a status event carrying an unhealthy
/// ("sick") health report, and asserts that it ends up in the Unhealthy state.
/// </summary>
/// <returns>The node, now in the Unhealthy state.</returns>
private RMNodeImpl GetUnhealthyNode()
{
    RMNodeImpl unhealthyNode = GetRunningNode();

    NodeHealthStatus sickReport = NodeHealthStatus.NewInstance(false, "sick", Runtime.CurrentTimeMillis());
    RMNodeStatusEvent sickEvent = new RMNodeStatusEvent(
        unhealthyNode.GetNodeID(), sickReport, new AList<ContainerStatus>(), null, null);
    unhealthyNode.Handle(sickEvent);

    NUnit.Framework.Assert.AreEqual(NodeState.Unhealthy, unhealthyNode.GetState());
    return unhealthyNode;
}
/// <summary>
/// Creates a node on "localhost" port 0 with a 4096 / 4 resource capability
/// and the given NodeManager version, starts it, and asserts that it reached
/// the Running state.
/// </summary>
/// <param name="nmVersion">NodeManager version string passed to the node constructor.</param>
/// <returns>The started node.</returns>
private RMNodeImpl GetRunningNode(string nmVersion)
{
    NodeId localNodeId = BuilderUtils.NewNodeId("localhost", 0);
    Resource totalCapability = Resource.NewInstance(4096, 4);
    RMNodeImpl startedNode = new RMNodeImpl(
        localNodeId, rmContext, null, 0, 0, null, totalCapability, nmVersion);

    startedNode.Handle(new RMNodeStartedEvent(startedNode.GetNodeID(), null, null));

    NUnit.Framework.Assert.AreEqual(NodeState.Running, startedNode.GetState());
    return startedNode;
}
/// <summary>
/// Expires an unhealthy node and checks that the node becomes Lost while the
/// scheduler mock has seen the same number of node-removed events (two)
/// before and after the expiry.
/// </summary>
public virtual void TestUnhealthyExpireForSchedulerRemove()
{
    RMNodeImpl unhealthyNode = GetUnhealthyNode();

    // Count of removal events before the expiry.
    Org.Mockito.Mockito.Verify(scheduler, Org.Mockito.Mockito.Times(2))
        .Handle(Matchers.Any<NodeRemovedSchedulerEvent>());

    RMNodeEvent expiry = new RMNodeEvent(unhealthyNode.GetNodeID(), RMNodeEventType.Expire);
    unhealthyNode.Handle(expiry);

    // Same expected count afterwards: the expiry must not add another removal event.
    Org.Mockito.Mockito.Verify(scheduler, Org.Mockito.Mockito.Times(2))
        .Handle(Matchers.Any<NodeRemovedSchedulerEvent>());
    NUnit.Framework.Assert.AreEqual(NodeState.Lost, unhealthyNode.GetState());
}
/// <summary>
/// Starts the shared node and a second node, delivers mocked status events
/// carrying container statuses to each, and checks which container ids show
/// up in completedContainers - one for the first node, then two (in delivery
/// order) for the second node.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestContainerUpdate()
{
    // Start the shared node.
    node.Handle(new RMNodeStartedEvent(null, null, null));

    // Create and start a second node.
    NodeId secondNodeId = BuilderUtils.NewNodeId("localhost:1", 1);
    RMNodeImpl node2 = new RMNodeImpl(secondNodeId, rmContext, null, 0, 0, null, null, null);
    node2.Handle(new RMNodeStartedEvent(null, null, null));

    // One container id for the first node, two for the second.
    ContainerId firstNodeContainer = BuilderUtils.NewContainerId(
        BuilderUtils.NewApplicationAttemptId(BuilderUtils.NewApplicationId(0, 0), 0), 0);
    ContainerId secondNodeContainerA = BuilderUtils.NewContainerId(
        BuilderUtils.NewApplicationAttemptId(BuilderUtils.NewApplicationId(1, 1), 1), 1);
    ContainerId secondNodeContainerB = BuilderUtils.NewContainerId(
        BuilderUtils.NewApplicationAttemptId(BuilderUtils.NewApplicationId(1, 1), 1), 2);

    RMNodeStatusEvent firstNodeEvent = GetMockRMNodeStatusEvent();
    RMNodeStatusEvent secondNodeEventA = GetMockRMNodeStatusEvent();
    RMNodeStatusEvent secondNodeEventB = GetMockRMNodeStatusEvent();

    ContainerStatus firstNodeStatus = Org.Mockito.Mockito.Mock<ContainerStatus>();
    ContainerStatus secondNodeStatusA = Org.Mockito.Mockito.Mock<ContainerStatus>();
    ContainerStatus secondNodeStatusB = Org.Mockito.Mockito.Mock<ContainerStatus>();

    // First node: a single status event with a single container.
    Org.Mockito.Mockito.DoReturn(firstNodeContainer).When(firstNodeStatus).GetContainerId();
    Org.Mockito.Mockito.DoReturn(Sharpen.Collections.SingletonList(firstNodeStatus))
        .When(firstNodeEvent).GetContainers();
    node.Handle(firstNodeEvent);
    NUnit.Framework.Assert.AreEqual(1, completedContainers.Count);
    NUnit.Framework.Assert.AreEqual(firstNodeContainer, completedContainers[0].GetContainerId());

    completedContainers.Clear();

    // Second node: two status events, one container each.
    Org.Mockito.Mockito.DoReturn(secondNodeContainerA).When(secondNodeStatusA).GetContainerId();
    Org.Mockito.Mockito.DoReturn(Sharpen.Collections.SingletonList(secondNodeStatusA))
        .When(secondNodeEventA).GetContainers();
    Org.Mockito.Mockito.DoReturn(secondNodeContainerB).When(secondNodeStatusB).GetContainerId();
    Org.Mockito.Mockito.DoReturn(Sharpen.Collections.SingletonList(secondNodeStatusB))
        .When(secondNodeEventB).GetContainers();

    // The next-heartbeat flag is off for the first event and on for the second.
    node2.SetNextHeartBeat(false);
    node2.Handle(secondNodeEventA);
    node2.SetNextHeartBeat(true);
    node2.Handle(secondNodeEventB);

    NUnit.Framework.Assert.AreEqual(2, completedContainers.Count);
    NUnit.Framework.Assert.AreEqual(secondNodeContainerA, completedContainers[0].GetContainerId());
    NUnit.Framework.Assert.AreEqual(secondNodeContainerB, completedContainers[1].GetContainerId());
}
/// <summary>
/// Checks that a reconnect event carrying a node with a different
/// NodeManager version updates the version reported by the original node.
/// </summary>
public virtual void TestReconnnectUpdate()
{
    string originalVersion = "nm version 1";
    string reconnectVersion = "nm version 2";

    RMNodeImpl originalNode = GetRunningNode(originalVersion);
    NUnit.Framework.Assert.AreEqual(originalVersion, originalNode.GetNodeManagerVersion());

    RMNodeImpl reconnectingNode = GetRunningNode(reconnectVersion);
    RMNodeReconnectEvent reconnect = new RMNodeReconnectEvent(
        originalNode.GetNodeID(), reconnectingNode, null, null);
    originalNode.Handle(reconnect);

    NUnit.Framework.Assert.AreEqual(reconnectVersion, originalNode.GetNodeManagerVersion());
}
/// <summary>
/// Sends a resource update (2048 / 2) to a rebooted node whose capability is
/// 4096 / 4 and checks that the reported total capability changes while the
/// node stays in the Rebooted state.
/// </summary>
public virtual void TestResourceUpdateOnRebootedNode()
{
    RMNodeImpl node = GetRebootedNode();

    // NUnit's argument order is (expected, actual, message): the known constant
    // goes first and the value read from the node second.
    Resource oldCapacity = node.GetTotalCapability();
    NUnit.Framework.Assert.AreEqual(4096, oldCapacity.GetMemory(), "Memory resource is not match.");
    NUnit.Framework.Assert.AreEqual(4, oldCapacity.GetVirtualCores(), "CPU resource is not match.");

    node.Handle(new RMNodeResourceUpdateEvent(
        node.GetNodeID(),
        ResourceOption.NewInstance(Resource.NewInstance(2048, 2), RMNode.OverCommitTimeoutMillisDefault)));

    Resource newCapacity = node.GetTotalCapability();
    NUnit.Framework.Assert.AreEqual(2048, newCapacity.GetMemory(), "Memory resource is not match.");
    NUnit.Framework.Assert.AreEqual(2, newCapacity.GetVirtualCores(), "CPU resource is not match.");

    NUnit.Framework.Assert.AreEqual(NodeState.Rebooted, node.GetState());
}
/// <summary>
/// Registers three nodes (one left in New, one Running, one driven to
/// Unhealthy), queries the "ws/v1/cluster/nodes" resource as JSON and checks
/// that all three nodes are listed.
/// </summary>
public virtual void TestNodesDefaultWithUnHealthyNode()
{
    WebResource r = Resource();

    MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
    MockNM nm2 = rm.RegisterNode("h2:1235", 5121);
    rm.SendNodeStarted(nm1);
    rm.NMwaitForState(nm1.GetNodeId(), NodeState.Running);
    // nm2 is never started, so it is expected to stay in New.
    rm.NMwaitForState(nm2.GetNodeId(), NodeState.New);

    MockNM nm3 = rm.RegisterNode("h3:1236", 5122);
    rm.NMwaitForState(nm3.GetNodeId(), NodeState.New);
    rm.SendNodeStarted(nm3);
    rm.NMwaitForState(nm3.GetNodeId(), NodeState.Running);

    // Push nm3 into Unhealthy with a failing health report.
    RMNodeImpl node = (RMNodeImpl)rm.GetRMContext().GetRMNodes()[nm3.GetNodeId()];
    NodeHealthStatus nodeHealth = NodeHealthStatus.NewInstance(
        false, "test health report", Runtime.CurrentTimeMillis());
    node.Handle(new RMNodeStatusEvent(nm3.GetNodeId(), nodeHealth, new AList<ContainerStatus>(), null, null));
    rm.NMwaitForState(nm3.GetNodeId(), NodeState.Unhealthy);

    ClientResponse response = r.Path("ws").Path("v1").Path("cluster").Path("nodes")
        .Accept(MediaType.ApplicationJson).Get<ClientResponse>();
    // NOTE(review): if GetType() binds to System.Object.GetType() here, this cannot
    // equal a media type value - confirm which member ClientResponse exposes.
    NUnit.Framework.Assert.AreEqual(MediaType.ApplicationJsonType, response.GetType());

    // NUnit's argument order is (expected, actual, message); the message goes last.
    JSONObject json = response.GetEntity<JSONObject>();
    NUnit.Framework.Assert.AreEqual(1, json.Length(), "incorrect number of elements");
    JSONObject nodes = json.GetJSONObject("nodes");
    NUnit.Framework.Assert.AreEqual(1, nodes.Length(), "incorrect number of elements");
    JSONArray nodeArray = nodes.GetJSONArray("node");
    // 3 nodes, including the unhealthy node and the new node.
    NUnit.Framework.Assert.AreEqual(3, nodeArray.Length(), "incorrect number of elements");
}
/// <summary>
/// Sends a Rebooting event to an unhealthy node and checks that the unhealthy
/// counter drops by one, the rebooted counter rises by one, the other
/// counters are unchanged, and the node ends up in the Rebooted state.
/// </summary>
public virtual void TestUnhealthyRebooting()
{
    RMNodeImpl node = GetUnhealthyNode();
    ClusterMetrics cm = ClusterMetrics.GetMetrics();

    // Snapshot every counter before the event so the assertions below compare deltas.
    int initialActive = cm.GetNumActiveNMs();
    int initialLost = cm.GetNumLostNMs();
    int initialUnhealthy = cm.GetUnhealthyNMs();
    int initialDecommissioned = cm.GetNumDecommisionedNMs();
    int initialRebooted = cm.GetNumRebootedNMs();

    node.Handle(new RMNodeEvent(node.GetNodeID(), RMNodeEventType.Rebooting));

    // NUnit's argument order is (expected, actual, message); the message goes last.
    NUnit.Framework.Assert.AreEqual(initialActive, cm.GetNumActiveNMs(), "Active Nodes");
    NUnit.Framework.Assert.AreEqual(initialLost, cm.GetNumLostNMs(), "Lost Nodes");
    NUnit.Framework.Assert.AreEqual(initialUnhealthy - 1, cm.GetUnhealthyNMs(), "Unhealthy Nodes");
    NUnit.Framework.Assert.AreEqual(initialDecommissioned, cm.GetNumDecommisionedNMs(), "Decommissioned Nodes");
    NUnit.Framework.Assert.AreEqual(initialRebooted + 1, cm.GetNumRebootedNMs(), "Rebooted Nodes");
    NUnit.Framework.Assert.AreEqual(NodeState.Rebooted, node.GetState());
}
/// <summary>
/// Test fixture setup: builds an RMContextImpl around an inline dispatcher,
/// installs a mocked NodesListManager and a mocked scheduler, registers the
/// test event dispatchers, creates the shared node for "localhost" port 0
/// and clears the recorded nodes-list-manager event.
/// </summary>
public virtual void SetUp()
{
    InlineDispatcher inlineDispatcher = new InlineDispatcher();
    rmContext = new RMContextImpl(
        inlineDispatcher, null, null, null,
        Org.Mockito.Mockito.Mock<DelegationTokenRenewer>(),
        null, null, null, null, null);

    // The nodes list manager mock hands out a mocked hosts reader.
    NodesListManager mockedListManager = Org.Mockito.Mockito.Mock<NodesListManager>();
    HostsFileReader mockedHostsReader = Org.Mockito.Mockito.Mock<HostsFileReader>();
    Org.Mockito.Mockito.When(mockedListManager.GetHostsReader()).ThenReturn(mockedHostsReader);
    ((RMContextImpl)rmContext).SetNodesListManager(mockedListManager);

    // Scheduler mock whose Handle call is answered by _Answer_115.
    scheduler = Org.Mockito.Mockito.Mock<YarnScheduler>();
    Org.Mockito.Mockito.DoAnswer(new _Answer_115(this))
        .When(scheduler).Handle(Matchers.Any<SchedulerEvent>());

    inlineDispatcher.Register(
        typeof(SchedulerEventType),
        new TestRMNodeTransitions.TestSchedulerEventDispatcher(this));
    inlineDispatcher.Register(
        typeof(NodesListManagerEventType),
        new TestRMNodeTransitions.TestNodeListManagerEventDispatcher(this));

    NodeId localNodeId = BuilderUtils.NewNodeId("localhost", 0);
    node = new RMNodeImpl(localNodeId, rmContext, null, 0, 0, null, null, null);
    nodesListManagerEvent = null;
}
/// <summary>
/// Handles a NodeManager registration request.
/// The request is answered with a Shutdown action when the NodeManager
/// version is below the configured minimum, when nodesListManager does not
/// consider the host valid, or when the offered capability is below
/// minAllocMb / minAllocVcores. Otherwise the response receives the current
/// container-token and NM-token master keys, the node is inserted into the
/// RM node map (dispatching a started event for a new node or a reconnect
/// event for a known one), the node is registered with the liveliness
/// monitor, and a Normal action is returned.
/// </summary>
/// <param name="request">Registration request sent by the NodeManager.</param>
/// <returns>The registration response, carrying either Shutdown or Normal as node action.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual RegisterNodeManagerResponse RegisterNodeManager(RegisterNodeManagerRequest request)
{
    NodeId nodeId = request.GetNodeId();
    string host = nodeId.GetHost();
    int cmPort = nodeId.GetPort();
    int httpPort = request.GetHttpPort();
    Resource capability = request.GetResource();
    string nmVersion = request.GetNMVersion();
    RegisterNodeManagerResponse response =
        recordFactory.NewRecordInstance<RegisterNodeManagerResponse>();

    // Version gate; "NONE" disables it, "EqualToRM" pins the minimum to the RM's own version.
    if (!minimumNodeManagerVersion.Equals("NONE"))
    {
        if (minimumNodeManagerVersion.Equals("EqualToRM"))
        {
            minimumNodeManagerVersion = YarnVersionInfo.GetVersion();
        }
        if (nmVersion == null
            || VersionUtil.CompareVersions(nmVersion, minimumNodeManagerVersion) < 0)
        {
            string versionRejection = "Disallowed NodeManager Version " + nmVersion
                + ", is less than the minimum version " + minimumNodeManagerVersion
                + " sending SHUTDOWN signal to NodeManager.";
            Log.Info(versionRejection);
            response.SetDiagnosticsMessage(versionRejection);
            response.SetNodeAction(NodeAction.Shutdown);
            return response;
        }
    }

    // Check if this node is a 'valid' node
    if (!this.nodesListManager.IsValidNode(host))
    {
        string hostRejection = "Disallowed NodeManager from " + host
            + ", Sending SHUTDOWN signal to the NodeManager.";
        Log.Info(hostRejection);
        response.SetDiagnosticsMessage(hostRejection);
        response.SetNodeAction(NodeAction.Shutdown);
        return response;
    }

    // Check if this node has minimum allocations
    if (capability.GetMemory() < minAllocMb || capability.GetVirtualCores() < minAllocVcores)
    {
        string capacityRejection = "NodeManager from " + host
            + " doesn't satisfy minimum allocations, Sending SHUTDOWN signal to the NodeManager.";
        Log.Info(capacityRejection);
        response.SetDiagnosticsMessage(capacityRejection);
        response.SetNodeAction(NodeAction.Shutdown);
        return response;
    }

    response.SetContainerTokenMasterKey(containerTokenSecretManager.GetCurrentKey());
    response.SetNMTokenMasterKey(nmTokenSecretManager.GetCurrentKey());

    RMNode registeringNode = new RMNodeImpl(
        nodeId, rmContext, host, cmPort, httpPort, Resolve(host), capability, nmVersion);
    RMNode existingNode = this.rmContext.GetRMNodes().PutIfAbsent(nodeId, registeringNode);
    if (existingNode != null)
    {
        Log.Info("Reconnect from the node at: " + host);
        this.nmLivelinessMonitor.Unregister(nodeId);
        // Reset heartbeat ID since node just restarted.
        existingNode.ResetLastNodeHeartBeatResponse();
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMNodeReconnectEvent(
                nodeId, registeringNode, request.GetRunningApplications(), request.GetNMContainerStatuses()));
    }
    else
    {
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMNodeStartedEvent(
                nodeId, request.GetNMContainerStatuses(), request.GetRunningApplications()));
    }

    // On every node manager register we will be clearing NMToken keys if
    // present for any running application.
    this.nmTokenSecretManager.RemoveNodeKey(nodeId);
    this.nmLivelinessMonitor.Register(nodeId);

    // Handle received container status, this should be processed after new
    // RMNode inserted
    if (!rmContext.IsWorkPreservingRecoveryEnabled() && !request.GetNMContainerStatuses().IsEmpty())
    {
        Log.Info("received container statuses on node manager register :"
            + request.GetNMContainerStatuses());
        foreach (NMContainerStatus reportedStatus in request.GetNMContainerStatuses())
        {
            HandleNMContainerStatus(reportedStatus, nodeId);
        }
    }

    string registrationSummary = "NodeManager from node " + host + "(cmPort: " + cmPort
        + " httpPort: " + httpPort + ") registered with capability: " + capability
        + ", assigned nodeId " + nodeId;
    Log.Info(registrationSummary);

    response.SetNodeAction(NodeAction.Normal);
    response.SetRMIdentifier(ResourceManager.GetClusterTimeStamp());
    response.SetRMVersion(YarnVersionInfo.GetVersion());
    return response;
}
/// <summary>
/// Looks up the RM-side node for the given mock NodeManager and delivers an
/// Expire event to it.
/// </summary>
/// <param name="nm">Mock NodeManager whose node id selects the RM node.</param>
/// <exception cref="System.Exception"/>
public virtual void SendNodeLost(MockNM nm)
{
    RMNodeImpl targetNode = (RMNodeImpl)GetRMContext().GetRMNodes()[nm.GetNodeId()];
    RMNodeEvent expireEvent = new RMNodeEvent(nm.GetNodeId(), RMNodeEventType.Expire);
    targetNode.Handle(expireEvent);
}
/// <summary>
/// Looks up the RM-side node for the given mock NodeManager and delivers a
/// started event (with no container statuses or applications) to it.
/// </summary>
/// <param name="nm">Mock NodeManager whose node id selects the RM node.</param>
/// <exception cref="System.Exception"/>
public virtual void SendNodeStarted(MockNM nm)
{
    RMNodeImpl targetNode = (RMNodeImpl)GetRMContext().GetRMNodes()[nm.GetNodeId()];
    RMNodeStartedEvent startedEvent = new RMNodeStartedEvent(nm.GetNodeId(), null, null);
    targetNode.Handle(startedEvent);
}