/// <summary>
/// Populates this object with the current application, resource, container and
/// node counters read from the resource manager's root queue metrics and from
/// the cluster metrics.
/// </summary>
/// <param name="rm">
/// Resource manager whose scheduler provides the root queue metrics.
/// </param>
public ClusterMetricsInfo(ResourceManager rm)
{
    QueueMetrics queueMetrics = rm.GetResourceScheduler().GetRootQueueMetrics();
    ClusterMetrics nodeMetrics = ClusterMetrics.GetMetrics();

    // Application counters.
    appsSubmitted = queueMetrics.GetAppsSubmitted();
    appsCompleted = queueMetrics.GetAppsCompleted();
    appsPending = queueMetrics.GetAppsPending();
    appsRunning = queueMetrics.GetAppsRunning();
    appsFailed = queueMetrics.GetAppsFailed();
    appsKilled = queueMetrics.GetAppsKilled();

    // Memory counters.
    reservedMB = queueMetrics.GetReservedMB();
    availableMB = queueMetrics.GetAvailableMB();
    allocatedMB = queueMetrics.GetAllocatedMB();

    // Virtual core counters.
    reservedVirtualCores = queueMetrics.GetReservedVirtualCores();
    availableVirtualCores = queueMetrics.GetAvailableVirtualCores();
    allocatedVirtualCores = queueMetrics.GetAllocatedVirtualCores();

    // Container counters.
    containersAllocated = queueMetrics.GetAllocatedContainers();
    containersPending = queueMetrics.GetPendingContainers();
    containersReserved = queueMetrics.GetReservedContainers();

    // Totals are derived as available + allocated; reserved amounts are not added.
    totalMB = availableMB + allocatedMB;
    totalVirtualCores = availableVirtualCores + allocatedVirtualCores;

    // Node counters, one per node state exposed by the cluster metrics.
    activeNodes = nodeMetrics.GetNumActiveNMs();
    lostNodes = nodeMetrics.GetNumLostNMs();
    unhealthyNodes = nodeMetrics.GetUnhealthyNMs();
    decommissionedNodes = nodeMetrics.GetNumDecommisionedNMs();
    rebootedNodes = nodeMetrics.GetNumRebootedNMs();

    // The node total is the sum over all five states read above.
    totalNodes = activeNodes + lostNodes + decommissionedNodes + rebootedNodes
        + unhealthyNodes;
}
/// <summary>
/// Repeatedly checks the root queue metrics and the active node count against
/// the expected values, making at most five attempts and calling
/// <c>Sharpen.Thread.Sleep(1000)</c> after every failed attempt. Fails the test
/// with the message of the last failed check if no attempt succeeds.
/// </summary>
/// <param name="activeNodes">Expected number of active node managers.</param>
/// <param name="appsSubmitted">Expected submitted application count.</param>
/// <param name="appsPending">Expected pending application count.</param>
/// <param name="containersPending">Expected pending container count.</param>
/// <param name="availableMB">Expected available memory.</param>
/// <param name="activeApplications">Expected active application count.</param>
/// <exception cref="System.Exception"/>
private void VerifyClusterMetrics(int activeNodes, int appsSubmitted, int appsPending,
    int containersPending, int availableMB, int activeApplications)
{
    QueueMetrics queueMetrics = rm.GetResourceScheduler().GetRootQueueMetrics();
    ClusterMetrics nodeMetrics = ClusterMetrics.GetMetrics();

    string lastFailure = null;
    bool verified = false;

    for (int attempt = 0; attempt < 5 && !verified; attempt++)
    {
        try
        {
            // Queue-level metrics.
            AssertMetric("appsSubmitted", appsSubmitted, queueMetrics.GetAppsSubmitted());
            AssertMetric("appsPending", appsPending, queueMetrics.GetAppsPending());
            AssertMetric("containersPending", containersPending,
                queueMetrics.GetPendingContainers());
            AssertMetric("availableMB", availableMB, queueMetrics.GetAvailableMB());
            AssertMetric("activeApplications", activeApplications,
                queueMetrics.GetActiveApps());

            // Node-level metric.
            AssertMetric("activeNodes", activeNodes, nodeMetrics.GetNumActiveNMs());

            // Every check passed; the loop condition ends the retries.
            verified = true;
        }
        catch (Exception failure)
        {
            // Remember why this attempt failed so the final assertion can report it.
            lastFailure = failure.Message;
            System.Console.Out.WriteLine("Waiting for metrics assertion to complete");
            Sharpen.Thread.Sleep(1000);
        }
    }

    NUnit.Framework.Assert.IsTrue(lastFailure, verified);
}
/// <summary>
/// Asserts that the supplied cluster metric values agree with the values
/// currently exposed by the resource manager's root queue metrics and by the
/// cluster metrics.
/// </summary>
/// <remarks>
/// Totals are expected to equal available + allocated, and the node total is
/// expected to equal the sum of the active, lost, decommissioned, rebooted and
/// unhealthy node counts.
/// </remarks>
/// <param name="submittedApps">Reported submitted application count.</param>
/// <param name="completedApps">Reported completed application count.</param>
/// <param name="reservedMB">Reported reserved memory.</param>
/// <param name="availableMB">Reported available memory.</param>
/// <param name="allocMB">Reported allocated memory.</param>
/// <param name="reservedVirtualCores">Reported reserved virtual cores.</param>
/// <param name="availableVirtualCores">Reported available virtual cores.</param>
/// <param name="allocVirtualCores">Reported allocated virtual cores.</param>
/// <param name="totalVirtualCores">Reported total virtual cores.</param>
/// <param name="containersAlloc">Reported allocated container count.</param>
/// <param name="totalMB">Reported total memory.</param>
/// <param name="totalNodes">Reported total node count.</param>
/// <param name="lostNodes">Reported lost node count.</param>
/// <param name="unhealthyNodes">Reported unhealthy node count.</param>
/// <param name="decommissionedNodes">Reported decommissioned node count.</param>
/// <param name="rebootedNodes">Reported rebooted node count.</param>
/// <param name="activeNodes">Reported active node count.</param>
/// <exception cref="Org.Codehaus.Jettison.Json.JSONException"/>
/// <exception cref="System.Exception"/>
public virtual void VerifyClusterMetrics(int submittedApps, int completedApps, int reservedMB,
    int availableMB, int allocMB, int reservedVirtualCores, int availableVirtualCores,
    int allocVirtualCores, int totalVirtualCores, int containersAlloc, int totalMB,
    int totalNodes, int lostNodes, int unhealthyNodes, int decommissionedNodes,
    int rebootedNodes, int activeNodes)
{
    ResourceScheduler rs = rm.GetResourceScheduler();
    QueueMetrics metrics = rs.GetRootQueueMetrics();
    ClusterMetrics clusterMetrics = ClusterMetrics.GetMetrics();

    // Expected totals are derived the same way the reported totals are: available + allocated.
    long totalMBExpect = metrics.GetAvailableMB() + metrics.GetAllocatedMB();
    long totalVirtualCoresExpect = metrics.GetAvailableVirtualCores()
        + metrics.GetAllocatedVirtualCores();

    // Application counters.
    NUnit.Framework.Assert.AreEqual("appsSubmitted doesn't match",
        metrics.GetAppsSubmitted(), submittedApps);
    NUnit.Framework.Assert.AreEqual("appsCompleted doesn't match",
        metrics.GetAppsCompleted(), completedApps);

    // Memory counters.
    NUnit.Framework.Assert.AreEqual("reservedMB doesn't match",
        metrics.GetReservedMB(), reservedMB);
    NUnit.Framework.Assert.AreEqual("availableMB doesn't match",
        metrics.GetAvailableMB(), availableMB);
    NUnit.Framework.Assert.AreEqual("allocatedMB doesn't match",
        metrics.GetAllocatedMB(), allocMB);

    // Virtual core counters.
    NUnit.Framework.Assert.AreEqual("reservedVirtualCores doesn't match",
        metrics.GetReservedVirtualCores(), reservedVirtualCores);
    NUnit.Framework.Assert.AreEqual("availableVirtualCores doesn't match",
        metrics.GetAvailableVirtualCores(), availableVirtualCores);
    // Compare allocated cores with the allocated metric itself. Comparing with the
    // available + allocated total only holds while no cores are available.
    NUnit.Framework.Assert.AreEqual("allocatedVirtualCores doesn't match",
        metrics.GetAllocatedVirtualCores(), allocVirtualCores);
    // The reported total was previously passed in but never verified.
    NUnit.Framework.Assert.AreEqual("totalVirtualCores doesn't match",
        totalVirtualCoresExpect, totalVirtualCores);

    // Check against the live metric rather than a hard-coded zero so the assertion
    // stays correct when containers are allocated.
    NUnit.Framework.Assert.AreEqual("containersAllocated doesn't match",
        metrics.GetAllocatedContainers(), containersAlloc);
    NUnit.Framework.Assert.AreEqual("totalMB doesn't match", totalMBExpect, totalMB);

    // Node counters.
    NUnit.Framework.Assert.AreEqual("totalNodes doesn't match",
        clusterMetrics.GetNumActiveNMs() + clusterMetrics.GetNumLostNMs()
        + clusterMetrics.GetNumDecommisionedNMs() + clusterMetrics.GetNumRebootedNMs()
        + clusterMetrics.GetUnhealthyNMs(), totalNodes);
    NUnit.Framework.Assert.AreEqual("lostNodes doesn't match",
        clusterMetrics.GetNumLostNMs(), lostNodes);
    NUnit.Framework.Assert.AreEqual("unhealthyNodes doesn't match",
        clusterMetrics.GetUnhealthyNMs(), unhealthyNodes);
    NUnit.Framework.Assert.AreEqual("decommissionedNodes doesn't match",
        clusterMetrics.GetNumDecommisionedNMs(), decommissionedNodes);
    NUnit.Framework.Assert.AreEqual("rebootedNodes doesn't match",
        clusterMetrics.GetNumRebootedNMs(), rebootedNodes);
    NUnit.Framework.Assert.AreEqual("activeNodes doesn't match",
        clusterMetrics.GetNumActiveNMs(), activeNodes);
}
/// <summary>
/// Registers two nodes, then re-registers them under several conditions
/// (healthy, unhealthy, unhealthy turned healthy, changed capability, changed
/// capability with running applications, changed HTTP port) and checks the
/// active node count, the unhealthy node count and the root queue's available
/// memory after each step.
/// </summary>
public virtual void TestReconnectNode()
{
    const string hostOne = "host1:1234";
    const string hostTwo = "host2:5678";
    const int baseMemory = 5120;
    const int doubledMemory = 10240;
    const int tripledMemory = 15360;

    DrainDispatcher drain = new DrainDispatcher();
    rm = new _MockRM_567(this, drain);
    rm.Start();

    MockNM nodeA = rm.RegisterNode(hostOne, baseMemory);
    MockNM nodeB = rm.RegisterNode(hostTwo, baseMemory);
    nodeA.NodeHeartbeat(true);
    nodeB.NodeHeartbeat(false);
    drain.Await();
    CheckUnealthyNMCount(rm, nodeB, true, 1);

    int activeBefore = ClusterMetrics.GetMetrics().GetNumActiveNMs();
    QueueMetrics rootMetrics = rm.GetResourceScheduler().GetRootQueueMetrics();
    // TODO Metrics incorrect in case of the FifoScheduler
    NUnit.Framework.Assert.AreEqual(baseMemory, rootMetrics.GetAvailableMB());

    // reconnect of healthy node
    nodeA = rm.RegisterNode(hostOne, baseMemory);
    NodeHeartbeatResponse heartbeat = nodeA.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(heartbeat.GetNodeAction()));
    drain.Await();
    NUnit.Framework.Assert.AreEqual(activeBefore,
        ClusterMetrics.GetMetrics().GetNumActiveNMs());
    CheckUnealthyNMCount(rm, nodeB, true, 1);

    // reconnect of unhealthy node
    nodeB = rm.RegisterNode(hostTwo, baseMemory);
    heartbeat = nodeB.NodeHeartbeat(false);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(heartbeat.GetNodeAction()));
    drain.Await();
    NUnit.Framework.Assert.AreEqual(activeBefore,
        ClusterMetrics.GetMetrics().GetNumActiveNMs());
    CheckUnealthyNMCount(rm, nodeB, true, 1);

    // unhealthy node changed back to healthy
    nodeB = rm.RegisterNode(hostTwo, baseMemory);
    drain.Await();
    // Two consecutive healthy heartbeats, as in the original sequence.
    nodeB.NodeHeartbeat(true);
    nodeB.NodeHeartbeat(true);
    drain.Await();
    NUnit.Framework.Assert.AreEqual(baseMemory + baseMemory, rootMetrics.GetAvailableMB());

    // reconnect of node with changed capability
    nodeA = rm.RegisterNode(hostTwo, doubledMemory);
    drain.Await();
    heartbeat = nodeA.NodeHeartbeat(true);
    drain.Await();
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(heartbeat.GetNodeAction()));
    NUnit.Framework.Assert.AreEqual(baseMemory + doubledMemory,
        rootMetrics.GetAvailableMB());

    // reconnect of node with changed capability and running applications
    IList<ApplicationId> activeApps = new AList<ApplicationId>();
    activeApps.AddItem(ApplicationId.NewInstance(1, 0));
    nodeA = rm.RegisterNode(hostTwo, tripledMemory, 2, activeApps);
    drain.Await();
    heartbeat = nodeA.NodeHeartbeat(true);
    drain.Await();
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(heartbeat.GetNodeAction()));
    NUnit.Framework.Assert.AreEqual(baseMemory + tripledMemory,
        rootMetrics.GetAvailableMB());

    // reconnect healthy node changing http port
    nodeA = new MockNM(hostOne, baseMemory, rm.GetResourceTrackerService());
    nodeA.SetHttpPort(3);
    nodeA.RegisterNode();
    drain.Await();
    // Two consecutive healthy heartbeats, as in the original sequence.
    nodeA.NodeHeartbeat(true);
    nodeA.NodeHeartbeat(true);
    drain.Await();

    RMNode registered = rm.GetRMContext().GetRMNodes()[nodeA.GetNodeId()];
    NUnit.Framework.Assert.AreEqual(3, registered.GetHttpPort());
    NUnit.Framework.Assert.AreEqual(baseMemory, registered.GetTotalCapability().GetMemory());
    NUnit.Framework.Assert.AreEqual(baseMemory + tripledMemory,
        rootMetrics.GetAvailableMB());
}