/// <summary>
/// Polls the scheduler's root queue metrics until the allocated memory (MB)
/// equals <paramref name="clusterMemory"/>. Every poll sends a node heartbeat
/// through <paramref name="nm1"/> and then sleeps for 100 ms. The test is
/// failed if the value still differs on the 51st poll.
/// </summary>
/// <param name="nm1">Mock node manager used to send heartbeats.</param>
/// <param name="rs">Scheduler whose root queue metrics are inspected.</param>
/// <param name="clusterMemory">Allocated memory, in MB, to wait for.</param>
/// <exception cref="System.Exception"/>
private void WaitForClusterMemory(MockNM nm1, ResourceScheduler rs, int clusterMemory)
{
    for (int attempt = 0; rs.GetRootQueueMetrics().GetAllocatedMB() != clusterMemory; attempt++)
    {
        nm1.NodeHeartbeat(true);
        Sharpen.Thread.Sleep(100);
        // attempt is zero-based, so this triggers on the 51st unsuccessful poll.
        if (attempt == 50)
        {
            NUnit.Framework.Assert.Fail("Wait for cluster memory is timed out.Expected=" + clusterMemory
                + " Actual=" + rs.GetRootQueueMetrics().GetAllocatedMB());
        }
    }
}
/// <summary>
/// Builds a cluster-wide metrics snapshot from the root queue metrics of the
/// resource manager's scheduler and from the global cluster (node) metrics.
/// </summary>
/// <param name="rm">Resource manager whose scheduler supplies the queue metrics.</param>
public ClusterMetricsInfo(ResourceManager rm)
{
    // NOTE(review): the original carried a "JAXB needs this" remark at this spot;
    // it presumably belongs to a parameterless constructor that is not visible here.
    QueueMetrics rootMetrics = rm.GetResourceScheduler().GetRootQueueMetrics();
    ClusterMetrics nodeMetrics = ClusterMetrics.GetMetrics();

    // Application counters.
    appsSubmitted = rootMetrics.GetAppsSubmitted();
    appsCompleted = rootMetrics.GetAppsCompleted();
    appsPending = rootMetrics.GetAppsPending();
    appsRunning = rootMetrics.GetAppsRunning();
    appsFailed = rootMetrics.GetAppsFailed();
    appsKilled = rootMetrics.GetAppsKilled();

    // Memory, in MB.
    reservedMB = rootMetrics.GetReservedMB();
    availableMB = rootMetrics.GetAvailableMB();
    allocatedMB = rootMetrics.GetAllocatedMB();

    // Virtual cores.
    reservedVirtualCores = rootMetrics.GetReservedVirtualCores();
    availableVirtualCores = rootMetrics.GetAvailableVirtualCores();
    allocatedVirtualCores = rootMetrics.GetAllocatedVirtualCores();

    // Containers.
    containersAllocated = rootMetrics.GetAllocatedContainers();
    containersPending = rootMetrics.GetPendingContainers();
    containersReserved = rootMetrics.GetReservedContainers();

    // Totals are derived from the values captured above: available + allocated.
    totalMB = availableMB + allocatedMB;
    totalVirtualCores = availableVirtualCores + allocatedVirtualCores;

    // Node counts by state.
    activeNodes = nodeMetrics.GetNumActiveNMs();
    lostNodes = nodeMetrics.GetNumLostNMs();
    unhealthyNodes = nodeMetrics.GetUnhealthyNMs();
    decommissionedNodes = nodeMetrics.GetNumDecommisionedNMs();
    rebootedNodes = nodeMetrics.GetNumRebootedNMs();
    totalNodes = activeNodes + lostNodes + decommissionedNodes + rebootedNodes + unhealthyNodes;
}
/// <summary>
/// Builds a per-user metrics snapshot by looking up <paramref name="user"/> in
/// the root queue metrics of the resource manager's scheduler. When no metrics
/// exist for the user, only <c>userMetricsAvailable</c> is set (to false) and
/// every other field is left untouched.
/// </summary>
/// <param name="rm">Resource manager whose scheduler supplies the queue metrics.</param>
/// <param name="user">Name of the user whose metrics are requested.</param>
public UserMetricsInfo(ResourceManager rm, string user)
{
    // NOTE(review): the original carried a "JAXB needs this" remark at this spot;
    // it presumably belongs to a parameterless constructor that is not visible here.
    QueueMetrics rootMetrics = rm.GetResourceScheduler().GetRootQueueMetrics();
    QueueMetrics perUser = rootMetrics.GetUserMetrics(user);

    userMetricsAvailable = perUser != null;
    if (perUser == null)
    {
        return;
    }

    // Application counters.
    appsSubmitted = perUser.GetAppsSubmitted();
    appsCompleted = perUser.GetAppsCompleted();
    appsPending = perUser.GetAppsPending();
    appsRunning = perUser.GetAppsRunning();
    appsFailed = perUser.GetAppsFailed();
    appsKilled = perUser.GetAppsKilled();

    // Containers.
    runningContainers = perUser.GetAllocatedContainers();
    pendingContainers = perUser.GetPendingContainers();
    reservedContainers = perUser.GetReservedContainers();

    // Memory, in MB.
    reservedMB = perUser.GetReservedMB();
    pendingMB = perUser.GetPendingMB();
    allocatedMB = perUser.GetAllocatedMB();

    // Virtual cores.
    reservedVirtualCores = perUser.GetReservedVirtualCores();
    pendingVirtualCores = perUser.GetPendingVirtualCores();
    allocatedVirtualCores = perUser.GetAllocatedVirtualCores();
}
/// <summary>
/// Checks the supplied cluster metric values (typically parsed from a web
/// service response) against the live values read from the scheduler's root
/// queue metrics and the global cluster metrics.
/// </summary>
/// <remarks>
/// Expected totals are computed the same way <c>ClusterMetricsInfo</c> does:
/// total = available + allocated, and total nodes = active + lost +
/// decommissioned + rebooted + unhealthy.
/// </remarks>
/// <exception cref="Org.Codehaus.Jettison.Json.JSONException"/>
/// <exception cref="System.Exception"/>
public virtual void VerifyClusterMetrics(int submittedApps, int completedApps, int reservedMB,
    int availableMB, int allocMB, int reservedVirtualCores, int availableVirtualCores,
    int allocVirtualCores, int totalVirtualCores, int containersAlloc, int totalMB,
    int totalNodes, int lostNodes, int unhealthyNodes, int decommissionedNodes,
    int rebootedNodes, int activeNodes)
{
    ResourceScheduler rs = rm.GetResourceScheduler();
    QueueMetrics metrics = rs.GetRootQueueMetrics();
    ClusterMetrics clusterMetrics = ClusterMetrics.GetMetrics();
    long totalMBExpect = metrics.GetAvailableMB() + metrics.GetAllocatedMB();
    long totalVirtualCoresExpect = metrics.GetAvailableVirtualCores()
        + metrics.GetAllocatedVirtualCores();

    // Application counters.
    NUnit.Framework.Assert.AreEqual("appsSubmitted doesn't match",
        metrics.GetAppsSubmitted(), submittedApps);
    NUnit.Framework.Assert.AreEqual("appsCompleted doesn't match",
        metrics.GetAppsCompleted(), completedApps);

    // Memory, in MB.
    NUnit.Framework.Assert.AreEqual("reservedMB doesn't match",
        metrics.GetReservedMB(), reservedMB);
    NUnit.Framework.Assert.AreEqual("availableMB doesn't match",
        metrics.GetAvailableMB(), availableMB);
    NUnit.Framework.Assert.AreEqual("allocatedMB doesn't match",
        metrics.GetAllocatedMB(), allocMB);

    // Virtual cores.
    NUnit.Framework.Assert.AreEqual("reservedVirtualCores doesn't match",
        metrics.GetReservedVirtualCores(), reservedVirtualCores);
    NUnit.Framework.Assert.AreEqual("availableVirtualCores doesn't match",
        metrics.GetAvailableVirtualCores(), availableVirtualCores);
    // Compare allocated cores with the allocated metric itself; the previous
    // check used the available + allocated total, which only matched when no
    // cores were available.
    NUnit.Framework.Assert.AreEqual("allocatedVirtualCores doesn't match",
        metrics.GetAllocatedVirtualCores(), allocVirtualCores);
    // The total was previously computed but never checked against its parameter.
    NUnit.Framework.Assert.AreEqual("totalVirtualCores doesn't match",
        totalVirtualCoresExpect, totalVirtualCores);

    // Containers: verify against the live metric instead of a hard-coded 0.
    NUnit.Framework.Assert.AreEqual("containersAllocated doesn't match",
        metrics.GetAllocatedContainers(), containersAlloc);

    NUnit.Framework.Assert.AreEqual("totalMB doesn't match", totalMBExpect, totalMB);

    // Node counts by state.
    NUnit.Framework.Assert.AreEqual("totalNodes doesn't match",
        clusterMetrics.GetNumActiveNMs() + clusterMetrics.GetNumLostNMs()
        + clusterMetrics.GetNumDecommisionedNMs() + clusterMetrics.GetNumRebootedNMs()
        + clusterMetrics.GetUnhealthyNMs(), totalNodes);
    NUnit.Framework.Assert.AreEqual("lostNodes doesn't match",
        clusterMetrics.GetNumLostNMs(), lostNodes);
    NUnit.Framework.Assert.AreEqual("unhealthyNodes doesn't match",
        clusterMetrics.GetUnhealthyNMs(), unhealthyNodes);
    NUnit.Framework.Assert.AreEqual("decommissionedNodes doesn't match",
        clusterMetrics.GetNumDecommisionedNMs(), decommissionedNodes);
    NUnit.Framework.Assert.AreEqual("rebootedNodes doesn't match",
        clusterMetrics.GetNumRebootedNMs(), rebootedNodes);
    NUnit.Framework.Assert.AreEqual("activeNodes doesn't match",
        clusterMetrics.GetNumActiveNMs(), activeNodes);
}
// The test verifies processing of NMContainerStatuses which are sent during
// NM registration.
// 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
// 2. AM sends ResourceRequest for 1 container with memory 2048MB.
// 3. Verify for number of container allocated by RM
// 4. Verify Memory Usage by cluster, it should be 3072. AM memory + requested
//    memory. 1024 + 2048=3072
// 5. Re-register NM by sending completed container status
// 6. Verify for Memory Used, it should be 1024
// 7. Send AM heartbeat to RM. Allocated response should contain completed
//    container.
/// <exception cref="System.Exception"/>
public virtual void TestProcessingNMContainerStatusesOnNMRestart()
{
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    // 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.Start();
    // Everything after Start() runs inside try/finally so the resource manager
    // is stopped even when an assertion fails or a wait times out.
    try
    {
        int nmMemory = 8192;
        int amMemory = 1024;
        int containerMemory = 2048;
        MockNM nm1 = new MockNM("127.0.0.1:1234", nmMemory, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        RMApp app0 = rm1.SubmitApp(amMemory);
        MockAM am0 = MockRM.LaunchAndRegisterAM(app0, rm1, nm1);

        // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
        int noOfContainers = 1;
        IList<Container> allocateContainers = am0.AllocateAndWaitForContainers(
            noOfContainers, containerMemory, nm1);

        // 3. Verify for number of container allocated by RM
        NUnit.Framework.Assert.AreEqual(noOfContainers, allocateContainers.Count);
        Container container = allocateContainers[0];
        // Report container id 1 and the newly allocated container as running.
        nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Running);
        nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), container.GetId().GetContainerId(),
            ContainerState.Running);
        rm1.WaitForState(app0.GetApplicationId(), RMAppState.Running);

        // 4. Verify Memory Usage by cluster, it should be 3072. AM memory +
        // requested memory. 1024 + 2048=3072
        ResourceScheduler rs = rm1.GetRMContext().GetScheduler();
        int allocatedMB = rs.GetRootQueueMetrics().GetAllocatedMB();
        NUnit.Framework.Assert.AreEqual(amMemory + containerMemory, allocatedMB);

        // 5. Re-register NM by sending completed container status
        IList<NMContainerStatus> nMContainerStatusForApp = CreateNMContainerStatusForApp(am0);
        nm1.RegisterNode(nMContainerStatusForApp, Arrays.AsList(app0.GetApplicationId()));
        WaitForClusterMemory(nm1, rs, amMemory);

        // 6. Verify for Memory Used, it should be 1024
        NUnit.Framework.Assert.AreEqual(amMemory, rs.GetRootQueueMetrics().GetAllocatedMB());

        // 7. Send AM heartbeat to RM. Allocated response should contain completed
        // container
        AllocateRequest req = AllocateRequest.NewInstance(0, 0F, new AList<ResourceRequest>(),
            new AList<ContainerId>(), null);
        AllocateResponse allocate = am0.Allocate(req);
        IList<ContainerStatus> completedContainersStatuses =
            allocate.GetCompletedContainersStatuses();
        NUnit.Framework.Assert.AreEqual(noOfContainers, completedContainersStatuses.Count);

        // Application clean up should happen Cluster memory used is 0
        nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
        WaitForClusterMemory(nm1, rs, 0);
    }
    finally
    {
        rm1.Stop();
    }
}
/// <summary>
/// Drives a plan follower through a sequence of mocked clock readings
/// (0, 3, 10, 11, 12, 16) and checks, after each run, which reservation queues
/// exist, the two numeric values passed to the queue-existence assertion, and
/// where the single submitted application is counted.
/// </summary>
/// <param name="isMove">
/// Passed to the plan as its last constructor argument. When true the test
/// expects the application to be counted in the default queue once its
/// reservation is gone; when false it expects the default queue to stay empty.
/// </param>
/// <exception cref="Org.Apache.Hadoop.Yarn.Server.Resourcemanager.Reservation.Exceptions.PlanningException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="Org.Apache.Hadoop.Security.AccessControlException"/>
protected internal virtual void TestPlanFollower(bool isMove)
{
    // Initialize plan based on move flag
    plan = new InMemoryPlan(scheduler.GetRootQueueMetrics(), policy, mAgent,
        scheduler.GetClusterResource(), 1L, res, scheduler.GetMinimumResourceCapability(),
        maxAlloc, "dedicated", null, isMove);

    // Add three reservations to the plan: r1 over [0, 5), r2 over [3, 8) and
    // r3 over [10, 15).
    long creationTime = Runtime.CurrentTimeMillis();

    ReservationId r1 = ReservationId.NewInstance(creationTime, 1);
    int[] flatDemand = new int[] { 10, 10, 10, 10, 10 };
    InMemoryReservationAllocation firstAllocation = new InMemoryReservationAllocation(
        r1, null, "u3", "dedicated", 0, flatDemand.Length,
        ReservationSystemTestUtil.GenerateAllocation(0L, 1L, flatDemand), res, minAlloc);
    NUnit.Framework.Assert.IsTrue(plan.ToString(), plan.AddReservation(firstAllocation));

    ReservationId r2 = ReservationId.NewInstance(creationTime, 2);
    InMemoryReservationAllocation secondAllocation = new InMemoryReservationAllocation(
        r2, null, "u3", "dedicated", 3, 3 + flatDemand.Length,
        ReservationSystemTestUtil.GenerateAllocation(3L, 1L, flatDemand), res, minAlloc);
    NUnit.Framework.Assert.IsTrue(plan.ToString(), plan.AddReservation(secondAllocation));

    ReservationId r3 = ReservationId.NewInstance(creationTime, 3);
    int[] rampDemand = new int[] { 0, 10, 20, 10, 0 };
    InMemoryReservationAllocation thirdAllocation = new InMemoryReservationAllocation(
        r3, null, "u4", "dedicated", 10, 10 + rampDemand.Length,
        ReservationSystemTestUtil.GenerateAllocation(10L, 1L, rampDemand), res, minAlloc);
    NUnit.Framework.Assert.IsTrue(plan.ToString(), plan.AddReservation(thirdAllocation));

    AbstractSchedulerPlanFollower follower = CreatePlanFollower();

    // ---- clock = 0 ----
    Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(0L);
    follower.Run();
    Queue r1Queue = GetReservationQueue(r1.ToString());
    AssertReservationQueueExists(r1);

    // submit an app to r1
    string submitter = "test-user";
    ApplicationId appId = ApplicationId.NewInstance(0, 1);
    ApplicationAttemptId attemptId = ApplicationAttemptId.NewInstance(appId, 0);
    scheduler.Handle(new AppAddedSchedulerEvent(appId, r1Queue.GetQueueName(), submitter));
    scheduler.Handle(new AppAttemptAddedSchedulerEvent(attemptId, false));

    // initial default reservation queue should have no apps
    Queue defaultQueue = GetDefaultQueue();
    NUnit.Framework.Assert.AreEqual(0, GetNumberOfApplications(defaultQueue));
    AssertReservationQueueExists(r1, 0.1, 0.1);
    NUnit.Framework.Assert.AreEqual(1, GetNumberOfApplications(r1Queue));
    AssertReservationQueueDoesNotExist(r2);
    AssertReservationQueueDoesNotExist(r3);

    // ---- clock = 3 ----
    Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(3L);
    follower.Run();
    NUnit.Framework.Assert.AreEqual(0, GetNumberOfApplications(defaultQueue));
    AssertReservationQueueExists(r1, 0.1, 0.1);
    NUnit.Framework.Assert.AreEqual(1, GetNumberOfApplications(r1Queue));
    AssertReservationQueueExists(r2, 0.1, 0.1);
    AssertReservationQueueDoesNotExist(r3);

    // ---- clock = 10 ----
    Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(10L);
    follower.Run();
    r1Queue = GetReservationQueue(r1.ToString());

    // With move enabled the app should have been moved to the default
    // reservation queue; otherwise the app should be killed and the default
    // queue stays empty.
    int appsExpectedInDefault = isMove ? 1 : 0;
    NUnit.Framework.Assert.AreEqual(appsExpectedInDefault, GetNumberOfApplications(defaultQueue));
    if (isMove)
    {
        NUnit.Framework.Assert.IsNull(r1Queue);
    }
    else
    {
        NUnit.Framework.Assert.IsNotNull(r1Queue);
        scheduler.Handle(new AppAttemptRemovedSchedulerEvent(attemptId,
            RMAppAttemptState.Killed, false));
    }
    AssertReservationQueueDoesNotExist(r2);
    AssertReservationQueueExists(r3, 0, 1.0);

    // ---- clock = 11 ----
    Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(11L);
    follower.Run();
    // Same expectation as at clock = 10: moved app counted in default, or none.
    NUnit.Framework.Assert.AreEqual(appsExpectedInDefault, GetNumberOfApplications(defaultQueue));
    AssertReservationQueueDoesNotExist(r1);
    AssertReservationQueueDoesNotExist(r2);
    AssertReservationQueueExists(r3, 0.1, 0.1);

    // ---- clock = 12 ----
    Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(12L);
    follower.Run();
    AssertReservationQueueDoesNotExist(r1);
    AssertReservationQueueDoesNotExist(r2);
    AssertReservationQueueExists(r3, 0.2, 0.2);

    // ---- clock = 16 ----
    Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(16L);
    follower.Run();
    AssertReservationQueueDoesNotExist(r1);
    AssertReservationQueueDoesNotExist(r2);
    AssertReservationQueueDoesNotExist(r3);
    VerifyCapacity(defaultQueue);
}