Ejemplo n.º 1
0
        /// <exception cref="System.Exception"/>
        private void WaitForClusterMemory(MockNM nm1, ResourceScheduler rs, int clusterMemory
                                          )
        {
            int counter = 0;

            while (rs.GetRootQueueMetrics().GetAllocatedMB() != clusterMemory)
            {
                nm1.NodeHeartbeat(true);
                Sharpen.Thread.Sleep(100);
                if (counter++ == 50)
                {
                    NUnit.Framework.Assert.Fail("Wait for cluster memory is timed out.Expected=" + clusterMemory
                                                + " Actual=" + rs.GetRootQueueMetrics().GetAllocatedMB());
                }
            }
        }
Ejemplo n.º 2
0
        public ClusterMetricsInfo(ResourceManager rm)
        {
            // JAXB needs this
            ResourceScheduler rs             = rm.GetResourceScheduler();
            QueueMetrics      metrics        = rs.GetRootQueueMetrics();
            ClusterMetrics    clusterMetrics = ClusterMetrics.GetMetrics();

            this.appsSubmitted         = metrics.GetAppsSubmitted();
            this.appsCompleted         = metrics.GetAppsCompleted();
            this.appsPending           = metrics.GetAppsPending();
            this.appsRunning           = metrics.GetAppsRunning();
            this.appsFailed            = metrics.GetAppsFailed();
            this.appsKilled            = metrics.GetAppsKilled();
            this.reservedMB            = metrics.GetReservedMB();
            this.availableMB           = metrics.GetAvailableMB();
            this.allocatedMB           = metrics.GetAllocatedMB();
            this.reservedVirtualCores  = metrics.GetReservedVirtualCores();
            this.availableVirtualCores = metrics.GetAvailableVirtualCores();
            this.allocatedVirtualCores = metrics.GetAllocatedVirtualCores();
            this.containersAllocated   = metrics.GetAllocatedContainers();
            this.containersPending     = metrics.GetPendingContainers();
            this.containersReserved    = metrics.GetReservedContainers();
            this.totalMB             = availableMB + allocatedMB;
            this.totalVirtualCores   = availableVirtualCores + allocatedVirtualCores;
            this.activeNodes         = clusterMetrics.GetNumActiveNMs();
            this.lostNodes           = clusterMetrics.GetNumLostNMs();
            this.unhealthyNodes      = clusterMetrics.GetUnhealthyNMs();
            this.decommissionedNodes = clusterMetrics.GetNumDecommisionedNMs();
            this.rebootedNodes       = clusterMetrics.GetNumRebootedNMs();
            this.totalNodes          = activeNodes + lostNodes + decommissionedNodes + rebootedNodes +
                                       unhealthyNodes;
        }
Ejemplo n.º 3
0
        public UserMetricsInfo(ResourceManager rm, string user)
        {
            // JAXB needs this
            ResourceScheduler rs          = rm.GetResourceScheduler();
            QueueMetrics      metrics     = rs.GetRootQueueMetrics();
            QueueMetrics      userMetrics = metrics.GetUserMetrics(user);

            this.userMetricsAvailable = false;
            if (userMetrics != null)
            {
                this.userMetricsAvailable  = true;
                this.appsSubmitted         = userMetrics.GetAppsSubmitted();
                this.appsCompleted         = userMetrics.GetAppsCompleted();
                this.appsPending           = userMetrics.GetAppsPending();
                this.appsRunning           = userMetrics.GetAppsRunning();
                this.appsFailed            = userMetrics.GetAppsFailed();
                this.appsKilled            = userMetrics.GetAppsKilled();
                this.runningContainers     = userMetrics.GetAllocatedContainers();
                this.pendingContainers     = userMetrics.GetPendingContainers();
                this.reservedContainers    = userMetrics.GetReservedContainers();
                this.reservedMB            = userMetrics.GetReservedMB();
                this.pendingMB             = userMetrics.GetPendingMB();
                this.allocatedMB           = userMetrics.GetAllocatedMB();
                this.reservedVirtualCores  = userMetrics.GetReservedVirtualCores();
                this.pendingVirtualCores   = userMetrics.GetPendingVirtualCores();
                this.allocatedVirtualCores = userMetrics.GetAllocatedVirtualCores();
            }
        }
Ejemplo n.º 4
0
        /// <exception cref="Org.Codehaus.Jettison.Json.JSONException"/>
        /// <exception cref="System.Exception"/>
        public virtual void VerifyClusterMetrics(int submittedApps, int completedApps, int
                                                 reservedMB, int availableMB, int allocMB, int reservedVirtualCores, int availableVirtualCores
                                                 , int allocVirtualCores, int totalVirtualCores, int containersAlloc, int totalMB
                                                 , int totalNodes, int lostNodes, int unhealthyNodes, int decommissionedNodes, int
                                                 rebootedNodes, int activeNodes)
        {
            ResourceScheduler rs             = rm.GetResourceScheduler();
            QueueMetrics      metrics        = rs.GetRootQueueMetrics();
            ClusterMetrics    clusterMetrics = ClusterMetrics.GetMetrics();
            long totalMBExpect           = metrics.GetAvailableMB() + metrics.GetAllocatedMB();
            long totalVirtualCoresExpect = metrics.GetAvailableVirtualCores() + metrics.GetAllocatedVirtualCores
                                               ();

            NUnit.Framework.Assert.AreEqual("appsSubmitted doesn't match", metrics.GetAppsSubmitted
                                                (), submittedApps);
            NUnit.Framework.Assert.AreEqual("appsCompleted doesn't match", metrics.GetAppsCompleted
                                                (), completedApps);
            NUnit.Framework.Assert.AreEqual("reservedMB doesn't match", metrics.GetReservedMB
                                                (), reservedMB);
            NUnit.Framework.Assert.AreEqual("availableMB doesn't match", metrics.GetAvailableMB
                                                (), availableMB);
            NUnit.Framework.Assert.AreEqual("allocatedMB doesn't match", metrics.GetAllocatedMB
                                                (), allocMB);
            NUnit.Framework.Assert.AreEqual("reservedVirtualCores doesn't match", metrics.GetReservedVirtualCores
                                                (), reservedVirtualCores);
            NUnit.Framework.Assert.AreEqual("availableVirtualCores doesn't match", metrics.GetAvailableVirtualCores
                                                (), availableVirtualCores);
            NUnit.Framework.Assert.AreEqual("allocatedVirtualCores doesn't match", totalVirtualCoresExpect
                                            , allocVirtualCores);
            NUnit.Framework.Assert.AreEqual("containersAllocated doesn't match", 0, containersAlloc
                                            );
            NUnit.Framework.Assert.AreEqual("totalMB doesn't match", totalMBExpect, totalMB);
            NUnit.Framework.Assert.AreEqual("totalNodes doesn't match", clusterMetrics.GetNumActiveNMs
                                                () + clusterMetrics.GetNumLostNMs() + clusterMetrics.GetNumDecommisionedNMs() +
                                            clusterMetrics.GetNumRebootedNMs() + clusterMetrics.GetUnhealthyNMs(), totalNodes
                                            );
            NUnit.Framework.Assert.AreEqual("lostNodes doesn't match", clusterMetrics.GetNumLostNMs
                                                (), lostNodes);
            NUnit.Framework.Assert.AreEqual("unhealthyNodes doesn't match", clusterMetrics.GetUnhealthyNMs
                                                (), unhealthyNodes);
            NUnit.Framework.Assert.AreEqual("decommissionedNodes doesn't match", clusterMetrics
                                            .GetNumDecommisionedNMs(), decommissionedNodes);
            NUnit.Framework.Assert.AreEqual("rebootedNodes doesn't match", clusterMetrics.GetNumRebootedNMs
                                                (), rebootedNodes);
            NUnit.Framework.Assert.AreEqual("activeNodes doesn't match", clusterMetrics.GetNumActiveNMs
                                                (), activeNodes);
        }
Ejemplo n.º 5
0
        // The test verifies processing of NMContainerStatuses which are sent during
        // NM registration.
        // 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
        // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
        // 3. Verify for number of container allocated by RM
        // 4. Verify Memory Usage by cluster, it should be 3072. AM memory + requested
        // memory. 1024 + 2048=3072
        // 5. Re-register NM by sending completed container status
        // 6. Verify for Memory Used, it should be 1024
        // 7. Send AM heatbeat to RM. Allocated response should contain completed
        // container.
        /// <exception cref="System.Exception"/>
        public virtual void TestProcessingNMContainerStatusesOnNMRestart()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            int    nmMemory        = 8192;
            int    amMemory        = 1024;
            int    containerMemory = 2048;
            MockNM nm1             = new MockNM("127.0.0.1:1234", nmMemory, rm1.GetResourceTrackerService
                                                    ());

            nm1.RegisterNode();
            RMApp  app0 = rm1.SubmitApp(amMemory);
            MockAM am0  = MockRM.LaunchAndRegisterAM(app0, rm1, nm1);
            // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
            int noOfContainers = 1;
            IList <Container> allocateContainers = am0.AllocateAndWaitForContainers(noOfContainers
                                                                                    , containerMemory, nm1);

            // 3. Verify for number of container allocated by RM
            NUnit.Framework.Assert.AreEqual(noOfContainers, allocateContainers.Count);
            Container container = allocateContainers[0];

            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Running);
            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), container.GetId().GetContainerId
                                  (), ContainerState.Running);
            rm1.WaitForState(app0.GetApplicationId(), RMAppState.Running);
            // 4. Verify Memory Usage by cluster, it should be 3072. AM memory +
            // requested memory. 1024 + 2048=3072
            ResourceScheduler rs = rm1.GetRMContext().GetScheduler();
            int allocatedMB      = rs.GetRootQueueMetrics().GetAllocatedMB();

            NUnit.Framework.Assert.AreEqual(amMemory + containerMemory, allocatedMB);
            // 5. Re-register NM by sending completed container status
            IList <NMContainerStatus> nMContainerStatusForApp = CreateNMContainerStatusForApp(
                am0);

            nm1.RegisterNode(nMContainerStatusForApp, Arrays.AsList(app0.GetApplicationId()));
            WaitForClusterMemory(nm1, rs, amMemory);
            // 6. Verify for Memory Used, it should be 1024
            NUnit.Framework.Assert.AreEqual(amMemory, rs.GetRootQueueMetrics().GetAllocatedMB
                                                ());
            // 7. Send AM heatbeat to RM. Allocated response should contain completed
            // container
            AllocateRequest req = AllocateRequest.NewInstance(0, 0F, new AList <ResourceRequest
                                                                                >(), new AList <ContainerId>(), null);
            AllocateResponse        allocate = am0.Allocate(req);
            IList <ContainerStatus> completedContainersStatuses = allocate.GetCompletedContainersStatuses
                                                                      ();

            NUnit.Framework.Assert.AreEqual(noOfContainers, completedContainersStatuses.Count
                                            );
            // Application clean up should happen Cluster memory used is 0
            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
            WaitForClusterMemory(nm1, rs, 0);
            rm1.Stop();
        }
        /// <exception cref="Org.Apache.Hadoop.Yarn.Server.Resourcemanager.Reservation.Exceptions.PlanningException
        ///     "/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="Org.Apache.Hadoop.Security.AccessControlException"/>
        protected internal virtual void TestPlanFollower(bool isMove)
        {
            // Initialize plan based on move flag
            plan = new InMemoryPlan(scheduler.GetRootQueueMetrics(), policy, mAgent, scheduler
                                    .GetClusterResource(), 1L, res, scheduler.GetMinimumResourceCapability(), maxAlloc
                                    , "dedicated", null, isMove);
            // add a few reservations to the plan
            long          ts = Runtime.CurrentTimeMillis();
            ReservationId r1 = ReservationId.NewInstance(ts, 1);

            int[] f1 = new int[] { 10, 10, 10, 10, 10 };
            NUnit.Framework.Assert.IsTrue(plan.ToString(), plan.AddReservation(new InMemoryReservationAllocation
                                                                                   (r1, null, "u3", "dedicated", 0, 0 + f1.Length, ReservationSystemTestUtil.GenerateAllocation
                                                                                       (0L, 1L, f1), res, minAlloc)));
            ReservationId r2 = ReservationId.NewInstance(ts, 2);

            NUnit.Framework.Assert.IsTrue(plan.ToString(), plan.AddReservation(new InMemoryReservationAllocation
                                                                                   (r2, null, "u3", "dedicated", 3, 3 + f1.Length, ReservationSystemTestUtil.GenerateAllocation
                                                                                       (3L, 1L, f1), res, minAlloc)));
            ReservationId r3 = ReservationId.NewInstance(ts, 3);

            int[] f2 = new int[] { 0, 10, 20, 10, 0 };
            NUnit.Framework.Assert.IsTrue(plan.ToString(), plan.AddReservation(new InMemoryReservationAllocation
                                                                                   (r3, null, "u4", "dedicated", 10, 10 + f2.Length, ReservationSystemTestUtil.GenerateAllocation
                                                                                       (10L, 1L, f2), res, minAlloc)));
            AbstractSchedulerPlanFollower planFollower = CreatePlanFollower();

            Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(0L);
            planFollower.Run();
            Queue q = GetReservationQueue(r1.ToString());

            AssertReservationQueueExists(r1);
            // submit an app to r1
            string                 user_0         = "test-user";
            ApplicationId          appId          = ApplicationId.NewInstance(0, 1);
            ApplicationAttemptId   appAttemptId_0 = ApplicationAttemptId.NewInstance(appId, 0);
            AppAddedSchedulerEvent addAppEvent    = new AppAddedSchedulerEvent(appId, q.GetQueueName
                                                                                   (), user_0);

            scheduler.Handle(addAppEvent);
            AppAttemptAddedSchedulerEvent appAttemptAddedEvent = new AppAttemptAddedSchedulerEvent
                                                                     (appAttemptId_0, false);

            scheduler.Handle(appAttemptAddedEvent);
            // initial default reservation queue should have no apps
            Queue defQ = GetDefaultQueue();

            NUnit.Framework.Assert.AreEqual(0, GetNumberOfApplications(defQ));
            AssertReservationQueueExists(r1, 0.1, 0.1);
            NUnit.Framework.Assert.AreEqual(1, GetNumberOfApplications(q));
            AssertReservationQueueDoesNotExist(r2);
            AssertReservationQueueDoesNotExist(r3);
            Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(3L);
            planFollower.Run();
            NUnit.Framework.Assert.AreEqual(0, GetNumberOfApplications(defQ));
            AssertReservationQueueExists(r1, 0.1, 0.1);
            NUnit.Framework.Assert.AreEqual(1, GetNumberOfApplications(q));
            AssertReservationQueueExists(r2, 0.1, 0.1);
            AssertReservationQueueDoesNotExist(r3);
            Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(10L);
            planFollower.Run();
            q = GetReservationQueue(r1.ToString());
            if (isMove)
            {
                // app should have been moved to default reservation queue
                NUnit.Framework.Assert.AreEqual(1, GetNumberOfApplications(defQ));
                NUnit.Framework.Assert.IsNull(q);
            }
            else
            {
                // app should be killed
                NUnit.Framework.Assert.AreEqual(0, GetNumberOfApplications(defQ));
                NUnit.Framework.Assert.IsNotNull(q);
                AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent = new AppAttemptRemovedSchedulerEvent
                                                                             (appAttemptId_0, RMAppAttemptState.Killed, false);
                scheduler.Handle(appAttemptRemovedEvent);
            }
            AssertReservationQueueDoesNotExist(r2);
            AssertReservationQueueExists(r3, 0, 1.0);
            Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(11L);
            planFollower.Run();
            if (isMove)
            {
                // app should have been moved to default reservation queue
                NUnit.Framework.Assert.AreEqual(1, GetNumberOfApplications(defQ));
            }
            else
            {
                // app should be killed
                NUnit.Framework.Assert.AreEqual(0, GetNumberOfApplications(defQ));
            }
            AssertReservationQueueDoesNotExist(r1);
            AssertReservationQueueDoesNotExist(r2);
            AssertReservationQueueExists(r3, 0.1, 0.1);
            Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(12L);
            planFollower.Run();
            AssertReservationQueueDoesNotExist(r1);
            AssertReservationQueueDoesNotExist(r2);
            AssertReservationQueueExists(r3, 0.2, 0.2);
            Org.Mockito.Mockito.When(mClock.GetTime()).ThenReturn(16L);
            planFollower.Run();
            AssertReservationQueueDoesNotExist(r1);
            AssertReservationQueueDoesNotExist(r2);
            AssertReservationQueueDoesNotExist(r3);
            VerifyCapacity(defQ);
        }