예제 #1
0
        /// <summary>
        /// Populates this object with a snapshot of cluster-wide metrics.
        /// </summary>
        /// <remarks>
        /// Application, memory, virtual-core and container figures are read from the
        /// root queue metrics of the resource manager's scheduler. Node counts are read
        /// from <c>ClusterMetrics.GetMetrics()</c>. The total fields are computed from
        /// the fields assigned here, not fetched separately.
        /// </remarks>
        /// <param name="rm">Resource manager whose scheduler provides the root queue metrics.</param>
        public ClusterMetricsInfo(ResourceManager rm)
        {
            QueueMetrics   rootQueue = rm.GetResourceScheduler().GetRootQueueMetrics();
            ClusterMetrics cluster   = ClusterMetrics.GetMetrics();

            // Application counters.
            appsSubmitted = rootQueue.GetAppsSubmitted();
            appsCompleted = rootQueue.GetAppsCompleted();
            appsPending   = rootQueue.GetAppsPending();
            appsRunning   = rootQueue.GetAppsRunning();
            appsFailed    = rootQueue.GetAppsFailed();
            appsKilled    = rootQueue.GetAppsKilled();

            // Memory figures.
            reservedMB  = rootQueue.GetReservedMB();
            availableMB = rootQueue.GetAvailableMB();
            allocatedMB = rootQueue.GetAllocatedMB();

            // Virtual-core figures.
            reservedVirtualCores  = rootQueue.GetReservedVirtualCores();
            availableVirtualCores = rootQueue.GetAvailableVirtualCores();
            allocatedVirtualCores = rootQueue.GetAllocatedVirtualCores();

            // Container counters.
            containersAllocated = rootQueue.GetAllocatedContainers();
            containersPending   = rootQueue.GetPendingContainers();
            containersReserved  = rootQueue.GetReservedContainers();

            // Totals are the sum of the available and allocated values stored above.
            totalMB           = availableMB + allocatedMB;
            totalVirtualCores = availableVirtualCores + allocatedVirtualCores;

            // Node counts by state.
            activeNodes         = cluster.GetNumActiveNMs();
            lostNodes           = cluster.GetNumLostNMs();
            unhealthyNodes      = cluster.GetUnhealthyNMs();
            decommissionedNodes = cluster.GetNumDecommisionedNMs();
            rebootedNodes       = cluster.GetNumRebootedNMs();

            // The total node count is the sum of all five per-state counts.
            totalNodes = activeNodes + lostNodes + decommissionedNodes + rebootedNodes
                         + unhealthyNodes;
        }
예제 #2
0
        /// <summary>
        /// Repeatedly checks the root queue metrics and the active node count against
        /// the expected values, making at most five attempts.
        /// </summary>
        /// <remarks>
        /// When an attempt throws, the exception message is remembered, a progress line
        /// is printed, and <c>Sharpen.Thread.Sleep(1000)</c> is called before the next
        /// attempt. After the loop, the method asserts that one attempt completed
        /// without throwing, using the last remembered message as the failure text.
        /// </remarks>
        /// <param name="activeNodes">Expected value of <c>GetNumActiveNMs()</c>.</param>
        /// <param name="appsSubmitted">Expected value of <c>GetAppsSubmitted()</c>.</param>
        /// <param name="appsPending">Expected value of <c>GetAppsPending()</c>.</param>
        /// <param name="containersPending">Expected value of <c>GetPendingContainers()</c>.</param>
        /// <param name="availableMB">Expected value of <c>GetAvailableMB()</c>.</param>
        /// <param name="activeApplications">Expected value of <c>GetActiveApps()</c>.</param>
        /// <exception cref="System.Exception"/>
        private void VerifyClusterMetrics(int activeNodes, int appsSubmitted, int appsPending,
                                          int containersPending, int availableMB, int activeApplications)
        {
            QueueMetrics   queueMetrics = rm.GetResourceScheduler().GetRootQueueMetrics();
            ClusterMetrics nodeMetrics  = ClusterMetrics.GetMetrics();
            string         lastFailure  = null;
            bool           verified     = false;

            // Five attempts at most; the loop ends as soon as one attempt passes.
            for (int attempt = 0; attempt < 5 && !verified; attempt++)
            {
                try
                {
                    // Queue-level metrics.
                    AssertMetric("appsSubmitted", appsSubmitted, queueMetrics.GetAppsSubmitted());
                    AssertMetric("appsPending", appsPending, queueMetrics.GetAppsPending());
                    AssertMetric("containersPending", containersPending,
                                 queueMetrics.GetPendingContainers());
                    AssertMetric("availableMB", availableMB, queueMetrics.GetAvailableMB());
                    AssertMetric("activeApplications", activeApplications, queueMetrics.GetActiveApps());
                    // Node-level metric.
                    AssertMetric("activeNodes", activeNodes, nodeMetrics.GetNumActiveNMs());

                    verified = true;
                }
                catch (Exception e)
                {
                    // Keep the latest failure text for the final assertion, then wait.
                    lastFailure = e.Message;
                    System.Console.Out.WriteLine("Waiting for metrics assertion to complete");
                    Sharpen.Thread.Sleep(1000);
                }
            }

            NUnit.Framework.Assert.IsTrue(lastFailure, verified);
        }
예제 #3
0
        /// <summary>
        /// Populates this object with the queue metrics recorded for a single user.
        /// </summary>
        /// <remarks>
        /// The per-user metrics are looked up on the root queue metrics of the resource
        /// manager's scheduler. When the lookup returns <c>null</c>,
        /// <c>userMetricsAvailable</c> is set to <c>false</c> and no other field is
        /// assigned by this constructor.
        /// </remarks>
        /// <param name="rm">Resource manager whose scheduler provides the root queue metrics.</param>
        /// <param name="user">User name passed to <c>GetUserMetrics</c>.</param>
        public UserMetricsInfo(ResourceManager rm, string user)
        {
            QueueMetrics rootQueue = rm.GetResourceScheduler().GetRootQueueMetrics();
            QueueMetrics perUser   = rootQueue.GetUserMetrics(user);

            userMetricsAvailable = perUser != null;
            if (perUser == null)
            {
                // Nothing recorded for this user; leave the remaining fields untouched.
                return;
            }

            // Application counters.
            appsSubmitted = perUser.GetAppsSubmitted();
            appsCompleted = perUser.GetAppsCompleted();
            appsPending   = perUser.GetAppsPending();
            appsRunning   = perUser.GetAppsRunning();
            appsFailed    = perUser.GetAppsFailed();
            appsKilled    = perUser.GetAppsKilled();

            // Container counters.
            runningContainers  = perUser.GetAllocatedContainers();
            pendingContainers  = perUser.GetPendingContainers();
            reservedContainers = perUser.GetReservedContainers();

            // Memory figures.
            reservedMB  = perUser.GetReservedMB();
            pendingMB   = perUser.GetPendingMB();
            allocatedMB = perUser.GetAllocatedMB();

            // Virtual-core figures.
            reservedVirtualCores  = perUser.GetReservedVirtualCores();
            pendingVirtualCores   = perUser.GetPendingVirtualCores();
            allocatedVirtualCores = perUser.GetAllocatedVirtualCores();
        }
예제 #4
0
        /// <summary>
        /// Validates killing an application while it is in the accepted state, and
        /// checks that the root queue's killed and submitted application counters each
        /// increase by one.
        /// </summary>
        /// <remarks>
        /// Also exercises the YARN-1689 scenario: the AM registers after the kill
        /// request has been issued, which must not crash the RM.
        /// </remarks>
        /// <exception cref="System.Exception">exception</exception>
        public virtual void TestApplicationKillAtAcceptedState()
        {
            Dispatcher asyncDispatcher = new _AsyncDispatcher_573();
            MockRM     resourceManager = new _MockRM_596(asyncDispatcher, conf);

            // Record the counters before the RM is started so the deltas can be checked.
            QueueMetrics baseline        = resourceManager.GetResourceScheduler().GetRootQueueMetrics();
            int          killedBefore    = baseline.GetAppsKilled();
            int          submittedBefore = baseline.GetAppsSubmitted();

            resourceManager.Start();
            MockNM node = new MockNM("127.0.0.1:1234", 15120, resourceManager.GetResourceTrackerService());

            node.RegisterNode();

            // Submit an application and launch its AM.
            RMApp  app      = resourceManager.SubmitApp(200);
            MockAM attempt  = MockRM.LaunchAM(app, resourceManager, node);

            attempt.WaitForState(RMAppAttemptState.Launched);
            node.NodeHeartbeat(attempt.GetApplicationAttemptId(), 1, ContainerState.Running);
            resourceManager.WaitForState(app.GetApplicationId(), RMAppState.Accepted);

            // Kill the application before a new attempt is launched; at this point the
            // app report returns the invalid AM host and port.
            KillApplicationRequest killRequest =
                KillApplicationRequest.NewInstance(app.GetApplicationId());

            resourceManager.GetClientRMService().ForceKillApplication(killRequest);

            // YARN-1689: suppose a race lets the AM register now. The RM must survive it.
            attempt.RegisterAppAttempt(false);

            // The kill event to RMAppAttempt was explicitly intercepted, so the app
            // should still be in the KILLING state.
            resourceManager.WaitForState(app.GetApplicationId(), RMAppState.Killing);

            // The AM attempt should now be running.
            resourceManager.WaitForState(attempt.GetApplicationAttemptId(), RMAppAttemptState.Running);

            // Simulate the app attempt being killed.
            resourceManager.GetRMContext().GetDispatcher().GetEventHandler().Handle(
                new RMAppEvent(app.GetApplicationId(), RMAppEventType.AttemptKilled));
            resourceManager.WaitForState(app.GetApplicationId(), RMAppState.Killed);

            // Re-read the metrics and compare them with the baseline.
            QueueMetrics current = resourceManager.GetResourceScheduler().GetRootQueueMetrics();

            NUnit.Framework.Assert.AreEqual(killedBefore + 1, current.GetAppsKilled());
            NUnit.Framework.Assert.AreEqual(submittedBefore + 1, current.GetAppsSubmitted());
        }
예제 #5
0
        /// <summary>
        /// Checks that adding one application followed by two attempts for it raises
        /// the root queue's submitted-applications counter by exactly one.
        /// </summary>
        /// <remarks>
        /// The scheduler is stopped in a <c>finally</c> block so that it is shut down
        /// even when a scheduler call or the assertion throws.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestAppAttemptMetrics()
        {
            AsyncDispatcher dispatcher = new InlineDispatcher();
            FifoScheduler   scheduler  = new FifoScheduler();
            RMContext       rmContext  = new RMContextImpl(dispatcher, null, null, null, null, null,
                                                           null, null, null, scheduler);

            ((RMContextImpl)rmContext).SetSystemMetricsPublisher(
                Org.Mockito.Mockito.Mock<SystemMetricsPublisher>());
            Configuration conf = new Configuration();

            scheduler.SetRMContext(rmContext);
            scheduler.Init(conf);
            scheduler.Start();
            try
            {
                scheduler.Reinitialize(conf, rmContext);
                QueueMetrics metrics             = scheduler.GetRootQueueMetrics();
                int          beforeAppsSubmitted = metrics.GetAppsSubmitted();

                // Register the application itself.
                ApplicationId        appId        = BuilderUtils.NewApplicationId(200, 1);
                ApplicationAttemptId appAttemptId = BuilderUtils.NewApplicationAttemptId(appId, 1);
                SchedulerEvent       appEvent     = new AppAddedSchedulerEvent(appId, "queue", "user");

                scheduler.Handle(appEvent);

                // First attempt of the application.
                SchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId, false);

                scheduler.Handle(attemptEvent);

                // Second attempt of the same application.
                appAttemptId = BuilderUtils.NewApplicationAttemptId(appId, 2);
                SchedulerEvent attemptEvent2 = new AppAttemptAddedSchedulerEvent(appAttemptId, false);

                scheduler.Handle(attemptEvent2);
                int afterAppsSubmitted = metrics.GetAppsSubmitted();

                // Two attempts were added, but the counter must only have grown by one.
                NUnit.Framework.Assert.AreEqual(1, afterAppsSubmitted - beforeAppsSubmitted);
            }
            finally
            {
                // Always release the started scheduler, including on test failure.
                scheduler.Stop();
            }
        }
예제 #6
0
        /// <summary>
        /// Asserts that the supplied cluster metric values agree with what the root
        /// queue metrics and <c>ClusterMetrics.GetMetrics()</c> currently report.
        /// </summary>
        /// <param name="submittedApps">Compared with <c>GetAppsSubmitted()</c>.</param>
        /// <param name="completedApps">Compared with <c>GetAppsCompleted()</c>.</param>
        /// <param name="reservedMB">Compared with <c>GetReservedMB()</c>.</param>
        /// <param name="availableMB">Compared with <c>GetAvailableMB()</c>.</param>
        /// <param name="allocMB">Compared with <c>GetAllocatedMB()</c>.</param>
        /// <param name="reservedVirtualCores">Compared with <c>GetReservedVirtualCores()</c>.</param>
        /// <param name="availableVirtualCores">Compared with <c>GetAvailableVirtualCores()</c>.</param>
        /// <param name="allocVirtualCores">Compared with <c>GetAllocatedVirtualCores()</c>.</param>
        /// <param name="totalVirtualCores">Compared with available plus allocated virtual cores.</param>
        /// <param name="containersAlloc">Compared with the literal <c>0</c>.</param>
        /// <param name="totalMB">Compared with available plus allocated MB.</param>
        /// <param name="totalNodes">Compared with the sum of the five per-state node counts.</param>
        /// <param name="lostNodes">Compared with <c>GetNumLostNMs()</c>.</param>
        /// <param name="unhealthyNodes">Compared with <c>GetUnhealthyNMs()</c>.</param>
        /// <param name="decommissionedNodes">Compared with <c>GetNumDecommisionedNMs()</c>.</param>
        /// <param name="rebootedNodes">Compared with <c>GetNumRebootedNMs()</c>.</param>
        /// <param name="activeNodes">Compared with <c>GetNumActiveNMs()</c>.</param>
        /// <exception cref="Org.Codehaus.Jettison.Json.JSONException"/>
        /// <exception cref="System.Exception"/>
        public virtual void VerifyClusterMetrics(int submittedApps, int completedApps, int reservedMB,
                                                 int availableMB, int allocMB, int reservedVirtualCores,
                                                 int availableVirtualCores, int allocVirtualCores,
                                                 int totalVirtualCores, int containersAlloc, int totalMB,
                                                 int totalNodes, int lostNodes, int unhealthyNodes,
                                                 int decommissionedNodes, int rebootedNodes, int activeNodes)
        {
            ResourceScheduler rs             = rm.GetResourceScheduler();
            QueueMetrics      metrics        = rs.GetRootQueueMetrics();
            ClusterMetrics    clusterMetrics = ClusterMetrics.GetMetrics();

            // Expected totals are derived as available + allocated.
            long totalMBExpect           = metrics.GetAvailableMB() + metrics.GetAllocatedMB();
            long totalVirtualCoresExpect = metrics.GetAvailableVirtualCores()
                                           + metrics.GetAllocatedVirtualCores();

            NUnit.Framework.Assert.AreEqual("appsSubmitted doesn't match",
                                            metrics.GetAppsSubmitted(), submittedApps);
            NUnit.Framework.Assert.AreEqual("appsCompleted doesn't match",
                                            metrics.GetAppsCompleted(), completedApps);
            NUnit.Framework.Assert.AreEqual("reservedMB doesn't match",
                                            metrics.GetReservedMB(), reservedMB);
            NUnit.Framework.Assert.AreEqual("availableMB doesn't match",
                                            metrics.GetAvailableMB(), availableMB);
            NUnit.Framework.Assert.AreEqual("allocatedMB doesn't match",
                                            metrics.GetAllocatedMB(), allocMB);
            NUnit.Framework.Assert.AreEqual("reservedVirtualCores doesn't match",
                                            metrics.GetReservedVirtualCores(), reservedVirtualCores);
            NUnit.Framework.Assert.AreEqual("availableVirtualCores doesn't match",
                                            metrics.GetAvailableVirtualCores(), availableVirtualCores);
            // Allocated cores are checked against the allocated metric itself; the
            // derived total is checked separately against totalVirtualCores.
            NUnit.Framework.Assert.AreEqual("allocatedVirtualCores doesn't match",
                                            metrics.GetAllocatedVirtualCores(), allocVirtualCores);
            NUnit.Framework.Assert.AreEqual("totalVirtualCores doesn't match",
                                            totalVirtualCoresExpect, totalVirtualCores);
            // NOTE(review): the expected value is a hard-coded 0 rather than
            // metrics.GetAllocatedContainers() - confirm this is intentional.
            NUnit.Framework.Assert.AreEqual("containersAllocated doesn't match", 0, containersAlloc);
            NUnit.Framework.Assert.AreEqual("totalMB doesn't match", totalMBExpect, totalMB);
            NUnit.Framework.Assert.AreEqual("totalNodes doesn't match",
                                            clusterMetrics.GetNumActiveNMs() + clusterMetrics.GetNumLostNMs()
                                            + clusterMetrics.GetNumDecommisionedNMs()
                                            + clusterMetrics.GetNumRebootedNMs()
                                            + clusterMetrics.GetUnhealthyNMs(), totalNodes);
            NUnit.Framework.Assert.AreEqual("lostNodes doesn't match",
                                            clusterMetrics.GetNumLostNMs(), lostNodes);
            NUnit.Framework.Assert.AreEqual("unhealthyNodes doesn't match",
                                            clusterMetrics.GetUnhealthyNMs(), unhealthyNodes);
            NUnit.Framework.Assert.AreEqual("decommissionedNodes doesn't match",
                                            clusterMetrics.GetNumDecommisionedNMs(), decommissionedNodes);
            NUnit.Framework.Assert.AreEqual("rebootedNodes doesn't match",
                                            clusterMetrics.GetNumRebootedNMs(), rebootedNodes);
            NUnit.Framework.Assert.AreEqual("activeNodes doesn't match",
                                            clusterMetrics.GetNumActiveNMs(), activeNodes);
        }