예제 #1
0
        public ClusterMetricsInfo(ResourceManager rm)
        {
            // JAXB needs this
            ResourceScheduler rs             = rm.GetResourceScheduler();
            QueueMetrics      metrics        = rs.GetRootQueueMetrics();
            ClusterMetrics    clusterMetrics = ClusterMetrics.GetMetrics();

            this.appsSubmitted         = metrics.GetAppsSubmitted();
            this.appsCompleted         = metrics.GetAppsCompleted();
            this.appsPending           = metrics.GetAppsPending();
            this.appsRunning           = metrics.GetAppsRunning();
            this.appsFailed            = metrics.GetAppsFailed();
            this.appsKilled            = metrics.GetAppsKilled();
            this.reservedMB            = metrics.GetReservedMB();
            this.availableMB           = metrics.GetAvailableMB();
            this.allocatedMB           = metrics.GetAllocatedMB();
            this.reservedVirtualCores  = metrics.GetReservedVirtualCores();
            this.availableVirtualCores = metrics.GetAvailableVirtualCores();
            this.allocatedVirtualCores = metrics.GetAllocatedVirtualCores();
            this.containersAllocated   = metrics.GetAllocatedContainers();
            this.containersPending     = metrics.GetPendingContainers();
            this.containersReserved    = metrics.GetReservedContainers();
            this.totalMB             = availableMB + allocatedMB;
            this.totalVirtualCores   = availableVirtualCores + allocatedVirtualCores;
            this.activeNodes         = clusterMetrics.GetNumActiveNMs();
            this.lostNodes           = clusterMetrics.GetNumLostNMs();
            this.unhealthyNodes      = clusterMetrics.GetUnhealthyNMs();
            this.decommissionedNodes = clusterMetrics.GetNumDecommisionedNMs();
            this.rebootedNodes       = clusterMetrics.GetNumRebootedNMs();
            this.totalNodes          = activeNodes + lostNodes + decommissionedNodes + rebootedNodes +
                                       unhealthyNodes;
        }
예제 #2
0
        /// <exception cref="System.Exception"/>
        private void VerifyClusterMetrics(int activeNodes, int appsSubmitted, int appsPending
                                          , int containersPending, int availableMB, int activeApplications)
        {
            int            timeoutSecs              = 0;
            QueueMetrics   metrics                  = rm.GetResourceScheduler().GetRootQueueMetrics();
            ClusterMetrics clusterMetrics           = ClusterMetrics.GetMetrics();
            bool           isAllMetricAssertionDone = false;
            string         message                  = null;

            while (timeoutSecs++ < 5)
            {
                try
                {
                    // verify queue metrics
                    AssertMetric("appsSubmitted", appsSubmitted, metrics.GetAppsSubmitted());
                    AssertMetric("appsPending", appsPending, metrics.GetAppsPending());
                    AssertMetric("containersPending", containersPending, metrics.GetPendingContainers
                                     ());
                    AssertMetric("availableMB", availableMB, metrics.GetAvailableMB());
                    AssertMetric("activeApplications", activeApplications, metrics.GetActiveApps());
                    // verify node metric
                    AssertMetric("activeNodes", activeNodes, clusterMetrics.GetNumActiveNMs());
                    isAllMetricAssertionDone = true;
                    break;
                }
                catch (Exception e)
                {
                    message = e.Message;
                    System.Console.Out.WriteLine("Waiting for metrics assertion to complete");
                    Sharpen.Thread.Sleep(1000);
                }
            }
            NUnit.Framework.Assert.IsTrue(message, isAllMetricAssertionDone);
        }
예제 #3
0
        /// <exception cref="Org.Codehaus.Jettison.Json.JSONException"/>
        /// <exception cref="System.Exception"/>
        public virtual void VerifyClusterMetrics(int submittedApps, int completedApps, int
                                                 reservedMB, int availableMB, int allocMB, int reservedVirtualCores, int availableVirtualCores
                                                 , int allocVirtualCores, int totalVirtualCores, int containersAlloc, int totalMB
                                                 , int totalNodes, int lostNodes, int unhealthyNodes, int decommissionedNodes, int
                                                 rebootedNodes, int activeNodes)
        {
            ResourceScheduler rs             = rm.GetResourceScheduler();
            QueueMetrics      metrics        = rs.GetRootQueueMetrics();
            ClusterMetrics    clusterMetrics = ClusterMetrics.GetMetrics();
            long totalMBExpect           = metrics.GetAvailableMB() + metrics.GetAllocatedMB();
            long totalVirtualCoresExpect = metrics.GetAvailableVirtualCores() + metrics.GetAllocatedVirtualCores
                                               ();

            NUnit.Framework.Assert.AreEqual("appsSubmitted doesn't match", metrics.GetAppsSubmitted
                                                (), submittedApps);
            NUnit.Framework.Assert.AreEqual("appsCompleted doesn't match", metrics.GetAppsCompleted
                                                (), completedApps);
            NUnit.Framework.Assert.AreEqual("reservedMB doesn't match", metrics.GetReservedMB
                                                (), reservedMB);
            NUnit.Framework.Assert.AreEqual("availableMB doesn't match", metrics.GetAvailableMB
                                                (), availableMB);
            NUnit.Framework.Assert.AreEqual("allocatedMB doesn't match", metrics.GetAllocatedMB
                                                (), allocMB);
            NUnit.Framework.Assert.AreEqual("reservedVirtualCores doesn't match", metrics.GetReservedVirtualCores
                                                (), reservedVirtualCores);
            NUnit.Framework.Assert.AreEqual("availableVirtualCores doesn't match", metrics.GetAvailableVirtualCores
                                                (), availableVirtualCores);
            NUnit.Framework.Assert.AreEqual("allocatedVirtualCores doesn't match", totalVirtualCoresExpect
                                            , allocVirtualCores);
            NUnit.Framework.Assert.AreEqual("containersAllocated doesn't match", 0, containersAlloc
                                            );
            NUnit.Framework.Assert.AreEqual("totalMB doesn't match", totalMBExpect, totalMB);
            NUnit.Framework.Assert.AreEqual("totalNodes doesn't match", clusterMetrics.GetNumActiveNMs
                                                () + clusterMetrics.GetNumLostNMs() + clusterMetrics.GetNumDecommisionedNMs() +
                                            clusterMetrics.GetNumRebootedNMs() + clusterMetrics.GetUnhealthyNMs(), totalNodes
                                            );
            NUnit.Framework.Assert.AreEqual("lostNodes doesn't match", clusterMetrics.GetNumLostNMs
                                                (), lostNodes);
            NUnit.Framework.Assert.AreEqual("unhealthyNodes doesn't match", clusterMetrics.GetUnhealthyNMs
                                                (), unhealthyNodes);
            NUnit.Framework.Assert.AreEqual("decommissionedNodes doesn't match", clusterMetrics
                                            .GetNumDecommisionedNMs(), decommissionedNodes);
            NUnit.Framework.Assert.AreEqual("rebootedNodes doesn't match", clusterMetrics.GetNumRebootedNMs
                                                (), rebootedNodes);
            NUnit.Framework.Assert.AreEqual("activeNodes doesn't match", clusterMetrics.GetNumActiveNMs
                                                (), activeNodes);
        }
예제 #4
0
        public virtual void TestReconnectNode()
        {
            DrainDispatcher dispatcher = new DrainDispatcher();

            rm = new _MockRM_567(this, dispatcher);
            rm.Start();
            MockNM nm1 = rm.RegisterNode("host1:1234", 5120);
            MockNM nm2 = rm.RegisterNode("host2:5678", 5120);

            nm1.NodeHeartbeat(true);
            nm2.NodeHeartbeat(false);
            dispatcher.Await();
            CheckUnealthyNMCount(rm, nm2, true, 1);
            int          expectedNMs = ClusterMetrics.GetMetrics().GetNumActiveNMs();
            QueueMetrics metrics     = rm.GetResourceScheduler().GetRootQueueMetrics();

            // TODO Metrics incorrect in case of the FifoScheduler
            NUnit.Framework.Assert.AreEqual(5120, metrics.GetAvailableMB());
            // reconnect of healthy node
            nm1 = rm.RegisterNode("host1:1234", 5120);
            NodeHeartbeatResponse response = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(response.GetNodeAction()));
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual(expectedNMs, ClusterMetrics.GetMetrics().GetNumActiveNMs
                                                ());
            CheckUnealthyNMCount(rm, nm2, true, 1);
            // reconnect of unhealthy node
            nm2      = rm.RegisterNode("host2:5678", 5120);
            response = nm2.NodeHeartbeat(false);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(response.GetNodeAction()));
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual(expectedNMs, ClusterMetrics.GetMetrics().GetNumActiveNMs
                                                ());
            CheckUnealthyNMCount(rm, nm2, true, 1);
            // unhealthy node changed back to healthy
            nm2 = rm.RegisterNode("host2:5678", 5120);
            dispatcher.Await();
            response = nm2.NodeHeartbeat(true);
            response = nm2.NodeHeartbeat(true);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual(5120 + 5120, metrics.GetAvailableMB());
            // reconnect of node with changed capability
            nm1 = rm.RegisterNode("host2:5678", 10240);
            dispatcher.Await();
            response = nm1.NodeHeartbeat(true);
            dispatcher.Await();
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(response.GetNodeAction()));
            NUnit.Framework.Assert.AreEqual(5120 + 10240, metrics.GetAvailableMB());
            // reconnect of node with changed capability and running applications
            IList <ApplicationId> runningApps = new AList <ApplicationId>();

            runningApps.AddItem(ApplicationId.NewInstance(1, 0));
            nm1 = rm.RegisterNode("host2:5678", 15360, 2, runningApps);
            dispatcher.Await();
            response = nm1.NodeHeartbeat(true);
            dispatcher.Await();
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(response.GetNodeAction()));
            NUnit.Framework.Assert.AreEqual(5120 + 15360, metrics.GetAvailableMB());
            // reconnect healthy node changing http port
            nm1 = new MockNM("host1:1234", 5120, rm.GetResourceTrackerService());
            nm1.SetHttpPort(3);
            nm1.RegisterNode();
            dispatcher.Await();
            response = nm1.NodeHeartbeat(true);
            response = nm1.NodeHeartbeat(true);
            dispatcher.Await();
            RMNode rmNode = rm.GetRMContext().GetRMNodes()[nm1.GetNodeId()];

            NUnit.Framework.Assert.AreEqual(3, rmNode.GetHttpPort());
            NUnit.Framework.Assert.AreEqual(5120, rmNode.GetTotalCapability().GetMemory());
            NUnit.Framework.Assert.AreEqual(5120 + 15360, metrics.GetAvailableMB());
        }