Esempio n. 1
0
        /// <summary>
        /// Sends a node update to a <c>FifoScheduler</c> after an application has been
        /// added but before its first attempt is registered, and fails if the scheduler
        /// throws an <see cref="System.ArgumentNullException"/> while handling it.
        /// </summary>
        public virtual void TestNodeUpdateBeforeAppAttemptInit()
        {
            FifoScheduler scheduler = new FifoScheduler();
            MockRM        rm        = new MockRM(conf);

            try
            {
                scheduler.SetRMContext(rm.GetRMContext());
                scheduler.Init(conf);
                scheduler.Start();
                scheduler.Reinitialize(conf, rm.GetRMContext());
                RMNode node = MockNodes.NewNodeInfo(1, Resources.CreateResource(1024, 4), 1, "127.0.0.1"
                                                    );

                scheduler.Handle(new NodeAddedSchedulerEvent(node));
                ApplicationId appId = ApplicationId.NewInstance(0, 1);

                // The application exists, but no attempt has been added yet.
                scheduler.AddApplication(appId, "queue1", "user1", false);
                NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node);

                try
                {
                    scheduler.Handle(updateEvent);
                }
                catch (ArgumentNullException)
                {
                    NUnit.Framework.Assert.Fail();
                }
                ApplicationAttemptId attId = ApplicationAttemptId.NewInstance(appId, 1);

                scheduler.AddApplicationAttempt(attId, false, false);
            }
            finally
            {
                // Stop the RM even when a step above throws, so it is not leaked
                // into subsequent tests.
                rm.Stop();
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Initializes and starts both resource managers, asserting after each start
 /// that the RM reports the standby HA state, then transitions
 /// <paramref name="rm1"/> to active and asserts that it reports the active state.
 /// </summary>
 /// <param name="rm1">RM that ends up active.</param>
 /// <param name="confForRM1">Configuration passed to <paramref name="rm1"/>.</param>
 /// <param name="rm2">RM that is left in standby.</param>
 /// <param name="confForRM2">Configuration passed to <paramref name="rm2"/>.</param>
 /// <exception cref="System.IO.IOException"/>
 protected internal virtual void StartRMs(MockRM rm1, Configuration confForRM1, MockRM
                                          rm2, Configuration confForRM2)
 {
     // Bring up the first RM and confirm it starts in standby.
     rm1.Init(confForRM1);
     rm1.Start();
     bool firstIsStandby = rm1.GetRMContext().GetHAServiceState()
                           == HAServiceProtocol.HAServiceState.Standby;
     NUnit.Framework.Assert.IsTrue(firstIsStandby);

     // Same for the second RM.
     rm2.Init(confForRM2);
     rm2.Start();
     bool secondIsStandby = rm2.GetRMContext().GetHAServiceState()
                            == HAServiceProtocol.HAServiceState.Standby;
     NUnit.Framework.Assert.IsTrue(secondIsStandby);

     // Promote the first RM and confirm the state change.
     rm1.adminService.TransitionToActive(requestInfo);
     bool firstIsActive = rm1.GetRMContext().GetHAServiceState()
                          == HAServiceProtocol.HAServiceState.Active;
     NUnit.Framework.Assert.IsTrue(firstIsActive);
 }
Esempio n. 3
0
        /// <summary>
        /// Cycles the RM between standby and active several times and checks that the
        /// event-handler count and the service count stay at the values observed right
        /// after <c>Init</c>, and that a dispatcher captured before a transition to
        /// standby reports itself as stopped afterwards.
        /// </summary>
        public virtual void TestRMDispatcherForHA()
        {
            string handlerCountMessage = "Expect to get the same number of handlers";
            string serviceCountMessage = "Expect to get the same number of services";

            configuration.SetBoolean(YarnConfiguration.AutoFailoverEnabled, false);
            Configuration yarnConf = new YarnConfiguration(configuration);

            rm = new _MockRM_313(yarnConf);
            rm.Init(yarnConf);

            // Baseline counts, taken after Init and before Start.
            TestRMHA.MyCountingDispatcher countingDispatcher =
                (TestRMHA.MyCountingDispatcher)rm.GetRMContext().GetDispatcher();
            int baselineHandlerCount = countingDispatcher.GetEventHandlerCount();
            int baselineServiceCount = rm.GetServices().Count;

            NUnit.Framework.Assert.IsTrue(baselineHandlerCount != 0);
            HAServiceProtocol.StateChangeRequestInfo userRequest =
                new HAServiceProtocol.StateChangeRequestInfo(HAServiceProtocol.RequestSource.RequestByUser);
            NUnit.Framework.Assert.AreEqual(StateErr, HAServiceProtocol.HAServiceState.Initializing,
                                            rm.adminService.GetServiceStatus().GetState());
            NUnit.Framework.Assert.IsFalse("RM is ready to become active before being started",
                                           rm.adminService.GetServiceStatus().IsReadyToBecomeActive());
            rm.Start();

            // Bounce between standby and active a couple of times, ending in standby.
            rm.adminService.TransitionToStandby(userRequest);
            rm.adminService.TransitionToActive(userRequest);
            rm.adminService.TransitionToStandby(userRequest);
            rm.adminService.TransitionToActive(userRequest);
            rm.adminService.TransitionToStandby(userRequest);

            // The dispatcher currently held by the context must not be stopped.
            countingDispatcher = (TestRMHA.MyCountingDispatcher)rm.GetRMContext().GetDispatcher();
            bool dispatcherRunning = !countingDispatcher.IsStopped();
            NUnit.Framework.Assert.IsTrue(dispatcherRunning);

            rm.adminService.TransitionToActive(userRequest);
            int handlerCountWhenActive =
                ((TestRMHA.MyCountingDispatcher)rm.GetRMContext().GetDispatcher()).GetEventHandlerCount();
            NUnit.Framework.Assert.AreEqual(handlerCountMessage, baselineHandlerCount, handlerCountWhenActive);
            int serviceCountWhenActive = rm.GetServices().Count;
            NUnit.Framework.Assert.AreEqual(serviceCountMessage, baselineServiceCount, serviceCountWhenActive);

            // Hold on to the active-state dispatcher before going back to standby.
            countingDispatcher = (TestRMHA.MyCountingDispatcher)rm.GetRMContext().GetDispatcher();
            rm.adminService.TransitionToStandby(userRequest);
            int handlerCountInStandby =
                ((TestRMHA.MyCountingDispatcher)rm.GetRMContext().GetDispatcher()).GetEventHandlerCount();
            NUnit.Framework.Assert.AreEqual(handlerCountMessage, baselineHandlerCount, handlerCountInStandby);
            int serviceCountInStandby = rm.GetServices().Count;
            NUnit.Framework.Assert.AreEqual(serviceCountMessage, baselineServiceCount, serviceCountInStandby);

            // The dispatcher captured while active must have been stopped by the transition.
            NUnit.Framework.Assert.IsTrue(countingDispatcher.IsStopped());
            rm.Stop();
        }
Esempio n. 4
0
        /// <summary>
        /// Feeds four completed-container reports to the resource tracker service
        /// (for an unmanaged-AM app and a managed-AM app, each with an attempt number
        /// other than the current one and with the master container cleared) and
        /// verifies after each that the spied event handler never received an event.
        /// </summary>
        public virtual void TestHandleContainerStatusInvalidCompletions()
        {
            rm = new MockRM(new YarnConfiguration());
            rm.Start();
            EventHandler spiedHandler = Org.Mockito.Mockito.Spy(
                rm.GetRMContext().GetDispatcher().GetEventHandler());

            // Case 1: Unmanaged AM
            RMApp submittedApp = rm.SubmitApp(1024, true);

            // Case 1.1: AppAttemptId is null (the report names attempt #2 of the app)
            ApplicationAttemptId otherAttemptId =
                ApplicationAttemptId.NewInstance(submittedApp.GetApplicationId(), 2);
            NMContainerStatus completedStatus = NMContainerStatus.NewInstance(
                ContainerId.NewContainerId(otherAttemptId, 1),
                ContainerState.Complete,
                Resource.NewInstance(1024, 1),
                "Dummy Completed",
                0,
                Priority.NewInstance(10),
                1234);
            rm.GetResourceTrackerService().HandleNMContainerStatus(completedStatus, null);
            Org.Mockito.Mockito.Verify(spiedHandler, Org.Mockito.Mockito.Never())
                .Handle((Org.Apache.Hadoop.Yarn.Event.Event)Matchers.Any());

            // Case 1.2: Master container is null
            RMAppAttemptImpl attemptImpl = (RMAppAttemptImpl)submittedApp.GetCurrentAppAttempt();
            attemptImpl.SetMasterContainer(null);
            completedStatus = NMContainerStatus.NewInstance(
                ContainerId.NewContainerId(attemptImpl.GetAppAttemptId(), 0),
                ContainerState.Complete,
                Resource.NewInstance(1024, 1),
                "Dummy Completed",
                0,
                Priority.NewInstance(10),
                1234);
            rm.GetResourceTrackerService().HandleNMContainerStatus(completedStatus, null);
            Org.Mockito.Mockito.Verify(spiedHandler, Org.Mockito.Mockito.Never())
                .Handle((Org.Apache.Hadoop.Yarn.Event.Event)Matchers.Any());

            // Case 2: Managed AM
            submittedApp = rm.SubmitApp(1024);

            // Case 2.1: AppAttemptId is null (the report names attempt #2 of the app)
            otherAttemptId = ApplicationAttemptId.NewInstance(submittedApp.GetApplicationId(), 2);
            completedStatus = NMContainerStatus.NewInstance(
                ContainerId.NewContainerId(otherAttemptId, 1),
                ContainerState.Complete,
                Resource.NewInstance(1024, 1),
                "Dummy Completed",
                0,
                Priority.NewInstance(10),
                1234);
            try
            {
                rm.GetResourceTrackerService().HandleNMContainerStatus(completedStatus, null);
            }
            catch (Exception)
            {
                // expected - ignore
            }
            Org.Mockito.Mockito.Verify(spiedHandler, Org.Mockito.Mockito.Never())
                .Handle((Org.Apache.Hadoop.Yarn.Event.Event)Matchers.Any());

            // Case 2.2: Master container is null
            attemptImpl = (RMAppAttemptImpl)submittedApp.GetCurrentAppAttempt();
            attemptImpl.SetMasterContainer(null);
            completedStatus = NMContainerStatus.NewInstance(
                ContainerId.NewContainerId(attemptImpl.GetAppAttemptId(), 0),
                ContainerState.Complete,
                Resource.NewInstance(1024, 1),
                "Dummy Completed",
                0,
                Priority.NewInstance(10),
                1234);
            try
            {
                rm.GetResourceTrackerService().HandleNMContainerStatus(completedStatus, null);
            }
            catch (Exception)
            {
                // expected - ignore
            }
            Org.Mockito.Mockito.Verify(spiedHandler, Org.Mockito.Mockito.Never())
                .Handle((Org.Apache.Hadoop.Yarn.Event.Event)Matchers.Any());
        }
Esempio n. 5
0
        /// <summary>
        /// Starts an RM backed by a custom in-memory state store, makes it active, then
        /// runs a background thread alongside a state-store update and finally checks
        /// that the RM can still transition to standby and behaves as a standby RM.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestTransitionedToStandbyShouldNotHang()
        {
            configuration.SetBoolean(YarnConfiguration.AutoFailoverEnabled, false);
            Configuration      yarnConf   = new YarnConfiguration(configuration);
            MemoryRMStateStore stateStore = new _MemoryRMStateStore_464();

            stateStore.Init(yarnConf);
            rm = new _MockRM_472(yarnConf, stateStore);
            rm.Init(yarnConf);

            HAServiceProtocol.StateChangeRequestInfo userRequest =
                new HAServiceProtocol.StateChangeRequestInfo(HAServiceProtocol.RequestSource.RequestByUser);

            // Before Start the RM must report Initializing and must not be ready.
            NUnit.Framework.Assert.AreEqual(StateErr, HAServiceProtocol.HAServiceState.Initializing,
                                            rm.adminService.GetServiceStatus().GetState());
            NUnit.Framework.Assert.IsFalse("RM is ready to become active before being started",
                                           rm.adminService.GetServiceStatus().IsReadyToBecomeActive());
            CheckMonitorHealth();

            rm.Start();
            CheckMonitorHealth();
            CheckStandbyRMFunctionality();

            // 2. Transition to Active.
            rm.adminService.TransitionToActive(userRequest);

            // 3. Try Transition to standby
            Sharpen.Thread worker = new Sharpen.Thread(new _Runnable_498(this));
            worker.Start();
            rm.GetRMContext().GetStateStore().UpdateApplicationState(null);

            // wait for thread to finish
            worker.Join();

            rm.adminService.TransitionToStandby(userRequest);
            CheckStandbyRMFunctionality();
            rm.Stop();
        }
Esempio n. 6
0
        /// <summary>
        /// Configures the file-system based configuration provider, starts the RM,
        /// uploads a capacity-scheduler configuration that sets
        /// <c>yarn.scheduler.capacity.maximum-applications</c> to 5000, refreshes the
        /// queues through the admin service, and checks that the scheduler now
        /// reports 5000 and that this differs from the value read before the refresh.
        /// </summary>
        public virtual void TestAdminRefreshQueuesWithFileSystemBasedConfigurationProvider
            ()
        {
            configuration.Set(YarnConfiguration.RmConfigurationProviderClass, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"
                              );
            //upload default configurations
            UploadDefaultConfiguration();
            try
            {
                rm = new MockRM(configuration);
                rm.Init(configuration);
                rm.Start();
            }
            catch (Exception)
            {
                NUnit.Framework.Assert.Fail("Should not get any exceptions");
            }
            CapacityScheduler cs = (CapacityScheduler)rm.GetRMContext().GetScheduler();
            int maxAppsBefore    = cs.GetConfiguration().GetMaximumSystemApplications();
            CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();

            csConf.Set("yarn.scheduler.capacity.maximum-applications", "5000");
            UploadConfiguration(csConf, "capacity-scheduler.xml");
            rm.adminService.RefreshQueues(RefreshQueuesRequest.NewInstance());
            int maxAppsAfter = cs.GetConfiguration().GetMaximumSystemApplications();

            // Expected value first, actual second, so a failure report reads correctly.
            NUnit.Framework.Assert.AreEqual(5000, maxAppsAfter);
            NUnit.Framework.Assert.IsTrue(maxAppsAfter != maxAppsBefore);
        }
Esempio n. 7
0
        /// <summary>
        /// Polls the state of <paramref name="nm1"/>'s node (up to 20 waits of 100 ms)
        /// while "node is not unhealthy" still equals <paramref name="health"/>, then
        /// asserts that it no longer does and that the cluster's unhealthy-NM metric
        /// equals <paramref name="count"/>.
        /// </summary>
        /// <param name="rm">RM whose node map is inspected.</param>
        /// <param name="nm1">Node manager whose node state is polled.</param>
        /// <param name="health">Value the "not unhealthy" flag is expected to move away from.</param>
        /// <param name="count">Expected value of the unhealthy-NM metric.</param>
        /// <exception cref="System.Exception"/>
        private void CheckUnealthyNMCount(MockRM rm, MockNM nm1, bool health, int count)
        {
            int polls = 0;

            while (true)
            {
                bool notUnhealthy =
                    rm.GetRMContext().GetRMNodes()[nm1.GetNodeId()].GetState() != NodeState.Unhealthy;
                if (notUnhealthy != health)
                {
                    // The node has reached the state the caller is waiting for.
                    break;
                }
                if (polls++ >= 20)
                {
                    // Polling budget exhausted; the assertion below reports the failure.
                    break;
                }
                lock (this)
                {
                    Sharpen.Runtime.Wait(this, 100);
                }
            }

            bool stillNotTransitioned =
                (rm.GetRMContext().GetRMNodes()[nm1.GetNodeId()].GetState() != NodeState.Unhealthy) == health;
            NUnit.Framework.Assert.IsFalse(stillNotTransitioned);
            NUnit.Framework.Assert.AreEqual("Unhealthy metrics not incremented", count,
                                            ClusterMetrics.GetMetrics().GetUnhealthyNMs());
        }
Esempio n. 8
0
        /// <summary>
        /// Makes a first RM active with recovery enabled, then creates a second RM on
        /// the same state store whose transition to active is expected to throw, and
        /// checks that afterwards the cluster metrics, the node map and the app map of
        /// the new RM are all empty.
        /// </summary>
        public virtual void TestFailoverClearsRMContext()
        {
            configuration.SetBoolean(YarnConfiguration.AutoFailoverEnabled, false);
            configuration.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            Configuration      conf     = new YarnConfiguration(configuration);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // 1. start RM
            rm = new MockRM(conf, memStore);
            rm.Init(conf);
            rm.Start();
            HAServiceProtocol.StateChangeRequestInfo requestInfo = new HAServiceProtocol.StateChangeRequestInfo
                                                                       (HAServiceProtocol.RequestSource.RequestByUser);
            CheckMonitorHealth();
            CheckStandbyRMFunctionality();
            // 2. Transition to active
            rm.adminService.TransitionToActive(requestInfo);
            CheckMonitorHealth();
            CheckActiveRMFunctionality();
            VerifyClusterMetrics(1, 1, 1, 1, 2048, 1);
            NUnit.Framework.Assert.AreEqual(1, rm.GetRMContext().GetRMNodes().Count);
            NUnit.Framework.Assert.AreEqual(1, rm.GetRMContext().GetRMApps().Count);
            // 3. Create new RM
            rm = new _MockRM_550(conf, memStore);
            rm.Init(conf);
            rm.Start();
            CheckMonitorHealth();
            CheckStandbyRMFunctionality();
            // 4. Try Transition to active, throw exception
            // The "should have thrown" failure is raised outside the try block:
            // NUnit reports failures by throwing an exception, which the
            // catch (Exception) below would otherwise intercept.
            bool transitionThrew = false;
            try
            {
                rm.adminService.TransitionToActive(requestInfo);
            }
            catch (Exception e)
            {
                transitionThrew = true;
                // The thrown message must mention the expected text. The check is
                // made on the actual message so an empty message cannot pass.
                NUnit.Framework.Assert.IsTrue(e.Message.Contains(
                                                  "Error when transitioning to Active mode"));
            }
            if (!transitionThrew)
            {
                NUnit.Framework.Assert.Fail("Transitioned to Active should throw exception.");
            }
            // 5. Clears the metrics
            VerifyClusterMetrics(0, 0, 0, 0, 0, 0);
            NUnit.Framework.Assert.AreEqual(0, rm.GetRMContext().GetRMNodes().Count);
            NUnit.Framework.Assert.AreEqual(0, rm.GetRMContext().GetRMApps().Count);
        }
Esempio n. 9
0
 /// <summary>
 /// Moves <c>rm1</c> to standby and <c>rm2</c> to active through their admin
 /// services, then asserts that both report the corresponding HA state.
 /// </summary>
 /// <exception cref="System.IO.IOException"/>
 protected internal virtual void ExplicitFailover()
 {
     // Demote first, then promote the other RM.
     rm1.adminService.TransitionToStandby(requestInfo);
     rm2.adminService.TransitionToActive(requestInfo);

     bool firstIsStandby = rm1.GetRMContext().GetHAServiceState()
                           == HAServiceProtocol.HAServiceState.Standby;
     NUnit.Framework.Assert.IsTrue(firstIsStandby);

     bool secondIsActive = rm2.GetRMContext().GetHAServiceState()
                           == HAServiceProtocol.HAServiceState.Active;
     NUnit.Framework.Assert.IsTrue(secondIsActive);
 }
Esempio n. 10
0
        // Test even if AM container is allocated with containerId not equal to 1, the
        // following allocate requests from AM should be able to retrieve the
        // corresponding NM Token.
        /// <summary>
        /// Bumps the attempt's container id counter before the AM is launched so that
        /// the AM container id is not 1, then allocates one container and checks that
        /// the first NM token returned by the allocate calls is for <c>nm1</c>.
        /// The RM is stopped in a <c>finally</c> block so it is released even when an
        /// assertion fails.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestNMTokenSentForNormalContainer()
        {
            conf.Set(YarnConfiguration.RmScheduler, typeof(CapacityScheduler).GetCanonicalName
                         ());
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                MockNM       nm1     = rm.RegisterNode("h1:1234", 5120);
                RMApp        app     = rm.SubmitApp(2000);
                RMAppAttempt attempt = app.GetCurrentAppAttempt();
                // Call getNewContainerId to increase container Id so that the AM container
                // Id doesn't equal to one.
                CapacityScheduler cs = (CapacityScheduler)rm.GetResourceScheduler();

                cs.GetApplicationAttempt(attempt.GetAppAttemptId()).GetNewContainerId();
                // kick the scheduling
                nm1.NodeHeartbeat(true);
                MockAM am = MockRM.LaunchAM(app, rm, nm1);

                // am container Id not equal to 1.
                NUnit.Framework.Assert.IsTrue(attempt.GetMasterContainer().GetId().GetContainerId
                                                  () != 1);
                // NMSecretManager doesn't record the node on which the am is allocated.
                NUnit.Framework.Assert.IsFalse(rm.GetRMContext().GetNMTokenSecretManager().IsApplicationAttemptNMTokenPresent
                                                   (attempt.GetAppAttemptId(), nm1.GetNodeId()));
                am.RegisterAppAttempt();
                rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
                int numContainers            = 1;
                IList <Container> containers = new AList <Container>();
                // nmTokens keeps track of all the nmTokens issued in the allocate call.
                IList <NMToken> expectedNMTokens = new AList <NMToken>();

                // am1 allocate 1 container on nm1.
                while (true)
                {
                    AllocateResponse response = am.Allocate("127.0.0.1", 2000, numContainers, new AList
                                                            <ContainerId>());
                    nm1.NodeHeartbeat(true);
                    Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
                    Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
                    if (containers.Count == numContainers)
                    {
                        break;
                    }
                    Sharpen.Thread.Sleep(200);
                    System.Console.Out.WriteLine("Waiting for container to be allocated.");
                }
                NodeId nodeId = expectedNMTokens[0].GetNodeId();

                // NMToken is sent for the allocated container.
                NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), nodeId);
            }
            finally
            {
                // Release the RM regardless of the test outcome.
                rm.Stop();
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Submits an application, asks the client RM service to move it to the queue
        /// <c>"newqueue"</c>, and checks that the RM's view of the application reports
        /// that queue. The RM is stopped in a <c>finally</c> block so it is released
        /// even when the move or the assertion fails.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestMoveSuccessful()
        {
            MockRM rm1 = new MockRM(conf);

            try
            {
                rm1.Start();
                RMApp           app             = rm1.SubmitApp(1024);
                ClientRMService clientRMService = rm1.GetClientRMService();

                // Request the move; the assertion below expects it to have taken effect.
                clientRMService.MoveApplicationAcrossQueues(MoveApplicationAcrossQueuesRequest.NewInstance
                                                                (app.GetApplicationId(), "newqueue"));
                RMApp rmApp = rm1.GetRMContext().GetRMApps()[app.GetApplicationId()];

                NUnit.Framework.Assert.AreEqual("newqueue", rmApp.GetQueue());
            }
            finally
            {
                rm1.Stop();
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Kills <paramref name="appId"/> through the RM, asserts that the response's
        /// kill-completed flag equals <c>IsFinalState(rmAppState)</c>, waits for the
        /// app (and, when given, the attempt) to reach the killed state, and asserts
        /// that the app still has exactly one attempt.
        /// </summary>
        /// <param name="rm">RM to issue the kill against.</param>
        /// <param name="appId">Application to kill.</param>
        /// <param name="appAttemptId">Attempt to wait on; skipped when null.</param>
        /// <param name="rmAppState">State passed to <c>IsFinalState</c> for the completion check.</param>
        /// <exception cref="System.Exception"/>
        private void KillApplication(MockRM rm, ApplicationId appId, ApplicationAttemptId
                                     appAttemptId, RMAppState rmAppState)
        {
            KillApplicationResponse killResponse = rm.KillApp(appId);
            bool completionMatchesState = killResponse.GetIsKillCompleted() == IsFinalState(rmAppState);

            NUnit.Framework.Assert.IsTrue(completionMatchesState);

            // Look the app up before waiting, as the attempt count is checked on this instance.
            RMApp killedApp = rm.GetRMContext().GetRMApps()[appId];

            rm.WaitForState(appId, RMAppState.Killed);
            if (appAttemptId != null)
            {
                rm.WaitForState(appAttemptId, RMAppAttemptState.Killed);
            }

            // no new attempt is created.
            int attemptCount = killedApp.GetAppAttempts().Count;
            NUnit.Framework.Assert.AreEqual(1, attemptCount);
        }
Esempio n. 13
0
        /// <summary>
        /// Starts the RM with the configuration held by the fixture, refreshes the
        /// queues through the admin service, and checks that the refresh does not
        /// throw and that the scheduler's maximum-system-applications value is
        /// unchanged afterwards.
        /// </summary>
        public virtual void TestAdminRefreshQueuesWithLocalConfigurationProvider()
        {
            rm = new MockRM(configuration);
            rm.Init(configuration);
            rm.Start();
            CapacityScheduler cs = (CapacityScheduler)rm.GetRMContext().GetScheduler();
            int maxAppsBefore    = cs.GetConfiguration().GetMaximumSystemApplications();

            try
            {
                rm.adminService.RefreshQueues(RefreshQueuesRequest.NewInstance());
            }
            catch (Exception)
            {
                NUnit.Framework.Assert.Fail("Using localConfigurationProvider. Should not get any exception."
                                            );
            }

            // Asserted outside the try block: NUnit signals a failed assertion by
            // throwing, and inside the try that exception would be caught above and
            // reported as an unexpected exception instead of a value mismatch.
            NUnit.Framework.Assert.AreEqual(maxAppsBefore, cs.GetConfiguration().GetMaximumSystemApplications
                                                ());
        }
Esempio n. 14
0
        /// <summary>
        /// Sets the default queue capacity to 200 before a transition to active, checks
        /// the message of any exception the transition throws, and checks that the
        /// fail-fast dispatcher saw exactly one event. It then restores the capacity to
        /// 100 and verifies that the RM can transition to active and back to standby.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestTransitionedToActiveRefreshFail()
        {
            configuration.SetBoolean(YarnConfiguration.AutoFailoverEnabled, false);
            YarnConfiguration conf = new YarnConfiguration(configuration);

            configuration = new CapacitySchedulerConfiguration(conf);
            rm            = new _MockRM_588(this, configuration);
            rm.Init(configuration);
            rm.Start();
            HAServiceProtocol.StateChangeRequestInfo requestInfo = new HAServiceProtocol.StateChangeRequestInfo
                                                                       (HAServiceProtocol.RequestSource.RequestByUser);
            configuration.Set("yarn.scheduler.capacity.root.default.capacity", "100");
            rm.adminService.TransitionToStandby(requestInfo);
            NUnit.Framework.Assert.AreEqual(HAServiceProtocol.HAServiceState.Standby, rm.GetRMContext
                                                ().GetHAServiceState());
            configuration.Set("yarn.scheduler.capacity.root.default.capacity", "200");
            try
            {
                rm.adminService.TransitionToActive(requestInfo);
            }
            catch (Exception e)
            {
                // Check that the actual message mentions the expected text. The
                // reverse check ("expected".Contains(actual)) would also accept an
                // empty message.
                NUnit.Framework.Assert.IsTrue(e.Message.Contains(
                                                  "Error on refreshAll during transistion to Active"));
            }
            TestRMHA.FailFastDispatcher dispatcher = ((TestRMHA.FailFastDispatcher)rm.rmContext
                                                      .GetDispatcher());
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual(1, dispatcher.GetEventCount());
            // Making correct conf and check the state
            configuration.Set("yarn.scheduler.capacity.root.default.capacity", "100");
            rm.adminService.TransitionToActive(requestInfo);
            NUnit.Framework.Assert.AreEqual(HAServiceProtocol.HAServiceState.Active, rm.GetRMContext
                                                ().GetHAServiceState());
            rm.adminService.TransitionToStandby(requestInfo);
            NUnit.Framework.Assert.AreEqual(HAServiceProtocol.HAServiceState.Standby, rm.GetRMContext
                                                ().GetHAServiceState());
        }
Esempio n. 15
0
        /// <summary>
        /// Walks one application attempt through allocation on two node managers and
        /// checks, at each step, how many NM tokens the AM has collected and whether the
        /// NM token secret manager still tracks the attempt and its per-node tokens:
        /// after repeated allocations on the same node, after a second node registers,
        /// after that node re-registers, after the master key is rolled, and after the
        /// AM has finished. The RM is stopped in a <c>finally</c> block.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestNMToken()
        {
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                MockNM nm1 = rm.RegisterNode("h1:1234", 10000);
                NMTokenSecretManagerInRM nmTokenSecretManager = rm.GetRMContext().GetNMTokenSecretManager
                                                                    ();
                // submitting new application
                RMApp app = rm.SubmitApp(1000);
                // start scheduling.
                nm1.NodeHeartbeat(true);
                // Starting application attempt and launching
                // It should get registered with NMTokenSecretManager.
                RMAppAttempt attempt = app.GetCurrentAppAttempt();
                MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                // This will register application master.
                am.RegisterAppAttempt();
                AList <Container>          containersReceivedForNM1 = new AList <Container>();
                IList <ContainerId>        releaseContainerList     = new AList <ContainerId>();
                Dictionary <string, Token> nmTokens = new Dictionary <string, Token>();
                // initially requesting 2 containers.
                AllocateResponse response = am.Allocate("h1", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 2, nmTokens,
                                                      nm1);
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                // requesting 2 more containers.
                // The token count is expected to stay at 1 after more containers on nm1.
                response = am.Allocate("h1", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 4, nmTokens,
                                                      nm1);
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                // We will be simulating NM restart so restarting newly added h2:1234
                // NM 2 now registers.
                MockNM nm2 = rm.RegisterNode("h2:1234", 10000);
                nm2.NodeHeartbeat(true);
                AList <Container> containersReceivedForNM2 = new AList <Container>();
                response = am.Allocate("h2", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 2, nmTokens,
                                                      nm2);
                NUnit.Framework.Assert.AreEqual(2, nmTokens.Count);
                // Simulating NM-2 restart.
                nm2 = rm.RegisterNode("h2:1234", 10000);
                // Wait for reconnect to make it through the RM and create a new RMNode
                // (polls until the node's last heartbeat response id is no longer positive).
                IDictionary <NodeId, RMNode> nodes = rm.GetRMContext().GetRMNodes();
                while (nodes[nm2.GetNodeId()].GetLastNodeHeartBeatResponse().GetResponseId() > 0)
                {
                    Sharpen.Thread.Sleep(WaitSleepMs);
                }
                // Upper bound on the number of polls in the wait loop below.
                int interval = 40;
                // Wait for nm Token to be cleared.
                while (nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(attempt.GetAppAttemptId
                                                                                   (), nm2.GetNodeId()) && interval-- > 0)
                {
                    Log.Info("waiting for nmToken to be cleared for : " + nm2.GetNodeId());
                    Sharpen.Thread.Sleep(WaitSleepMs);
                }
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                // removing NMToken for h2:1234
                Sharpen.Collections.Remove(nmTokens, nm2.GetNodeId().ToString());
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                // We should again receive the NMToken.
                response = am.Allocate("h2", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 4, nmTokens,
                                                      nm2);
                NUnit.Framework.Assert.AreEqual(2, nmTokens.Count);
                // Now rolling over NMToken masterKey. it should resend the NMToken in
                // next allocate call.
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                  (attempt.GetAppAttemptId(), nm1.GetNodeId()));
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                  (attempt.GetAppAttemptId(), nm2.GetNodeId()));
                nmTokenSecretManager.RollMasterKey();
                nmTokenSecretManager.ActivateNextMasterKey();
                NUnit.Framework.Assert.IsFalse(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                   (attempt.GetAppAttemptId(), nm1.GetNodeId()));
                NUnit.Framework.Assert.IsFalse(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                   (attempt.GetAppAttemptId(), nm2.GetNodeId()));
                // It should not remove application attempt entry.
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                nmTokens.Clear();
                NUnit.Framework.Assert.AreEqual(0, nmTokens.Count);
                // We should again receive the NMToken.
                response = am.Allocate("h2", 1000, 1, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 5, nmTokens,
                                                      nm2);
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                  (attempt.GetAppAttemptId(), nm2.GetNodeId()));
                // The attempt must still be registered while the AM is running; once the
                // AM has finished, the final assertion below expects the entry to be gone.
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                am.UnregisterAppAttempt();
                // marking all the containers as finished.
                foreach (Container container in containersReceivedForNM1)
                {
                    nm1.NodeHeartbeat(attempt.GetAppAttemptId(), container.GetId().GetContainerId(),
                                      ContainerState.Complete);
                }
                foreach (Container container_1 in containersReceivedForNM2)
                {
                    nm2.NodeHeartbeat(attempt.GetAppAttemptId(), container_1.GetId().GetContainerId()
                                      , ContainerState.Complete);
                }
                // Report container 1 of the attempt as complete on nm1
                // (presumably the AM container — TODO confirm).
                nm1.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
                am.WaitForState(RMAppAttemptState.Finished);
                NUnit.Framework.Assert.IsFalse(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                   (attempt.GetAppAttemptId()));
            }
            finally
            {
                rm.Stop();
            }
        }
        // The test verifies processing of NMContainerStatuses which are sent during
        // NM registration.
        // 1. Start the cluster (RM, NM), submit an app with 1024MB, launch & register AM
        // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
        // 3. Verify the number of containers allocated by RM
        // 4. Verify memory usage by the cluster, it should be 3072: AM memory +
        // requested memory. 1024 + 2048 = 3072
        // 5. Re-register NM by sending completed container status
        // 6. Verify memory used, it should be 1024
        // 7. Send AM heartbeat to RM. Allocate response should contain the completed
        // container.
        // The RM is stopped in a finally block so that a failing assertion does not
        // leave it running.
        /// <exception cref="System.Exception"/>
        public virtual void TestProcessingNMContainerStatusesOnNMRestart()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // 1. Start the cluster (RM, NM), submit an app with 1024MB, launch & register AM
            MockRM rm1 = new MockRM(conf, memStore);

            try
            {
                rm1.Start();
                int    nmMemory        = 8192;
                int    amMemory        = 1024;
                int    containerMemory = 2048;
                MockNM nm1             = new MockNM("127.0.0.1:1234", nmMemory, rm1.GetResourceTrackerService());

                nm1.RegisterNode();
                RMApp  app0 = rm1.SubmitApp(amMemory);
                MockAM am0  = MockRM.LaunchAndRegisterAM(app0, rm1, nm1);
                // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
                int noOfContainers = 1;
                IList <Container> allocateContainers = am0.AllocateAndWaitForContainers(
                    noOfContainers, containerMemory, nm1);

                // 3. Verify the number of containers allocated by RM
                NUnit.Framework.Assert.AreEqual(noOfContainers, allocateContainers.Count);
                Container container = allocateContainers[0];

                // Report both the AM container (id 1) and the requested container as running.
                nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Running);
                nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), container.GetId().GetContainerId(),
                                  ContainerState.Running);
                rm1.WaitForState(app0.GetApplicationId(), RMAppState.Running);
                // 4. Verify memory usage by the cluster, it should be 3072: AM memory +
                // requested memory. 1024 + 2048 = 3072
                ResourceScheduler rs = rm1.GetRMContext().GetScheduler();
                int allocatedMB      = rs.GetRootQueueMetrics().GetAllocatedMB();

                NUnit.Framework.Assert.AreEqual(amMemory + containerMemory, allocatedMB);
                // 5. Re-register NM by sending completed container status
                IList <NMContainerStatus> nMContainerStatusForApp = CreateNMContainerStatusForApp(am0);

                nm1.RegisterNode(nMContainerStatusForApp, Arrays.AsList(app0.GetApplicationId()));
                WaitForClusterMemory(nm1, rs, amMemory);
                // 6. Verify memory used, it should be 1024
                NUnit.Framework.Assert.AreEqual(amMemory, rs.GetRootQueueMetrics().GetAllocatedMB());
                // 7. Send AM heartbeat to RM. Allocate response should contain the
                // completed container
                AllocateRequest req = AllocateRequest.NewInstance(0, 0F, new AList <ResourceRequest>(),
                                                                  new AList <ContainerId>(), null);
                AllocateResponse        allocate = am0.Allocate(req);
                IList <ContainerStatus> completedContainersStatuses =
                    allocate.GetCompletedContainersStatuses();

                NUnit.Framework.Assert.AreEqual(noOfContainers, completedContainersStatuses.Count);
                // Application clean up should happen: cluster memory used is 0
                nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
                WaitForClusterMemory(nm1, rs, 0);
            }
            finally
            {
                // Always shut the RM down, even when an assertion above fails.
                rm1.Stop();
            }
        }
Esempio n. 17
0
        /// <summary>
        /// Re-registers nodes with the RM in several situations (healthy, unhealthy,
        /// unhealthy turned healthy, changed capability, changed capability with
        /// running applications, changed HTTP port) and checks the returned node
        /// action, the active/unhealthy NM counts and the root queue's available MB.
        /// </summary>
        public virtual void TestReconnectNode()
        {
            const string FirstHost       = "host1:1234";
            const string SecondHost      = "host2:5678";
            const int    DefaultMemoryMb = 5120;

            DrainDispatcher drainDispatcher = new DrainDispatcher();

            rm = new _MockRM_567(this, drainDispatcher);
            rm.Start();

            MockNM firstNM  = rm.RegisterNode(FirstHost, DefaultMemoryMb);
            MockNM secondNM = rm.RegisterNode(SecondHost, DefaultMemoryMb);

            // The first node reports healthy, the second one unhealthy.
            firstNM.NodeHeartbeat(true);
            secondNM.NodeHeartbeat(false);
            drainDispatcher.Await();
            CheckUnealthyNMCount(rm, secondNM, true, 1);

            int          activeNMsAtStart = ClusterMetrics.GetMetrics().GetNumActiveNMs();
            QueueMetrics rootMetrics      = rm.GetResourceScheduler().GetRootQueueMetrics();

            // TODO Metrics incorrect in case of the FifoScheduler
            NUnit.Framework.Assert.AreEqual(DefaultMemoryMb, rootMetrics.GetAvailableMB());

            // Case: a healthy node reconnects.
            firstNM = rm.RegisterNode(FirstHost, DefaultMemoryMb);
            NodeHeartbeatResponse heartbeatResponse = firstNM.NodeHeartbeat(true);

            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(heartbeatResponse.GetNodeAction()));
            drainDispatcher.Await();
            NUnit.Framework.Assert.AreEqual(activeNMsAtStart,
                                            ClusterMetrics.GetMetrics().GetNumActiveNMs());
            CheckUnealthyNMCount(rm, secondNM, true, 1);

            // Case: an unhealthy node reconnects.
            secondNM          = rm.RegisterNode(SecondHost, DefaultMemoryMb);
            heartbeatResponse = secondNM.NodeHeartbeat(false);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(heartbeatResponse.GetNodeAction()));
            drainDispatcher.Await();
            NUnit.Framework.Assert.AreEqual(activeNMsAtStart,
                                            ClusterMetrics.GetMetrics().GetNumActiveNMs());
            CheckUnealthyNMCount(rm, secondNM, true, 1);

            // Case: the unhealthy node turns healthy again (two healthy heartbeats).
            secondNM = rm.RegisterNode(SecondHost, DefaultMemoryMb);
            drainDispatcher.Await();
            secondNM.NodeHeartbeat(true);
            secondNM.NodeHeartbeat(true);
            drainDispatcher.Await();
            NUnit.Framework.Assert.AreEqual(DefaultMemoryMb + DefaultMemoryMb,
                                            rootMetrics.GetAvailableMB());

            // Case: a node reconnects with a changed capability.
            firstNM = rm.RegisterNode(SecondHost, 10240);
            drainDispatcher.Await();
            heartbeatResponse = firstNM.NodeHeartbeat(true);
            drainDispatcher.Await();
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(heartbeatResponse.GetNodeAction()));
            NUnit.Framework.Assert.AreEqual(DefaultMemoryMb + 10240, rootMetrics.GetAvailableMB());

            // Case: a node reconnects with a changed capability and running applications.
            IList <ApplicationId> appsOnNode = new AList <ApplicationId>();

            appsOnNode.AddItem(ApplicationId.NewInstance(1, 0));
            firstNM = rm.RegisterNode(SecondHost, 15360, 2, appsOnNode);
            drainDispatcher.Await();
            heartbeatResponse = firstNM.NodeHeartbeat(true);
            drainDispatcher.Await();
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(heartbeatResponse.GetNodeAction()));
            NUnit.Framework.Assert.AreEqual(DefaultMemoryMb + 15360, rootMetrics.GetAvailableMB());

            // Case: a healthy node reconnects with a different HTTP port.
            firstNM = new MockNM(FirstHost, DefaultMemoryMb, rm.GetResourceTrackerService());
            firstNM.SetHttpPort(3);
            firstNM.RegisterNode();
            drainDispatcher.Await();
            firstNM.NodeHeartbeat(true);
            firstNM.NodeHeartbeat(true);
            drainDispatcher.Await();

            RMNode reconnectedNode = rm.GetRMContext().GetRMNodes()[firstNM.GetNodeId()];

            NUnit.Framework.Assert.AreEqual(3, reconnectedNode.GetHttpPort());
            NUnit.Framework.Assert.AreEqual(DefaultMemoryMb,
                                            reconnectedNode.GetTotalCapability().GetMemory());
            NUnit.Framework.Assert.AreEqual(DefaultMemoryMb + 15360, rootMetrics.GetAvailableMB());
        }
Esempio n. 18
0
        /// <summary>
        /// Starts two RMs (rm1, rm2) in HA mode with the file-system based
        /// configuration provider, uploads a capacity-scheduler configuration,
        /// refreshes the queues on the active RM and checks the maximum system
        /// applications value seen by each RM before and after a manual failover.
        /// </summary>
        public virtual void TestRMHAWithFileSystemBasedConfiguration()
        {
            HAServiceProtocol.StateChangeRequestInfo requestInfo =
                new HAServiceProtocol.StateChangeRequestInfo(HAServiceProtocol.RequestSource.RequestByUser);

            configuration.Set(YarnConfiguration.RmConfigurationProviderClass,
                              "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider");
            configuration.SetBoolean(YarnConfiguration.RmHaEnabled, true);
            configuration.SetBoolean(YarnConfiguration.AutoFailoverEnabled, false);
            configuration.Set(YarnConfiguration.RmHaIds, "rm1,rm2");
            int @base = 100;

            // Give every service address key a distinct port per RM id.
            foreach (string confKey in YarnConfiguration.GetServiceAddressConfKeys(configuration))
            {
                configuration.Set(HAUtil.AddSuffix(confKey, "rm1"), "0.0.0.0:" + (@base + 20));
                configuration.Set(HAUtil.AddSuffix(confKey, "rm2"), "0.0.0.0:" + (@base + 40));
                @base = @base * 2;
            }
            Configuration conf1 = new Configuration(configuration);

            conf1.Set(YarnConfiguration.RmHaId, "rm1");
            Configuration conf2 = new Configuration(configuration);

            conf2.Set(YarnConfiguration.RmHaId, "rm2");
            // upload default configurations
            UploadDefaultConfiguration();
            MockRM rm1 = null;
            MockRM rm2 = null;

            try
            {
                rm1 = new MockRM(conf1);
                rm1.Init(conf1);
                rm1.Start();
                NUnit.Framework.Assert.IsTrue(
                    rm1.GetRMContext().GetHAServiceState() == HAServiceProtocol.HAServiceState.Standby);
                rm2 = new MockRM(conf2);
                // rm2 must be initialised with its own configuration (HA id "rm2"),
                // not with rm1's.
                rm2.Init(conf2);
                rm2.Start();
                NUnit.Framework.Assert.IsTrue(
                    rm2.GetRMContext().GetHAServiceState() == HAServiceProtocol.HAServiceState.Standby);
                rm1.adminService.TransitionToActive(requestInfo);
                NUnit.Framework.Assert.IsTrue(
                    rm1.GetRMContext().GetHAServiceState() == HAServiceProtocol.HAServiceState.Active);
                CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
                csConf.Set("yarn.scheduler.capacity.maximum-applications", "5000");
                UploadConfiguration(csConf, "capacity-scheduler.xml");
                rm1.adminService.RefreshQueues(RefreshQueuesRequest.NewInstance());
                int maxApps = ((CapacityScheduler)rm1.GetRMContext().GetScheduler())
                              .GetConfiguration().GetMaximumSystemApplications();
                // Expected value first, matching the other two-argument assertions in this file.
                NUnit.Framework.Assert.AreEqual(5000, maxApps);
                // Before failover happens, the maxApps is
                // still the default value on the standby rm : rm2
                int maxAppsBeforeFailOver = ((CapacityScheduler)rm2.GetRMContext().GetScheduler())
                                            .GetConfiguration().GetMaximumSystemApplications();
                NUnit.Framework.Assert.AreEqual(10000, maxAppsBeforeFailOver);
                // Do the failover
                rm1.adminService.TransitionToStandby(requestInfo);
                rm2.adminService.TransitionToActive(requestInfo);
                NUnit.Framework.Assert.IsTrue(
                    rm1.GetRMContext().GetHAServiceState() == HAServiceProtocol.HAServiceState.Standby);
                NUnit.Framework.Assert.IsTrue(
                    rm2.GetRMContext().GetHAServiceState() == HAServiceProtocol.HAServiceState.Active);
                int maxAppsAfter = ((CapacityScheduler)rm2.GetRMContext().GetScheduler())
                                   .GetConfiguration().GetMaximumSystemApplications();
                NUnit.Framework.Assert.AreEqual(5000, maxAppsAfter);
            }
            finally
            {
                if (rm1 != null)
                {
                    rm1.Stop();
                }
                if (rm2 != null)
                {
                    rm2.Stop();
                }
            }
        }
Esempio n. 19
0
        /// <summary>
        /// Adds one node and two application attempts to a FifoScheduler, asks for
        /// one container per application, triggers assignment with a node update and
        /// checks the resource limit ("Allocation headroom") returned to each attempt.
        /// </summary>
        /// <remarks>
        /// The RM is stopped in a finally block so that a failing assertion does not
        /// leave it running.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestHeadroom()
        {
            Configuration conf = new Configuration();

            conf.SetClass(YarnConfiguration.RmScheduler, typeof(FifoScheduler), typeof(ResourceScheduler));
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                FifoScheduler fs = (FifoScheduler)rm.GetResourceScheduler();
                // Add a node
                RMNode n1 = MockNodes.NewNodeInfo(0, MockNodes.NewResource(4 * Gb), 1, "127.0.0.2");

                fs.Handle(new NodeAddedSchedulerEvent(n1));
                // Add two applications
                ApplicationId        appId1        = BuilderUtils.NewApplicationId(100, 1);
                ApplicationAttemptId appAttemptId1 = BuilderUtils.NewApplicationAttemptId(appId1, 1);

                CreateMockRMApp(appAttemptId1, rm.GetRMContext());
                SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId1, "queue", "user");

                fs.Handle(appEvent);
                SchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId1, false);

                fs.Handle(attemptEvent);
                ApplicationId        appId2        = BuilderUtils.NewApplicationId(200, 2);
                ApplicationAttemptId appAttemptId2 = BuilderUtils.NewApplicationAttemptId(appId2, 1);

                CreateMockRMApp(appAttemptId2, rm.GetRMContext());
                SchedulerEvent appEvent2 = new AppAddedSchedulerEvent(appId2, "queue", "user");

                fs.Handle(appEvent2);
                SchedulerEvent attemptEvent2 = new AppAttemptAddedSchedulerEvent(appAttemptId2, false);

                fs.Handle(attemptEvent2);
                IList <ContainerId>     emptyId  = new AList <ContainerId>();
                IList <ResourceRequest> emptyAsk = new AList <ResourceRequest>();
                // Set up resource requests
                // Ask for a 1 GB container for app 1
                IList <ResourceRequest> ask1 = new AList <ResourceRequest>();

                ask1.AddItem(BuilderUtils.NewResourceRequest(BuilderUtils.NewPriority(0),
                                                             ResourceRequest.Any,
                                                             BuilderUtils.NewResource(Gb, 1), 1));
                fs.Allocate(appAttemptId1, ask1, emptyId, null, null);
                // Ask for a 2 GB container for app 2
                IList <ResourceRequest> ask2 = new AList <ResourceRequest>();

                ask2.AddItem(BuilderUtils.NewResourceRequest(BuilderUtils.NewPriority(0),
                                                             ResourceRequest.Any,
                                                             BuilderUtils.NewResource(2 * Gb, 1), 1));
                fs.Allocate(appAttemptId2, ask2, emptyId, null, null);
                // Trigger container assignment
                fs.Handle(new NodeUpdateSchedulerEvent(n1));
                // Get the allocation for the applications and verify headroom:
                // 4 * Gb on the node minus the 1 * Gb and 2 * Gb requests leaves 1 * Gb.
                Allocation allocation1 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);

                NUnit.Framework.Assert.AreEqual("Allocation headroom", 1 * Gb,
                                                allocation1.GetResourceLimit().GetMemory());
                Allocation allocation2 = fs.Allocate(appAttemptId2, emptyAsk, emptyId, null, null);

                NUnit.Framework.Assert.AreEqual("Allocation headroom", 1 * Gb,
                                                allocation2.GetResourceLimit().GetMemory());
            }
            finally
            {
                // Always shut the RM down, even when an assertion above fails.
                rm.Stop();
            }
        }
        /// <summary>
        /// Runs an application with an AM and two extra containers, completes all of
        /// them, and checks that the memory-seconds / vcore-seconds reported by the
        /// app match the per-container values and are unchanged after a second RM is
        /// started from the same state store.
        /// </summary>
        /// <remarks>
        /// Both RMs are stopped and closed in a finally block so that a failing
        /// assertion does not leave them running.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestUsageWithMultipleContainersAndRMRestart()
        {
            // Set max attempts to 1 so that when the first attempt fails, the app
            // won't try to start a new one.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            MockRM rm0 = new MockRM(conf, memStore);
            MockRM rm1 = null;

            try
            {
                rm0.Start();
                MockNM nm = new MockNM("127.0.0.1:1234", 65536, rm0.GetResourceTrackerService());

                nm.RegisterNode();
                RMApp app0 = rm0.SubmitApp(200);

                rm0.WaitForState(app0.GetApplicationId(), RMAppState.Accepted);
                RMAppAttempt         attempt0   = app0.GetCurrentAppAttempt();
                ApplicationAttemptId attemptId0 = attempt0.GetAppAttemptId();

                rm0.WaitForState(attemptId0, RMAppAttemptState.Scheduled);
                nm.NodeHeartbeat(true);
                rm0.WaitForState(attemptId0, RMAppAttemptState.Allocated);
                MockAM am0 = rm0.SendAMLaunched(attempt0.GetAppAttemptId());

                am0.RegisterAppAttempt();
                int numContainers = 2;

                am0.Allocate("127.0.0.1", 1000, numContainers, new AList <ContainerId>());
                nm.NodeHeartbeat(true);
                IList <Container> conts = am0.Allocate(new AList <ResourceRequest>(),
                                                       new AList <ContainerId>()).GetAllocatedContainers();

                // Keep heartbeating and polling until all requested containers are allocated.
                while (conts.Count != numContainers)
                {
                    nm.NodeHeartbeat(true);
                    Sharpen.Collections.AddAll(conts, am0.Allocate(new AList <ResourceRequest>(),
                                                                   new AList <ContainerId>())
                                               .GetAllocatedContainers());
                    Sharpen.Thread.Sleep(500);
                }
                // launch the 2nd and 3rd containers.
                foreach (Container c in conts)
                {
                    nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c.GetId().GetContainerId(),
                                     ContainerState.Running);
                    rm0.WaitForState(nm, c.GetId(), RMContainerState.Running);
                }
                // Get the RMContainers for all of the live containers, to be used later
                // for metrics calculations and comparisons.
                ICollection <RMContainer> rmContainers =
                    rm0.scheduler.GetSchedulerAppInfo(attempt0.GetAppAttemptId()).GetLiveContainers();
                // Allow metrics to accumulate (give up after 5 seconds).
                int sleepInterval       = 1000;
                int cumulativeSleepTime = 0;

                while (app0.GetRMAppMetrics().GetMemorySeconds() <= 0 && cumulativeSleepTime < 5000)
                {
                    Sharpen.Thread.Sleep(sleepInterval);
                    cumulativeSleepTime += sleepInterval;
                }
                // Stop all non-AM containers
                foreach (Container c_1 in conts)
                {
                    if (c_1.GetId().GetContainerId() == 1)
                    {
                        continue;
                    }
                    nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c_1.GetId().GetContainerId(),
                                     ContainerState.Complete);
                    rm0.WaitForState(nm, c_1.GetId(), RMContainerState.Completed);
                }
                // After all other containers have completed, manually complete the master
                // container in order to trigger a save to the state store of the resource
                // usage metrics. This will cause the attempt to fail, and, since the max
                // attempt retries is 1, the app will also fail. This is intentional so
                // that all containers will complete prior to saving.
                ContainerId cId = ContainerId.NewContainerId(attempt0.GetAppAttemptId(), 1);

                nm.NodeHeartbeat(attempt0.GetAppAttemptId(), cId.GetContainerId(),
                                 ContainerState.Complete);
                rm0.WaitForState(nm, cId, RMContainerState.Completed);
                // Check that the container metrics match those from the app usage report.
                long memorySeconds = 0;
                long vcoreSeconds  = 0;

                foreach (RMContainer c_2 in rmContainers)
                {
                    AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c_2);
                    memorySeconds += ru.GetMemorySeconds();
                    vcoreSeconds  += ru.GetVcoreSeconds();
                }
                RMAppMetrics metricsBefore = app0.GetRMAppMetrics();

                NUnit.Framework.Assert.AreEqual("Unexpected MemorySeconds value", memorySeconds,
                                                metricsBefore.GetMemorySeconds());
                NUnit.Framework.Assert.AreEqual("Unexpected VcoreSeconds value", vcoreSeconds,
                                                metricsBefore.GetVcoreSeconds());
                // create new RM to represent RM restart. Load up the state store.
                rm1 = new MockRM(conf, memStore);
                rm1.Start();
                RMApp app0After = rm1.GetRMContext().GetRMApps()[app0.GetApplicationId()];
                // Compare container resource usage metrics from before and after restart.
                RMAppMetrics metricsAfter = app0After.GetRMAppMetrics();

                NUnit.Framework.Assert.AreEqual("Vcore seconds were not the same after RM Restart",
                                                metricsBefore.GetVcoreSeconds(),
                                                metricsAfter.GetVcoreSeconds());
                NUnit.Framework.Assert.AreEqual("Memory seconds were not the same after RM Restart",
                                                metricsBefore.GetMemorySeconds(),
                                                metricsAfter.GetMemorySeconds());
            }
            finally
            {
                // Shut both RMs down even when an assertion above fails; the nested
                // finally makes sure rm1 is still cleaned up if stopping rm0 throws.
                try
                {
                    rm0.Stop();
                    rm0.Close();
                }
                finally
                {
                    if (rm1 != null)
                    {
                        rm1.Stop();
                        rm1.Close();
                    }
                }
            }
        }
Esempio n. 21
0
        /// <summary>
        /// Adds four nodes in two racks to a FifoScheduler and checks that containers
        /// are not allocated on blacklisted resources: first with host_1_0
        /// blacklisted, then with "rack0" blacklisted.
        /// </summary>
        /// <remarks>
        /// The RM is stopped in a finally block so that a failing assertion does not
        /// leave it running.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestBlackListNodes()
        {
            Configuration conf = new Configuration();

            conf.SetClass(YarnConfiguration.RmScheduler, typeof(FifoScheduler), typeof(ResourceScheduler));
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                FifoScheduler fs         = (FifoScheduler)rm.GetResourceScheduler();
                int           rack_num_0 = 0;
                int           rack_num_1 = 1;
                // Add 4 nodes in 2 racks
                // host_0_0 in rack0
                string host_0_0 = "127.0.0.1";
                RMNode n1       = MockNodes.NewNodeInfo(rack_num_0, MockNodes.NewResource(4 * Gb), 1,
                                                        host_0_0);

                fs.Handle(new NodeAddedSchedulerEvent(n1));
                // host_0_1 in rack0
                string host_0_1 = "127.0.0.2";
                RMNode n2       = MockNodes.NewNodeInfo(rack_num_0, MockNodes.NewResource(4 * Gb), 1,
                                                        host_0_1);

                fs.Handle(new NodeAddedSchedulerEvent(n2));
                // host_1_0 in rack1
                string host_1_0 = "127.0.0.3";
                RMNode n3       = MockNodes.NewNodeInfo(rack_num_1, MockNodes.NewResource(4 * Gb), 1,
                                                        host_1_0);

                fs.Handle(new NodeAddedSchedulerEvent(n3));
                // host_1_1 in rack1
                string host_1_1 = "127.0.0.4";
                RMNode n4       = MockNodes.NewNodeInfo(rack_num_1, MockNodes.NewResource(4 * Gb), 1,
                                                        host_1_1);

                fs.Handle(new NodeAddedSchedulerEvent(n4));
                // Add one application
                ApplicationId        appId1        = BuilderUtils.NewApplicationId(100, 1);
                ApplicationAttemptId appAttemptId1 = BuilderUtils.NewApplicationAttemptId(appId1, 1);

                CreateMockRMApp(appAttemptId1, rm.GetRMContext());
                SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId1, "queue", "user");

                fs.Handle(appEvent);
                SchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId1, false);

                fs.Handle(attemptEvent);
                IList <ContainerId>     emptyId  = new AList <ContainerId>();
                IList <ResourceRequest> emptyAsk = new AList <ResourceRequest>();
                // Allow rack-locality for rack_1, but blacklist host_1_0
                // Set up resource requests
                // Ask for a 1 GB container for app 1
                IList <ResourceRequest> ask1 = new AList <ResourceRequest>();

                ask1.AddItem(BuilderUtils.NewResourceRequest(BuilderUtils.NewPriority(0), "rack1",
                                                             BuilderUtils.NewResource(Gb, 1), 1));
                ask1.AddItem(BuilderUtils.NewResourceRequest(BuilderUtils.NewPriority(0),
                                                             ResourceRequest.Any,
                                                             BuilderUtils.NewResource(Gb, 1), 1));
                fs.Allocate(appAttemptId1, ask1, emptyId,
                            Sharpen.Collections.SingletonList(host_1_0), null);
                // Trigger container assignment
                fs.Handle(new NodeUpdateSchedulerEvent(n3));
                // Get the allocation for the application and verify no allocation on
                // the blacklisted node
                Allocation allocation1 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);

                NUnit.Framework.Assert.AreEqual("allocation1", 0, allocation1.GetContainers().Count);
                // verify host_1_1 can get allocated as not in blacklist
                fs.Handle(new NodeUpdateSchedulerEvent(n4));
                Allocation allocation2 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);

                NUnit.Framework.Assert.AreEqual("allocation2", 1, allocation2.GetContainers().Count);
                IList <Container> containerList = allocation2.GetContainers();

                foreach (Container container in containerList)
                {
                    // Expected node id (n4) first, then the container's actual node id,
                    // matching the message/expected/actual order used above.
                    NUnit.Framework.Assert.AreEqual("Container is allocated on n4", n4.GetNodeID(),
                                                    container.GetNodeId());
                }
                // Ask for a 1 GB container again for app 1
                IList <ResourceRequest> ask2 = new AList <ResourceRequest>();

                // this time, rack0 is also in blacklist, so only host_1_1 is available to
                // be assigned
                ask2.AddItem(BuilderUtils.NewResourceRequest(BuilderUtils.NewPriority(0),
                                                             ResourceRequest.Any,
                                                             BuilderUtils.NewResource(Gb, 1), 1));
                fs.Allocate(appAttemptId1, ask2, emptyId,
                            Sharpen.Collections.SingletonList("rack0"), null);
                // verify n1 is not qualified to be allocated
                fs.Handle(new NodeUpdateSchedulerEvent(n1));
                Allocation allocation3 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);

                NUnit.Framework.Assert.AreEqual("allocation3", 0, allocation3.GetContainers().Count);
                // verify n2 is not qualified to be allocated
                fs.Handle(new NodeUpdateSchedulerEvent(n2));
                Allocation allocation4 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);

                NUnit.Framework.Assert.AreEqual("allocation4", 0, allocation4.GetContainers().Count);
                // verify n3 is not qualified to be allocated
                fs.Handle(new NodeUpdateSchedulerEvent(n3));
                Allocation allocation5 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);

                NUnit.Framework.Assert.AreEqual("allocation5", 0, allocation5.GetContainers().Count);
                // n4 (host_1_1) is the only node expected to receive the container
                fs.Handle(new NodeUpdateSchedulerEvent(n4));
                Allocation allocation6 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);

                NUnit.Framework.Assert.AreEqual("allocation6", 1, allocation6.GetContainers().Count);
                containerList = allocation6.GetContainers();
                foreach (Container container_1 in containerList)
                {
                    NUnit.Framework.Assert.AreEqual("Container is allocated on n4", n4.GetNodeID(),
                                                    container_1.GetNodeId());
                }
            }
            finally
            {
                // Always shut the RM down, even when an assertion above fails.
                rm.Stop();
            }
        }