Exemple #1
0
        // Test regular RM restart/failover, new RM should not count
        // AM failure towards the max-retry-account and should be able to
        // re-launch the AM.
        /// <exception cref="System.Exception"/>
        public virtual void TestRMRestartOrFailoverNotCountedForAMFailures()
        {
            YarnConfiguration conf = new YarnConfiguration();

            conf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler), typeof(ResourceScheduler
                                                                                           ));
            conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
            conf.Set(YarnConfiguration.RmStore, typeof(MemoryRMStateStore).FullName);
            // explicitly set max-am-retry count as 1.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            RMApp app1 = rm1.SubmitApp(200);
            // AM should be restarted even though max-am-attempt is 1.
            MockAM       am1      = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();

            NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt1).MayBeLastAttempt());
            // Restart rm.
            MockRM rm2 = new MockRM(conf, memStore);

            rm2.Start();
            ApplicationStateData appState = memStore.GetState().GetApplicationState()[app1.GetApplicationId
                                                                                          ()];

            // re-register the NM
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            NMContainerStatus status = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord <NMContainerStatus
                                                                                      >();

            status.SetContainerExitStatus(ContainerExitStatus.KilledByResourcemanager);
            status.SetContainerId(attempt1.GetMasterContainer().GetId());
            status.SetContainerState(ContainerState.Complete);
            status.SetDiagnostics(string.Empty);
            nm1.RegisterNode(Sharpen.Collections.SingletonList(status), null);
            rm2.WaitForState(attempt1.GetAppAttemptId(), RMAppAttemptState.Failed);
            NUnit.Framework.Assert.AreEqual(ContainerExitStatus.KilledByResourcemanager, appState
                                            .GetAttempt(am1.GetApplicationAttemptId()).GetAMContainerExitStatus());
            // Will automatically start a new AppAttempt in rm2
            rm2.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            MockAM am2 = rm2.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 2, nm1);

            MockRM.FinishAMAndVerifyAppState(app1, rm2, nm1, am2);
            RMAppAttempt attempt3 = rm2.GetRMContext().GetRMApps()[app1.GetApplicationId()].GetCurrentAppAttempt
                                        ();

            NUnit.Framework.Assert.IsTrue(attempt3.ShouldCountTowardsMaxAttemptRetry());
            NUnit.Framework.Assert.AreEqual(ContainerExitStatus.Invalid, appState.GetAttempt(
                                                am2.GetApplicationAttemptId()).GetAMContainerExitStatus());
            rm1.Stop();
            rm2.Stop();
        }
        // Test the DT mast key in the state-store when the mast key is being rolled.
        /// <exception cref="System.Exception"/>
        public virtual void TestRMDTMasterKeyStateOnRollingMasterKey()
        {
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            RMStateStore.RMState rmState = memStore.GetState();
            IDictionary <RMDelegationTokenIdentifier, long> rmDTState = rmState.GetRMDTSecretManagerState
                                                                            ().GetTokenState();
            ICollection <DelegationKey> rmDTMasterKeyState = rmState.GetRMDTSecretManagerState
                                                                 ().GetMasterKeyState();
            MockRM rm1 = new TestRMDelegationTokens.MyMockRM(this, conf, memStore);

            rm1.Start();
            // on rm start, two master keys are created.
            // One is created at RMDTSecretMgr.startThreads.updateCurrentKey();
            // the other is created on the first run of
            // tokenRemoverThread.rollMasterKey()
            RMDelegationTokenSecretManager dtSecretManager = rm1.GetRMContext().GetRMDelegationTokenSecretManager
                                                                 ();

            // assert all master keys are saved
            NUnit.Framework.Assert.AreEqual(dtSecretManager.GetAllMasterKeys(), rmDTMasterKeyState
                                            );
            ICollection <DelegationKey> expiringKeys = new HashSet <DelegationKey>();

            Sharpen.Collections.AddAll(expiringKeys, dtSecretManager.GetAllMasterKeys());
            // request to generate a RMDelegationToken
            GetDelegationTokenRequest request = Org.Mockito.Mockito.Mock <GetDelegationTokenRequest
                                                                          >();

            Org.Mockito.Mockito.When(request.GetRenewer()).ThenReturn("renewer1");
            GetDelegationTokenResponse response = rm1.GetClientRMService().GetDelegationToken
                                                      (request);

            Org.Apache.Hadoop.Yarn.Api.Records.Token delegationToken = response.GetRMDelegationToken
                                                                           ();
            Org.Apache.Hadoop.Security.Token.Token <RMDelegationTokenIdentifier> token1 = ConverterUtils
                                                                                          .ConvertFromYarn(delegationToken, (Text)null);
            RMDelegationTokenIdentifier dtId1 = token1.DecodeIdentifier();

            // For all keys that still remain in memory, we should have them stored
            // in state-store also.
            while (((TestRMDelegationTokens.TestRMDelegationTokenSecretManager)dtSecretManager
                    ).numUpdatedKeys.Get() < 3)
            {
                ((TestRMDelegationTokens.TestRMDelegationTokenSecretManager)dtSecretManager).CheckCurrentKeyInStateStore
                    (rmDTMasterKeyState);
                Sharpen.Thread.Sleep(100);
            }
            // wait for token to expire and remove from state-store
            // rollMasterKey is called every 1 second.
            int count = 0;

            while (rmDTState.Contains(dtId1) && count < 100)
            {
                Sharpen.Thread.Sleep(100);
                count++;
            }
            rm1.Stop();
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestAppCleanupWhenNMReconnects()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start RM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            // create app and launch the AM
            RMApp  app0 = rm1.SubmitApp(200);
            MockAM am0  = LaunchAM(app0, rm1, nm1);

            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
            rm1.WaitForState(app0.GetApplicationId(), RMAppState.Failed);
            // wait for application cleanup message received
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            // reconnect NM with application still active
            nm1.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            rm1.Stop();
        }
Exemple #4
0
        // Test RM restarts after AM container is preempted, new RM should not count
        // AM preemption failure towards the max-retry-account and should be able to
        // re-launch the AM.
        /// <exception cref="System.Exception"/>
        public virtual void TestPreemptedAMRestartOnRMRestart()
        {
            YarnConfiguration conf = new YarnConfiguration();

            conf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler), typeof(ResourceScheduler
                                                                                           ));
            conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
            conf.Set(YarnConfiguration.RmStore, typeof(MemoryRMStateStore).FullName);
            // explicitly set max-am-retry count as 1.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            RMApp             app1        = rm1.SubmitApp(200);
            RMAppAttempt      attempt1    = app1.GetCurrentAppAttempt();
            MockAM            am1         = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
            CapacityScheduler scheduler   = (CapacityScheduler)rm1.GetResourceScheduler();
            ContainerId       amContainer = ContainerId.NewContainerId(am1.GetApplicationAttemptId(
                                                                           ), 1);

            // Forcibly preempt the am container;
            scheduler.KillContainer(scheduler.GetRMContainer(amContainer));
            am1.WaitForState(RMAppAttemptState.Failed);
            NUnit.Framework.Assert.IsTrue(!attempt1.ShouldCountTowardsMaxAttemptRetry());
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            // state store has 1 attempt stored.
            ApplicationStateData appState = memStore.GetState().GetApplicationState()[app1.GetApplicationId
                                                                                          ()];

            NUnit.Framework.Assert.AreEqual(1, appState.GetAttemptCount());
            // attempt stored has the preempted container exit status.
            NUnit.Framework.Assert.AreEqual(ContainerExitStatus.Preempted, appState.GetAttempt
                                                (am1.GetApplicationAttemptId()).GetAMContainerExitStatus());
            // Restart rm.
            MockRM rm2 = new MockRM(conf, memStore);

            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm1.RegisterNode();
            rm2.Start();
            // Restarted RM should re-launch the am.
            MockAM am2 = rm2.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 2, nm1);

            MockRM.FinishAMAndVerifyAppState(app1, rm2, nm1, am2);
            RMAppAttempt attempt2 = rm2.GetRMContext().GetRMApps()[app1.GetApplicationId()].GetCurrentAppAttempt
                                        ();

            NUnit.Framework.Assert.IsTrue(attempt2.ShouldCountTowardsMaxAttemptRetry());
            NUnit.Framework.Assert.AreEqual(ContainerExitStatus.Invalid, appState.GetAttempt(
                                                am2.GetApplicationAttemptId()).GetAMContainerExitStatus());
            rm1.Stop();
            rm2.Stop();
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestAppCleanupWhenRMRestartedBeforeAppFinished()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start RM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 1024, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            MockNM nm2 = new MockNM("127.0.0.1:5678", 1024, rm1.GetResourceTrackerService());

            nm2.RegisterNode();
            // create app and launch the AM
            RMApp  app0 = rm1.SubmitApp(200);
            MockAM am0  = LaunchAM(app0, rm1, nm1);
            // alloc another container on nm2
            AllocateResponse allocResponse = am0.Allocate(Arrays.AsList(ResourceRequest.NewInstance
                                                                            (Priority.NewInstance(1), "*", Resource.NewInstance(1024, 0), 1)), null);

            while (null == allocResponse.GetAllocatedContainers() || allocResponse.GetAllocatedContainers
                       ().IsEmpty())
            {
                nm2.NodeHeartbeat(true);
                allocResponse = am0.Allocate(null, null);
                Sharpen.Thread.Sleep(1000);
            }
            // start new RM
            MockRM rm2 = new MockRM(conf, memStore);

            rm2.Start();
            // nm1/nm2 register to rm2, and do a heartbeat
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm1.RegisterNode(Arrays.AsList(NMContainerStatus.NewInstance(ContainerId.NewContainerId
                                                                             (am0.GetApplicationAttemptId(), 1), ContainerState.Complete, Resource.NewInstance
                                                                             (1024, 1), string.Empty, 0, Priority.NewInstance(0), 1234)), Arrays.AsList(app0.
                                                                                                                                                        GetApplicationId()));
            nm2.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm2.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
            // assert app state has been saved.
            rm2.WaitForState(app0.GetApplicationId(), RMAppState.Failed);
            // wait for application cleanup message received on NM1
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            // wait for application cleanup message received on NM2
            WaitForAppCleanupMessageRecved(nm2, app0.GetApplicationId());
            rm1.Stop();
            rm2.Stop();
        }
Exemple #6
0
        public virtual void TestFailoverClearsRMContext()
        {
            configuration.SetBoolean(YarnConfiguration.AutoFailoverEnabled, false);
            configuration.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            Configuration      conf     = new YarnConfiguration(configuration);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // 1. start RM
            rm = new MockRM(conf, memStore);
            rm.Init(conf);
            rm.Start();
            HAServiceProtocol.StateChangeRequestInfo requestInfo = new HAServiceProtocol.StateChangeRequestInfo
                                                                       (HAServiceProtocol.RequestSource.RequestByUser);
            CheckMonitorHealth();
            CheckStandbyRMFunctionality();
            // 2. Transition to active
            rm.adminService.TransitionToActive(requestInfo);
            CheckMonitorHealth();
            CheckActiveRMFunctionality();
            VerifyClusterMetrics(1, 1, 1, 1, 2048, 1);
            NUnit.Framework.Assert.AreEqual(1, rm.GetRMContext().GetRMNodes().Count);
            NUnit.Framework.Assert.AreEqual(1, rm.GetRMContext().GetRMApps().Count);
            // 3. Create new RM
            rm = new _MockRM_550(conf, memStore);
            rm.Init(conf);
            rm.Start();
            CheckMonitorHealth();
            CheckStandbyRMFunctionality();
            // 4. Try Transition to active, throw exception
            try
            {
                rm.adminService.TransitionToActive(requestInfo);
                NUnit.Framework.Assert.Fail("Transitioned to Active should throw exception.");
            }
            catch (Exception e)
            {
                NUnit.Framework.Assert.IsTrue("Error when transitioning to Active mode".Contains(
                                                  e.Message));
            }
            // 5. Clears the metrics
            VerifyClusterMetrics(0, 0, 0, 0, 0, 0);
            NUnit.Framework.Assert.AreEqual(0, rm.GetRMContext().GetRMNodes().Count);
            NUnit.Framework.Assert.AreEqual(0, rm.GetRMContext().GetRMApps().Count);
        }
        // Test all expired keys are removed from state-store.
        /// <exception cref="System.Exception"/>
        public virtual void TestRemoveExpiredMasterKeyInRMStateStore()
        {
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            RMStateStore.RMState        rmState            = memStore.GetState();
            ICollection <DelegationKey> rmDTMasterKeyState = rmState.GetRMDTSecretManagerState
                                                                 ().GetMasterKeyState();
            MockRM rm1 = new TestRMDelegationTokens.MyMockRM(this, conf, memStore);

            rm1.Start();
            RMDelegationTokenSecretManager dtSecretManager = rm1.GetRMContext().GetRMDelegationTokenSecretManager
                                                                 ();

            // assert all master keys are saved
            NUnit.Framework.Assert.AreEqual(dtSecretManager.GetAllMasterKeys(), rmDTMasterKeyState
                                            );
            ICollection <DelegationKey> expiringKeys = new HashSet <DelegationKey>();

            Sharpen.Collections.AddAll(expiringKeys, dtSecretManager.GetAllMasterKeys());
            // wait for expiringKeys to expire
            while (true)
            {
                bool allExpired = true;
                foreach (DelegationKey key in expiringKeys)
                {
                    if (rmDTMasterKeyState.Contains(key))
                    {
                        allExpired = false;
                    }
                }
                if (allExpired)
                {
                    break;
                }
                Sharpen.Thread.Sleep(500);
            }
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestContainerCleanupWhenRMRestartedAppNotRegistered()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start RM
            DrainDispatcher dispatcher = new DrainDispatcher();
            MockRM          rm1        = new _MockRM_413(dispatcher, conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            // create app and launch the AM
            RMApp  app0 = rm1.SubmitApp(200);
            MockAM am0  = LaunchAM(app0, rm1, nm1);

            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Running);
            rm1.WaitForState(app0.GetApplicationId(), RMAppState.Running);
            // start new RM
            DrainDispatcher dispatcher2 = new DrainDispatcher();
            MockRM          rm2         = new _MockRM_432(dispatcher2, conf, memStore);

            rm2.Start();
            // nm1 register to rm2, and do a heartbeat
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm1.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
            rm2.WaitForState(app0.GetApplicationId(), RMAppState.Accepted);
            // Add unknown container for application unknown to scheduler
            NodeHeartbeatResponse response = nm1.NodeHeartbeat(am0.GetApplicationAttemptId(),
                                                               2, ContainerState.Running);

            WaitForContainerCleanup(dispatcher2, nm1, response);
            rm1.Stop();
            rm2.Stop();
        }
Exemple #9
0
        /// <exception cref="System.Exception"/>
        public virtual void TestRMAppAttemptFailuresValidityInterval()
        {
            YarnConfiguration conf = new YarnConfiguration();

            conf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler), typeof(ResourceScheduler
                                                                                           ));
            conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
            conf.Set(YarnConfiguration.RmStore, typeof(MemoryRMStateStore).FullName);
            // explicitly set max-am-retry count as 2.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 2);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            // set window size to a larger number : 20s
            // we will verify the app should be failed if
            // two continuous attempts failed in 20s.
            RMApp  app = rm1.SubmitApp(200, 20000);
            MockAM am  = MockRM.LaunchAM(app, rm1, nm1);

            // Fail current attempt normally
            nm1.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Failed);
            // launch the second attempt
            rm1.WaitForState(app.GetApplicationId(), RMAppState.Accepted);
            NUnit.Framework.Assert.AreEqual(2, app.GetAppAttempts().Count);
            NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)app.GetCurrentAppAttempt()).MayBeLastAttempt
                                              ());
            MockAM am_2 = MockRM.LaunchAndRegisterAM(app, rm1, nm1);

            am_2.WaitForState(RMAppAttemptState.Running);
            nm1.NodeHeartbeat(am_2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am_2.WaitForState(RMAppAttemptState.Failed);
            // current app should be failed.
            rm1.WaitForState(app.GetApplicationId(), RMAppState.Failed);
            ControlledClock clock = new ControlledClock(new SystemClock());
            // set window size to 6s
            RMAppImpl app1 = (RMAppImpl)rm1.SubmitApp(200, 6000);

            app1.SetSystemClock(clock);
            MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

            // Fail attempt1 normally
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am1.WaitForState(RMAppAttemptState.Failed);
            // launch the second attempt
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            NUnit.Framework.Assert.AreEqual(2, app1.GetAppAttempts().Count);
            RMAppAttempt attempt2 = app1.GetCurrentAppAttempt();

            NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt2).MayBeLastAttempt());
            MockAM am2 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

            am2.WaitForState(RMAppAttemptState.Running);
            // wait for 6 seconds
            clock.SetTime(Runtime.CurrentTimeMillis() + 6 * 1000);
            // Fail attempt2 normally
            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am2.WaitForState(RMAppAttemptState.Failed);
            // can launch the third attempt successfully
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            NUnit.Framework.Assert.AreEqual(3, app1.GetAppAttempts().Count);
            RMAppAttempt attempt3 = app1.GetCurrentAppAttempt();

            clock.Reset();
            MockAM am3 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

            am3.WaitForState(RMAppAttemptState.Running);
            // Restart rm.
            MockRM rm2 = new MockRM(conf, memStore);

            rm2.Start();
            // re-register the NM
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            NMContainerStatus status = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord <NMContainerStatus
                                                                                      >();

            status.SetContainerExitStatus(ContainerExitStatus.KilledByResourcemanager);
            status.SetContainerId(attempt3.GetMasterContainer().GetId());
            status.SetContainerState(ContainerState.Complete);
            status.SetDiagnostics(string.Empty);
            nm1.RegisterNode(Sharpen.Collections.SingletonList(status), null);
            rm2.WaitForState(attempt3.GetAppAttemptId(), RMAppAttemptState.Failed);
            rm2.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            // Lauch Attempt 4
            MockAM am4 = rm2.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 4, nm1);

            // wait for 6 seconds
            clock.SetTime(Runtime.CurrentTimeMillis() + 6 * 1000);
            // Fail attempt4 normally
            nm1.NodeHeartbeat(am4.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am4.WaitForState(RMAppAttemptState.Failed);
            // can launch the 5th attempt successfully
            rm2.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            MockAM am5 = rm2.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 5, nm1);

            clock.Reset();
            am5.WaitForState(RMAppAttemptState.Running);
            // Fail attempt5 normally
            nm1.NodeHeartbeat(am5.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am5.WaitForState(RMAppAttemptState.Failed);
            rm2.WaitForState(app1.GetApplicationId(), RMAppState.Failed);
            rm1.Stop();
            rm2.Stop();
        }
Exemple #10
0
        // AM container preempted, nm disk failure
        // should not be counted towards AM max retry count.
        /// <exception cref="System.Exception"/>
        public virtual void TestShouldNotCountFailureToMaxAttemptRetry()
        {
            YarnConfiguration conf = new YarnConfiguration();

            conf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler), typeof(ResourceScheduler
                                                                                           ));
            // explicitly set max-am-retry count as 1.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            conf.Set(YarnConfiguration.RmStore, typeof(MemoryRMStateStore).FullName);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            RMApp             app1        = rm1.SubmitApp(200);
            RMAppAttempt      attempt1    = app1.GetCurrentAppAttempt();
            MockAM            am1         = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
            CapacityScheduler scheduler   = (CapacityScheduler)rm1.GetResourceScheduler();
            ContainerId       amContainer = ContainerId.NewContainerId(am1.GetApplicationAttemptId(
                                                                           ), 1);

            // Preempt the first attempt;
            scheduler.KillContainer(scheduler.GetRMContainer(amContainer));
            am1.WaitForState(RMAppAttemptState.Failed);
            NUnit.Framework.Assert.IsTrue(!attempt1.ShouldCountTowardsMaxAttemptRetry());
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            ApplicationStateData appState = memStore.GetState().GetApplicationState()[app1.GetApplicationId
                                                                                          ()];
            // AM should be restarted even though max-am-attempt is 1.
            MockAM       am2      = rm1.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 2, nm1);
            RMAppAttempt attempt2 = app1.GetCurrentAppAttempt();

            NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt2).MayBeLastAttempt());
            // Preempt the second attempt.
            ContainerId amContainer2 = ContainerId.NewContainerId(am2.GetApplicationAttemptId
                                                                      (), 1);

            scheduler.KillContainer(scheduler.GetRMContainer(amContainer2));
            am2.WaitForState(RMAppAttemptState.Failed);
            NUnit.Framework.Assert.IsTrue(!attempt2.ShouldCountTowardsMaxAttemptRetry());
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            MockAM       am3      = rm1.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 3, nm1);
            RMAppAttempt attempt3 = app1.GetCurrentAppAttempt();

            NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt3).MayBeLastAttempt());
            // mimic NM disk_failure
            ContainerStatus containerStatus = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord <ContainerStatus
                                                                                             >();

            containerStatus.SetContainerId(attempt3.GetMasterContainer().GetId());
            containerStatus.SetDiagnostics("mimic NM disk_failure");
            containerStatus.SetState(ContainerState.Complete);
            containerStatus.SetExitStatus(ContainerExitStatus.DisksFailed);
            IDictionary <ApplicationId, IList <ContainerStatus> > conts = new Dictionary <ApplicationId
                                                                                          , IList <ContainerStatus> >();

            conts[app1.GetApplicationId()] = Sharpen.Collections.SingletonList(containerStatus
                                                                               );
            nm1.NodeHeartbeat(conts, true);
            am3.WaitForState(RMAppAttemptState.Failed);
            NUnit.Framework.Assert.IsTrue(!attempt3.ShouldCountTowardsMaxAttemptRetry());
            NUnit.Framework.Assert.AreEqual(ContainerExitStatus.DisksFailed, appState.GetAttempt
                                                (am3.GetApplicationAttemptId()).GetAMContainerExitStatus());
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            MockAM       am4      = rm1.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 4, nm1);
            RMAppAttempt attempt4 = app1.GetCurrentAppAttempt();

            NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt4).MayBeLastAttempt());
            // create second NM, and register to rm1
            MockNM nm2 = new MockNM("127.0.0.1:2234", 8000, rm1.GetResourceTrackerService());

            nm2.RegisterNode();
            // nm1 heartbeats to report unhealthy
            // This will mimic ContainerExitStatus.ABORT
            nm1.NodeHeartbeat(false);
            am4.WaitForState(RMAppAttemptState.Failed);
            NUnit.Framework.Assert.IsTrue(!attempt4.ShouldCountTowardsMaxAttemptRetry());
            NUnit.Framework.Assert.AreEqual(ContainerExitStatus.Aborted, appState.GetAttempt(
                                                am4.GetApplicationAttemptId()).GetAMContainerExitStatus());
            // launch next AM in nm2
            nm2.NodeHeartbeat(true);
            MockAM       am5      = rm1.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 5, nm2);
            RMAppAttempt attempt5 = app1.GetCurrentAppAttempt();

            NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt5).MayBeLastAttempt());
            // fail the AM normally
            nm2.NodeHeartbeat(am5.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am5.WaitForState(RMAppAttemptState.Failed);
            NUnit.Framework.Assert.IsTrue(attempt5.ShouldCountTowardsMaxAttemptRetry());
            // AM should not be restarted.
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Failed);
            NUnit.Framework.Assert.AreEqual(5, app1.GetAppAttempts().Count);
            rm1.Stop();
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestUsageWithMultipleContainersAndRMRestart()
        {
            // Set max attempts to 1 so that when the first attempt fails, the app
            // won't try to start a new one.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            MockRM rm0 = new MockRM(conf, memStore);

            rm0.Start();
            MockNM nm = new MockNM("127.0.0.1:1234", 65536, rm0.GetResourceTrackerService());

            nm.RegisterNode();
            RMApp app0 = rm0.SubmitApp(200);

            rm0.WaitForState(app0.GetApplicationId(), RMAppState.Accepted);
            RMAppAttempt         attempt0   = app0.GetCurrentAppAttempt();
            ApplicationAttemptId attemptId0 = attempt0.GetAppAttemptId();

            rm0.WaitForState(attemptId0, RMAppAttemptState.Scheduled);
            nm.NodeHeartbeat(true);
            rm0.WaitForState(attemptId0, RMAppAttemptState.Allocated);
            MockAM am0 = rm0.SendAMLaunched(attempt0.GetAppAttemptId());

            am0.RegisterAppAttempt();
            int NumContainers = 2;

            am0.Allocate("127.0.0.1", 1000, NumContainers, new AList <ContainerId>());
            nm.NodeHeartbeat(true);
            IList <Container> conts = am0.Allocate(new AList <ResourceRequest>(), new AList <ContainerId
                                                                                             >()).GetAllocatedContainers();

            while (conts.Count != NumContainers)
            {
                nm.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(conts, am0.Allocate(new AList <ResourceRequest>(), new
                                                               AList <ContainerId>()).GetAllocatedContainers());
                Sharpen.Thread.Sleep(500);
            }
            // launch the 2nd and 3rd containers.
            foreach (Container c in conts)
            {
                nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c.GetId().GetContainerId(), ContainerState
                                 .Running);
                rm0.WaitForState(nm, c.GetId(), RMContainerState.Running);
            }
            // Get the RMContainers for all of the live containers, to be used later
            // for metrics calculations and comparisons.
            ICollection <RMContainer> rmContainers = rm0.scheduler.GetSchedulerAppInfo(attempt0
                                                                                       .GetAppAttemptId()).GetLiveContainers();
            // Allow metrics to accumulate.
            int sleepInterval       = 1000;
            int cumulativeSleepTime = 0;

            while (app0.GetRMAppMetrics().GetMemorySeconds() <= 0 && cumulativeSleepTime < 5000
                   )
            {
                Sharpen.Thread.Sleep(sleepInterval);
                cumulativeSleepTime += sleepInterval;
            }
            // Stop all non-AM containers
            foreach (Container c_1 in conts)
            {
                if (c_1.GetId().GetContainerId() == 1)
                {
                    continue;
                }
                nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c_1.GetId().GetContainerId(), ContainerState
                                 .Complete);
                rm0.WaitForState(nm, c_1.GetId(), RMContainerState.Completed);
            }
            // After all other containers have completed, manually complete the master
            // container in order to trigger a save to the state store of the resource
            // usage metrics. This will cause the attempt to fail, and, since the max
            // attempt retries is 1, the app will also fail. This is intentional so
            // that all containers will complete prior to saving.
            ContainerId cId = ContainerId.NewContainerId(attempt0.GetAppAttemptId(), 1);

            nm.NodeHeartbeat(attempt0.GetAppAttemptId(), cId.GetContainerId(), ContainerState
                             .Complete);
            rm0.WaitForState(nm, cId, RMContainerState.Completed);
            // Check that the container metrics match those from the app usage report.
            long memorySeconds = 0;
            long vcoreSeconds  = 0;

            foreach (RMContainer c_2 in rmContainers)
            {
                AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c_2);
                memorySeconds += ru.GetMemorySeconds();
                vcoreSeconds  += ru.GetVcoreSeconds();
            }
            RMAppMetrics metricsBefore = app0.GetRMAppMetrics();

            NUnit.Framework.Assert.AreEqual("Unexcpected MemorySeconds value", memorySeconds,
                                            metricsBefore.GetMemorySeconds());
            NUnit.Framework.Assert.AreEqual("Unexpected VcoreSeconds value", vcoreSeconds, metricsBefore
                                            .GetVcoreSeconds());
            // create new RM to represent RM restart. Load up the state store.
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            RMApp app0After = rm1.GetRMContext().GetRMApps()[app0.GetApplicationId()];
            // Compare container resource usage metrics from before and after restart.
            RMAppMetrics metricsAfter = app0After.GetRMAppMetrics();

            NUnit.Framework.Assert.AreEqual("Vcore seconds were not the same after RM Restart"
                                            , metricsBefore.GetVcoreSeconds(), metricsAfter.GetVcoreSeconds());
            NUnit.Framework.Assert.AreEqual("Memory seconds were not the same after RM Restart"
                                            , metricsBefore.GetMemorySeconds(), metricsAfter.GetMemorySeconds());
            rm0.Stop();
            rm0.Close();
            rm1.Stop();
            rm1.Close();
        }
        // The test verifies processing of NMContainerStatuses which are sent during
        // NM registration.
        // 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
        // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
        // 3. Verify for number of container allocated by RM
        // 4. Verify Memory Usage by cluster, it should be 3072. AM memory + requested
        // memory. 1024 + 2048=3072
        // 5. Re-register NM by sending completed container status
        // 6. Verify for Memory Used, it should be 1024
        // 7. Send AM heatbeat to RM. Allocated response should contain completed
        // container.
        /// <exception cref="System.Exception"/>
        public virtual void TestProcessingNMContainerStatusesOnNMRestart()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            int    nmMemory        = 8192;
            int    amMemory        = 1024;
            int    containerMemory = 2048;
            MockNM nm1             = new MockNM("127.0.0.1:1234", nmMemory, rm1.GetResourceTrackerService
                                                    ());

            nm1.RegisterNode();
            RMApp  app0 = rm1.SubmitApp(amMemory);
            MockAM am0  = MockRM.LaunchAndRegisterAM(app0, rm1, nm1);
            // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
            int noOfContainers = 1;
            IList <Container> allocateContainers = am0.AllocateAndWaitForContainers(noOfContainers
                                                                                    , containerMemory, nm1);

            // 3. Verify for number of container allocated by RM
            NUnit.Framework.Assert.AreEqual(noOfContainers, allocateContainers.Count);
            Container container = allocateContainers[0];

            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Running);
            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), container.GetId().GetContainerId
                                  (), ContainerState.Running);
            rm1.WaitForState(app0.GetApplicationId(), RMAppState.Running);
            // 4. Verify Memory Usage by cluster, it should be 3072. AM memory +
            // requested memory. 1024 + 2048=3072
            ResourceScheduler rs = rm1.GetRMContext().GetScheduler();
            int allocatedMB      = rs.GetRootQueueMetrics().GetAllocatedMB();

            NUnit.Framework.Assert.AreEqual(amMemory + containerMemory, allocatedMB);
            // 5. Re-register NM by sending completed container status
            IList <NMContainerStatus> nMContainerStatusForApp = CreateNMContainerStatusForApp(
                am0);

            nm1.RegisterNode(nMContainerStatusForApp, Arrays.AsList(app0.GetApplicationId()));
            WaitForClusterMemory(nm1, rs, amMemory);
            // 6. Verify for Memory Used, it should be 1024
            NUnit.Framework.Assert.AreEqual(amMemory, rs.GetRootQueueMetrics().GetAllocatedMB
                                                ());
            // 7. Send AM heatbeat to RM. Allocated response should contain completed
            // container
            AllocateRequest req = AllocateRequest.NewInstance(0, 0F, new AList <ResourceRequest
                                                                                >(), new AList <ContainerId>(), null);
            AllocateResponse        allocate = am0.Allocate(req);
            IList <ContainerStatus> completedContainersStatuses = allocate.GetCompletedContainersStatuses
                                                                      ();

            NUnit.Framework.Assert.AreEqual(noOfContainers, completedContainersStatuses.Count
                                            );
            // Application clean up should happen Cluster memory used is 0
            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
            WaitForClusterMemory(nm1, rs, 0);
            rm1.Stop();
        }
        // Test does major 6 steps verification.
        // Step-1 : AMRMClient send allocate request for 2 container requests
        // Step-2 : 2 containers are allocated by RM.
        // Step-3 : AM Send 1 containerRequest(cRequest3) and 1 releaseRequests to
        // RM
        // Step-4 : On RM restart, AM(does not know RM is restarted) sends additional
        // containerRequest(cRequest4) and blacklisted nodes.
        // Intern RM send resync command
        // Step-5 : Allocater after resync command & new containerRequest(cRequest5)
        // Step-6 : RM allocates containers i.e cRequest3,cRequest4 and cRequest5
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRMClientResendsRequestsOnRMRestart()
        {
            UserGroupInformation.SetLoginUser(null);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // Phase-1 Start 1st RM
            TestAMRMClientOnRMRestart.MyResourceManager rm1 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm1.Start();
            DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();
            // Submit the application
            RMApp app = rm1.SubmitApp(1024);

            dispatcher.Await();
            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

            rm1.SendAMLaunched(appAttemptId);
            dispatcher.Await();
            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token = rm1.GetRMContext
                                                                                     ().GetRMApps()[appAttemptId.GetApplicationId()].GetRMAppAttempt(appAttemptId).GetAMRMToken
                                                                                     ();
            UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();

            ugi.AddTokenIdentifier(token.DecodeIdentifier());
            // Step-1 : AMRMClient send allocate request for 2 ContainerRequest
            // cRequest1 = h1 and cRequest2 = h1,h2
            // blacklisted nodes = h2
            AMRMClient <AMRMClient.ContainerRequest> amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl
                                                                    (rm1);

            amClient.Init(conf);
            amClient.Start();
            amClient.RegisterApplicationMaster("Host", 10000, string.Empty);
            AMRMClient.ContainerRequest cRequest1 = CreateReq(1, 1024, new string[] { "h1" });
            amClient.AddContainerRequest(cRequest1);
            AMRMClient.ContainerRequest cRequest2 = CreateReq(1, 1024, new string[] { "h1", "h2" });
            amClient.AddContainerRequest(cRequest2);
            IList <string> blacklistAdditions = new AList <string>();
            IList <string> blacklistRemoval   = new AList <string>();

            blacklistAdditions.AddItem("h2");
            blacklistRemoval.AddItem("h10");
            amClient.UpdateBlacklist(blacklistAdditions, blacklistRemoval);
            blacklistAdditions.Remove("h2");
            // remove from local list
            AllocateResponse allocateResponse = amClient.Allocate(0.1f);

            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            // Why 4 ask, why not 3 ask even h2 is blacklisted?
            // On blacklisting host,applicationmaster has to remove ask request from
            // remoterequest table.Here,test does not remove explicitely
            AssertAsksAndReleases(4, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(1, 1, rm1);
            // Step-2 : NM heart beat is sent.
            // On 2nd AM allocate request, RM allocates 2 containers to AM
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            allocateResponse = amClient.Allocate(0.2f);
            dispatcher.Await();
            // 2 containers are allocated i.e for cRequest1 and cRequest2.
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 2, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(0, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            IList <Container> allocatedContainers = allocateResponse.GetAllocatedContainers();

            // removed allocated container requests
            amClient.RemoveContainerRequest(cRequest1);
            amClient.RemoveContainerRequest(cRequest2);
            allocateResponse = amClient.Allocate(0.2f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(4, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            // Step-3 : Send 1 containerRequest and 1 releaseRequests to RM
            AMRMClient.ContainerRequest cRequest3 = CreateReq(1, 1024, new string[] { "h1" });
            amClient.AddContainerRequest(cRequest3);
            int pendingRelease         = 0;
            IEnumerator <Container> it = allocatedContainers.GetEnumerator();

            while (it.HasNext())
            {
                amClient.ReleaseAssignedContainer(it.Next().GetId());
                pendingRelease++;
                it.Remove();
                break;
            }
            // remove one container
            allocateResponse = amClient.Allocate(0.3f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(3, pendingRelease, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            int completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;

            pendingRelease -= completedContainer;
            // Phase-2 start 2nd RM is up
            TestAMRMClientOnRMRestart.MyResourceManager rm2 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm2.Start();
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
            dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();
            // NM should be rebooted on heartbeat, even first heartbeat for nm2
            NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());
            // new NM to represent NM re-register
            nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            blacklistAdditions.AddItem("h3");
            amClient.UpdateBlacklist(blacklistAdditions, null);
            blacklistAdditions.Remove("h3");
            it = allocatedContainers.GetEnumerator();
            while (it.HasNext())
            {
                amClient.ReleaseAssignedContainer(it.Next().GetId());
                pendingRelease++;
                it.Remove();
            }
            AMRMClient.ContainerRequest cRequest4 = CreateReq(1, 1024, new string[] { "h1", "h2" });
            amClient.AddContainerRequest(cRequest4);
            // Step-4 : On RM restart, AM(does not know RM is restarted) sends
            // additional
            // containerRequest and blacklisted nodes.
            // Intern RM send resync command,AMRMClient resend allocate request
            allocateResponse = amClient.Allocate(0.3f);
            dispatcher.Await();
            completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
            pendingRelease    -= completedContainer;
            AssertAsksAndReleases(4, pendingRelease, rm2);
            AssertBlacklistAdditionsAndRemovals(2, 0, rm2);
            AMRMClient.ContainerRequest cRequest5 = CreateReq(1, 1024, new string[] { "h1", "h2"
                                                                                      , "h3" });
            amClient.AddContainerRequest(cRequest5);
            // Step-5 : Allocater after resync command
            allocateResponse = amClient.Allocate(0.5f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(5, 0, rm2);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm2);
            int noAssignedContainer = 0;
            int count = 5;

            while (count-- > 0)
            {
                nm1.NodeHeartbeat(true);
                dispatcher.Await();
                allocateResponse = amClient.Allocate(0.5f);
                dispatcher.Await();
                noAssignedContainer += allocateResponse.GetAllocatedContainers().Count;
                if (noAssignedContainer == 3)
                {
                    break;
                }
                Sharpen.Thread.Sleep(1000);
            }
            // Step-6 : RM allocates containers i.e cRequest3,cRequest4 and cRequest5
            NUnit.Framework.Assert.AreEqual("Number of container should be 3", 3, noAssignedContainer
                                            );
            amClient.Stop();
            rm1.Stop();
            rm2.Stop();
        }
        // Test verify for AM issued with rolled-over AMRMToken
        // is still able to communicate with restarted RM.
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRMClientOnAMRMTokenRollOverOnRMRestart()
        {
            conf.SetLong(YarnConfiguration.RmAmrmTokenMasterKeyRollingIntervalSecs, rolling_interval_sec
                         );
            conf.SetLong(YarnConfiguration.RmAmExpiryIntervalMs, am_expire_ms);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start first RM
            TestAMRMClientOnRMRestart.MyResourceManager2 rm1 = new TestAMRMClientOnRMRestart.MyResourceManager2
                                                                   (conf, memStore);
            rm1.Start();
            DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();
            long            startTime  = Runtime.CurrentTimeMillis();
            // Submit the application
            RMApp app = rm1.SubmitApp(1024);

            dispatcher.Await();
            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

            rm1.SendAMLaunched(appAttemptId);
            dispatcher.Await();
            AMRMTokenSecretManager amrmTokenSecretManagerForRM1 = rm1.GetRMContext().GetAMRMTokenSecretManager
                                                                      ();

            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token = amrmTokenSecretManagerForRM1
                                                                                 .CreateAndGetAMRMToken(appAttemptId);
            UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();

            ugi.AddTokenIdentifier(token.DecodeIdentifier());
            AMRMClient <AMRMClient.ContainerRequest> amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl
                                                                    (rm1);

            amClient.Init(conf);
            amClient.Start();
            amClient.RegisterApplicationMaster("h1", 10000, string.Empty);
            amClient.Allocate(0.1f);
            // Wait for enough time and make sure the roll_over happens
            // At mean time, the old AMRMToken should continue to work
            while (Runtime.CurrentTimeMillis() - startTime < rolling_interval_sec * 1000)
            {
                amClient.Allocate(0.1f);
                try
                {
                    Sharpen.Thread.Sleep(1000);
                }
                catch (Exception)
                {
                }
            }
            // DO NOTHING
            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM1.GetMasterKey().GetMasterKey
                                              ().GetKeyId() != token.DecodeIdentifier().GetKeyId());
            amClient.Allocate(0.1f);
            // active the nextMasterKey, and replace the currentMasterKey
            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> newToken = amrmTokenSecretManagerForRM1
                                                                                    .CreateAndGetAMRMToken(appAttemptId);
            int waitCount = 0;

            while (waitCount++ <= 50)
            {
                if (amrmTokenSecretManagerForRM1.GetCurrnetMasterKeyData().GetMasterKey().GetKeyId
                        () != token.DecodeIdentifier().GetKeyId())
                {
                    break;
                }
                try
                {
                    amClient.Allocate(0.1f);
                }
                catch (Exception)
                {
                    break;
                }
                Sharpen.Thread.Sleep(500);
            }
            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM1.GetNextMasterKeyData()
                                          == null);
            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM1.GetCurrnetMasterKeyData
                                              ().GetMasterKey().GetKeyId() == newToken.DecodeIdentifier().GetKeyId());
            // start 2nd RM
            conf.Set(YarnConfiguration.RmSchedulerAddress, "0.0.0.0:9030");
            TestAMRMClientOnRMRestart.MyResourceManager2 rm2 = new TestAMRMClientOnRMRestart.MyResourceManager2
                                                                   (conf, memStore);
            rm2.Start();
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
            dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();
            AMRMTokenSecretManager amrmTokenSecretManagerForRM2 = rm2.GetRMContext().GetAMRMTokenSecretManager
                                                                      ();

            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM2.GetCurrnetMasterKeyData
                                              ().GetMasterKey().GetKeyId() == newToken.DecodeIdentifier().GetKeyId());
            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM2.GetNextMasterKeyData()
                                          == null);
            try
            {
                UserGroupInformation testUser = UserGroupInformation.CreateRemoteUser("testUser");
                SecurityUtil.SetTokenService(token, rm2.GetApplicationMasterService().GetBindAddress
                                                 ());
                testUser.AddToken(token);
                testUser.DoAs(new _PrivilegedAction_480(rm2)).Allocate(Org.Apache.Hadoop.Yarn.Util.Records
                                                                       .NewRecord <AllocateRequest>());
                NUnit.Framework.Assert.Fail("The old Token should not work");
            }
            catch (Exception ex)
            {
                NUnit.Framework.Assert.IsTrue(ex is SecretManager.InvalidToken);
                NUnit.Framework.Assert.IsTrue(ex.Message.Contains("Invalid AMRMToken from " + token
                                                                  .DecodeIdentifier().GetApplicationAttemptId()));
            }
            // make sure the recovered AMRMToken works for new RM
            amClient.Allocate(0.1f);
            amClient.UnregisterApplicationMaster(FinalApplicationStatus.Succeeded, null, null
                                                 );
            amClient.Stop();
            rm1.Stop();
            rm2.Stop();
        }
        // Test verify for
        // 1. AM try to unregister without registering
        // 2. AM register to RM, and try to unregister immediately after RM restart
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRMClientForUnregisterAMOnRMRestart()
        {
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // Phase-1 Start 1st RM
            TestAMRMClientOnRMRestart.MyResourceManager rm1 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm1.Start();
            DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();
            // Submit the application
            RMApp app = rm1.SubmitApp(1024);

            dispatcher.Await();
            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

            rm1.SendAMLaunched(appAttemptId);
            dispatcher.Await();
            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token = rm1.GetRMContext
                                                                                     ().GetRMApps()[appAttemptId.GetApplicationId()].GetRMAppAttempt(appAttemptId).GetAMRMToken
                                                                                     ();
            UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();

            ugi.AddTokenIdentifier(token.DecodeIdentifier());
            AMRMClient <AMRMClient.ContainerRequest> amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl
                                                                    (rm1);

            amClient.Init(conf);
            amClient.Start();
            amClient.RegisterApplicationMaster("h1", 10000, string.Empty);
            amClient.Allocate(0.1f);
            // Phase-2 start 2nd RM is up
            TestAMRMClientOnRMRestart.MyResourceManager rm2 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm2.Start();
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
            dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();
            // NM should be rebooted on heartbeat, even first heartbeat for nm2
            NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());
            // new NM to represent NM re-register
            nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
            ContainerId       containerId     = ContainerId.NewContainerId(appAttemptId, 1);
            NMContainerStatus containerReport = NMContainerStatus.NewInstance(containerId, ContainerState
                                                                              .Running, Resource.NewInstance(1024, 1), "recover container", 0, Priority.NewInstance
                                                                                  (0), 0);

            nm1.RegisterNode(Arrays.AsList(containerReport), null);
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            amClient.UnregisterApplicationMaster(FinalApplicationStatus.Succeeded, null, null
                                                 );
            rm2.WaitForState(appAttemptId, RMAppAttemptState.Finishing);
            nm1.NodeHeartbeat(appAttemptId, 1, ContainerState.Complete);
            rm2.WaitForState(appAttemptId, RMAppAttemptState.Finished);
            rm2.WaitForState(app.GetApplicationId(), RMAppState.Finished);
            amClient.Stop();
            rm1.Stop();
            rm2.Stop();
        }