// Test verify for AM issued with rolled-over AMRMToken
        // is still able to communicate with restarted RM.
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRMClientOnAMRMTokenRollOverOnRMRestart()
        {
            conf.SetLong(YarnConfiguration.RmAmrmTokenMasterKeyRollingIntervalSecs, rolling_interval_sec
                         );
            conf.SetLong(YarnConfiguration.RmAmExpiryIntervalMs, am_expire_ms);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start first RM
            TestAMRMClientOnRMRestart.MyResourceManager2 rm1 = new TestAMRMClientOnRMRestart.MyResourceManager2
                                                                   (conf, memStore);
            rm1.Start();
            DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();
            long            startTime  = Runtime.CurrentTimeMillis();
            // Submit the application
            RMApp app = rm1.SubmitApp(1024);

            dispatcher.Await();
            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

            rm1.SendAMLaunched(appAttemptId);
            dispatcher.Await();
            AMRMTokenSecretManager amrmTokenSecretManagerForRM1 = rm1.GetRMContext().GetAMRMTokenSecretManager
                                                                      ();

            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token = amrmTokenSecretManagerForRM1
                                                                                 .CreateAndGetAMRMToken(appAttemptId);
            UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();

            ugi.AddTokenIdentifier(token.DecodeIdentifier());
            AMRMClient <AMRMClient.ContainerRequest> amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl
                                                                    (rm1);

            amClient.Init(conf);
            amClient.Start();
            amClient.RegisterApplicationMaster("h1", 10000, string.Empty);
            amClient.Allocate(0.1f);
            // Wait for enough time and make sure the roll_over happens
            // At mean time, the old AMRMToken should continue to work
            while (Runtime.CurrentTimeMillis() - startTime < rolling_interval_sec * 1000)
            {
                amClient.Allocate(0.1f);
                try
                {
                    Sharpen.Thread.Sleep(1000);
                }
                catch (Exception)
                {
                }
            }
            // DO NOTHING
            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM1.GetMasterKey().GetMasterKey
                                              ().GetKeyId() != token.DecodeIdentifier().GetKeyId());
            amClient.Allocate(0.1f);
            // active the nextMasterKey, and replace the currentMasterKey
            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> newToken = amrmTokenSecretManagerForRM1
                                                                                    .CreateAndGetAMRMToken(appAttemptId);
            int waitCount = 0;

            while (waitCount++ <= 50)
            {
                if (amrmTokenSecretManagerForRM1.GetCurrnetMasterKeyData().GetMasterKey().GetKeyId
                        () != token.DecodeIdentifier().GetKeyId())
                {
                    break;
                }
                try
                {
                    amClient.Allocate(0.1f);
                }
                catch (Exception)
                {
                    break;
                }
                Sharpen.Thread.Sleep(500);
            }
            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM1.GetNextMasterKeyData()
                                          == null);
            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM1.GetCurrnetMasterKeyData
                                              ().GetMasterKey().GetKeyId() == newToken.DecodeIdentifier().GetKeyId());
            // start 2nd RM
            conf.Set(YarnConfiguration.RmSchedulerAddress, "0.0.0.0:9030");
            TestAMRMClientOnRMRestart.MyResourceManager2 rm2 = new TestAMRMClientOnRMRestart.MyResourceManager2
                                                                   (conf, memStore);
            rm2.Start();
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
            dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();
            AMRMTokenSecretManager amrmTokenSecretManagerForRM2 = rm2.GetRMContext().GetAMRMTokenSecretManager
                                                                      ();

            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM2.GetCurrnetMasterKeyData
                                              ().GetMasterKey().GetKeyId() == newToken.DecodeIdentifier().GetKeyId());
            NUnit.Framework.Assert.IsTrue(amrmTokenSecretManagerForRM2.GetNextMasterKeyData()
                                          == null);
            try
            {
                UserGroupInformation testUser = UserGroupInformation.CreateRemoteUser("testUser");
                SecurityUtil.SetTokenService(token, rm2.GetApplicationMasterService().GetBindAddress
                                                 ());
                testUser.AddToken(token);
                testUser.DoAs(new _PrivilegedAction_480(rm2)).Allocate(Org.Apache.Hadoop.Yarn.Util.Records
                                                                       .NewRecord <AllocateRequest>());
                NUnit.Framework.Assert.Fail("The old Token should not work");
            }
            catch (Exception ex)
            {
                NUnit.Framework.Assert.IsTrue(ex is SecretManager.InvalidToken);
                NUnit.Framework.Assert.IsTrue(ex.Message.Contains("Invalid AMRMToken from " + token
                                                                  .DecodeIdentifier().GetApplicationAttemptId()));
            }
            // make sure the recovered AMRMToken works for new RM
            amClient.Allocate(0.1f);
            amClient.UnregisterApplicationMaster(FinalApplicationStatus.Succeeded, null, null
                                                 );
            amClient.Stop();
            rm1.Stop();
            rm2.Stop();
        }
Ejemplo n.º 2
0
 /// <exception cref="System.Exception"/>
 private void SyncNodeHeartbeat(MockNM nm, bool health)
 {
     nm.NodeHeartbeat(health);
     dispatcher.Await();
 }
        // Test verify for
        // 1. AM try to unregister without registering
        // 2. AM register to RM, and try to unregister immediately after RM restart
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRMClientForUnregisterAMOnRMRestart()
        {
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // Phase-1 Start 1st RM
            TestAMRMClientOnRMRestart.MyResourceManager rm1 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm1.Start();
            DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();
            // Submit the application
            RMApp app = rm1.SubmitApp(1024);

            dispatcher.Await();
            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

            rm1.SendAMLaunched(appAttemptId);
            dispatcher.Await();
            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token = rm1.GetRMContext
                                                                                     ().GetRMApps()[appAttemptId.GetApplicationId()].GetRMAppAttempt(appAttemptId).GetAMRMToken
                                                                                     ();
            UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();

            ugi.AddTokenIdentifier(token.DecodeIdentifier());
            AMRMClient <AMRMClient.ContainerRequest> amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl
                                                                    (rm1);

            amClient.Init(conf);
            amClient.Start();
            amClient.RegisterApplicationMaster("h1", 10000, string.Empty);
            amClient.Allocate(0.1f);
            // Phase-2 start 2nd RM is up
            TestAMRMClientOnRMRestart.MyResourceManager rm2 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm2.Start();
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
            dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();
            // NM should be rebooted on heartbeat, even first heartbeat for nm2
            NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());
            // new NM to represent NM re-register
            nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
            ContainerId       containerId     = ContainerId.NewContainerId(appAttemptId, 1);
            NMContainerStatus containerReport = NMContainerStatus.NewInstance(containerId, ContainerState
                                                                              .Running, Resource.NewInstance(1024, 1), "recover container", 0, Priority.NewInstance
                                                                                  (0), 0);

            nm1.RegisterNode(Arrays.AsList(containerReport), null);
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            amClient.UnregisterApplicationMaster(FinalApplicationStatus.Succeeded, null, null
                                                 );
            rm2.WaitForState(appAttemptId, RMAppAttemptState.Finishing);
            nm1.NodeHeartbeat(appAttemptId, 1, ContainerState.Complete);
            rm2.WaitForState(appAttemptId, RMAppAttemptState.Finished);
            rm2.WaitForState(app.GetApplicationId(), RMAppState.Finished);
            amClient.Stop();
            rm1.Stop();
            rm2.Stop();
        }
Ejemplo n.º 4
0
        public virtual void TestAMRMUnusableNodes()
        {
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 10000);
            MockNM nm2 = rm.RegisterNode("127.0.0.2:1234", 10000);
            MockNM nm3 = rm.RegisterNode("127.0.0.3:1234", 10000);
            MockNM nm4 = rm.RegisterNode("127.0.0.4:1234", 10000);

            dispatcher.Await();
            RMApp app1 = rm.SubmitApp(2000);

            // Trigger the scheduling so the AM gets 'launched' on nm1
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            // register AM returns no unusable node
            am1.RegisterAppAttempt();
            // allocate request returns no updated node
            AllocateRequest allocateRequest1 = AllocateRequest.NewInstance(0, 0F, null, null,
                                                                           null);
            AllocateResponse response1 = Allocate(attempt1.GetAppAttemptId(), allocateRequest1
                                                  );
            IList <NodeReport> updatedNodes = response1.GetUpdatedNodes();

            NUnit.Framework.Assert.AreEqual(0, updatedNodes.Count);
            SyncNodeHeartbeat(nm4, false);
            // allocate request returns updated node
            allocateRequest1 = AllocateRequest.NewInstance(response1.GetResponseId(), 0F, null
                                                           , null, null);
            response1    = Allocate(attempt1.GetAppAttemptId(), allocateRequest1);
            updatedNodes = response1.GetUpdatedNodes();
            NUnit.Framework.Assert.AreEqual(1, updatedNodes.Count);
            NodeReport nr = updatedNodes.GetEnumerator().Next();

            NUnit.Framework.Assert.AreEqual(nm4.GetNodeId(), nr.GetNodeId());
            NUnit.Framework.Assert.AreEqual(NodeState.Unhealthy, nr.GetNodeState());
            // resending the allocate request returns the same result
            response1    = Allocate(attempt1.GetAppAttemptId(), allocateRequest1);
            updatedNodes = response1.GetUpdatedNodes();
            NUnit.Framework.Assert.AreEqual(1, updatedNodes.Count);
            nr = updatedNodes.GetEnumerator().Next();
            NUnit.Framework.Assert.AreEqual(nm4.GetNodeId(), nr.GetNodeId());
            NUnit.Framework.Assert.AreEqual(NodeState.Unhealthy, nr.GetNodeState());
            SyncNodeLost(nm3);
            // subsequent allocate request returns delta
            allocateRequest1 = AllocateRequest.NewInstance(response1.GetResponseId(), 0F, null
                                                           , null, null);
            response1    = Allocate(attempt1.GetAppAttemptId(), allocateRequest1);
            updatedNodes = response1.GetUpdatedNodes();
            NUnit.Framework.Assert.AreEqual(1, updatedNodes.Count);
            nr = updatedNodes.GetEnumerator().Next();
            NUnit.Framework.Assert.AreEqual(nm3.GetNodeId(), nr.GetNodeId());
            NUnit.Framework.Assert.AreEqual(NodeState.Lost, nr.GetNodeState());
            // registering another AM gives it the complete failed list
            RMApp app2 = rm.SubmitApp(2000);

            // Trigger nm2 heartbeat so that AM gets launched on it
            nm2.NodeHeartbeat(true);
            RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
            MockAM       am2      = rm.SendAMLaunched(attempt2.GetAppAttemptId());

            // register AM returns all unusable nodes
            am2.RegisterAppAttempt();
            // allocate request returns no updated node
            AllocateRequest allocateRequest2 = AllocateRequest.NewInstance(0, 0F, null, null,
                                                                           null);
            AllocateResponse response2 = Allocate(attempt2.GetAppAttemptId(), allocateRequest2
                                                  );

            updatedNodes = response2.GetUpdatedNodes();
            NUnit.Framework.Assert.AreEqual(0, updatedNodes.Count);
            SyncNodeHeartbeat(nm4, true);
            // both AM's should get delta updated nodes
            allocateRequest1 = AllocateRequest.NewInstance(response1.GetResponseId(), 0F, null
                                                           , null, null);
            response1    = Allocate(attempt1.GetAppAttemptId(), allocateRequest1);
            updatedNodes = response1.GetUpdatedNodes();
            NUnit.Framework.Assert.AreEqual(1, updatedNodes.Count);
            nr = updatedNodes.GetEnumerator().Next();
            NUnit.Framework.Assert.AreEqual(nm4.GetNodeId(), nr.GetNodeId());
            NUnit.Framework.Assert.AreEqual(NodeState.Running, nr.GetNodeState());
            allocateRequest2 = AllocateRequest.NewInstance(response2.GetResponseId(), 0F, null
                                                           , null, null);
            response2    = Allocate(attempt2.GetAppAttemptId(), allocateRequest2);
            updatedNodes = response2.GetUpdatedNodes();
            NUnit.Framework.Assert.AreEqual(1, updatedNodes.Count);
            nr = updatedNodes.GetEnumerator().Next();
            NUnit.Framework.Assert.AreEqual(nm4.GetNodeId(), nr.GetNodeId());
            NUnit.Framework.Assert.AreEqual(NodeState.Running, nr.GetNodeState());
            // subsequent allocate calls should return no updated nodes
            allocateRequest2 = AllocateRequest.NewInstance(response2.GetResponseId(), 0F, null
                                                           , null, null);
            response2    = Allocate(attempt2.GetAppAttemptId(), allocateRequest2);
            updatedNodes = response2.GetUpdatedNodes();
            NUnit.Framework.Assert.AreEqual(0, updatedNodes.Count);
        }