예제 #1
0
        /// <summary>
        /// Verifies that an exclude file which is added to the configuration after
        /// the ResourceManager has been started takes effect on <c>RefreshNodes</c>:
        /// the excluded host must receive a shutdown action while the other host
        /// keeps receiving a normal heartbeat response.
        /// </summary>
        public virtual void TestAddNewExcludePathToConfiguration()
        {
            Configuration conf = new Configuration();

            rm = new MockRM(conf);
            rm.Start();
            MockNM         nm1     = rm.RegisterNode("host1:1234", 5120);
            MockNM         nm2     = rm.RegisterNode("host2:5678", 10240);
            ClusterMetrics metrics = ClusterMetrics.GetMetrics();

            // Use a real test assertion here: Debug.Assert is compiled out of
            // non-DEBUG builds, which would turn a missing metrics instance into an
            // unrelated null dereference on the next line.
            NUnit.Framework.Assert.IsNotNull(metrics);
            int initialMetricCount = metrics.GetNumDecommisionedNMs();

            // Before any exclusion both nodes are expected to heartbeat normally.
            NodeHeartbeatResponse nodeHeartbeat = nm1.NodeHeartbeat(true);
            NUnit.Framework.Assert.AreEqual(NodeAction.Normal, nodeHeartbeat.GetNodeAction());
            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.AreEqual(NodeAction.Normal, nodeHeartbeat.GetNodeAction());

            // Exclude host2, point the configuration at the exclude file only now
            // (after RM start) and ask the nodes list manager to reload it.
            WriteToHostsFile("host2");
            conf.Set(YarnConfiguration.RmNodesExcludeFilePath, hostFile.GetAbsolutePath());
            rm.GetNodesListManager().RefreshNodes(conf);
            CheckDecommissionedNMCount(rm, ++initialMetricCount);

            // host1 is not listed in the exclude file and must stay active.
            nodeHeartbeat = nm1.NodeHeartbeat(true);
            NUnit.Framework.Assert.AreEqual("Node should not have been decomissioned.",
                                            NodeAction.Normal, nodeHeartbeat.GetNodeAction());

            // host2 is excluded and must be told to shut down.
            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.AreEqual("Node should have been decomissioned but is in state "
                                            + nodeHeartbeat.GetNodeAction(),
                                            NodeAction.Shutdown, nodeHeartbeat.GetNodeAction());
        }
예제 #2
0
        /// <summary>
        /// Sends a heartbeat whose response id (-100) is far behind the one held by
        /// the ResourceManager and expects a resync action, the matching diagnostics
        /// message, and a rebooted-NM count one higher than before.
        /// </summary>
        public virtual void TestReboot()
        {
            Configuration configuration = new Configuration();

            rm = new MockRM(configuration);
            rm.Start();

            MockNM upToDateNode = rm.RegisterNode("host1:1234", 5120);
            MockNM laggingNode  = rm.RegisterNode("host2:1234", 2048);
            int rebootedBefore  = ClusterMetrics.GetMetrics().GetNumRebootedNMs();

            // A regular heartbeat is answered with the normal action.
            NodeHeartbeatResponse reply = upToDateNode.NodeHeartbeat(true);
            bool firstIsNormal = NodeAction.Normal.Equals(reply.GetNodeAction());
            NUnit.Framework.Assert.IsTrue(firstIsNormal);

            // A heartbeat with an out-of-date response id is answered with resync.
            Dictionary <ApplicationId, IList <ContainerStatus> > noContainerStatuses =
                new Dictionary <ApplicationId, IList <ContainerStatus> >();
            reply = laggingNode.NodeHeartbeat(noContainerStatuses, true, -100);
            bool secondIsResync = NodeAction.Resync.Equals(reply.GetNodeAction());
            NUnit.Framework.Assert.IsTrue(secondIsResync);
            NUnit.Framework.Assert.AreEqual("Too far behind rm response id:0 nm response id:-100",
                                            reply.GetDiagnosticsMessage());

            CheckRebootedNMCount(rm, rebootedBefore + 1);
        }
예제 #3
0
        /// <summary>
        /// Exercises the response-id sequence of the resource tracker service:
        /// consecutive heartbeats are expected to yield ids 1 and 2, a repeated
        /// heartbeat with the previous id is expected to yield 2 again, and a
        /// heartbeat that falls back to id 0 is expected to trigger a resync.
        /// </summary>
        public virtual void TestRPCResponseId()
        {
            string   node       = "localhost";
            Resource capability = BuilderUtils.NewResource(1024, 1);

            nodeId = NodeId.NewInstance(node, 1234);

            // Register the node with the tracker service before heartbeating.
            RegisterNodeManagerRequest request1 =
                recordFactory.NewRecordInstance <RegisterNodeManagerRequest>();
            request1.SetNodeId(nodeId);
            request1.SetHttpPort(0);
            request1.SetResource(capability);
            resourceTrackerService.RegisterNodeManager(request1);

            // Build a healthy node status that is reused for every heartbeat; only
            // its response id is changed between calls.
            NodeStatus nodeStatus = recordFactory.NewRecordInstance <NodeStatus>();
            nodeStatus.SetNodeId(nodeId);
            NodeHealthStatus nodeHealthStatus =
                recordFactory.NewRecordInstance <NodeHealthStatus>();
            nodeHealthStatus.SetIsNodeHealthy(true);
            nodeStatus.SetNodeHealthStatus(nodeHealthStatus);

            NodeHeartbeatRequest nodeHeartBeatRequest =
                recordFactory.NewRecordInstance <NodeHeartbeatRequest>();
            nodeHeartBeatRequest.SetNodeStatus(nodeStatus);

            // First heartbeat with id 0.
            nodeStatus.SetResponseId(0);
            NodeHeartbeatResponse response =
                resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(1, response.GetResponseId());

            // Second heartbeat echoes the id that was just received.
            nodeStatus.SetResponseId(response.GetResponseId());
            response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());

            // Try calling again with the same, now older, response id: the id in
            // the reply is expected to stay at 2.
            response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());

            // Falling back to id 0 is expected to be answered with a resync.
            nodeStatus.SetResponseId(0);
            response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, response.GetNodeAction());
            NUnit.Framework.Assert.AreEqual("Too far behind rm response id:2 nm response id:0",
                                            response.GetDiagnosticsMessage());
        }
예제 #4
0
        /// <summary>
        /// Starts with an empty exclude file, then excludes "host2" and the
        /// normalized address of "localhost" and expects both nodes to receive a
        /// shutdown action. Finally empties the exclude file again, re-registers the
        /// localhost node and expects only one node to remain counted as
        /// decommissioned.
        /// </summary>
        public virtual void TestDecommissionWithExcludeHosts()
        {
            Configuration configuration = new Configuration();

            configuration.Set(YarnConfiguration.RmNodesExcludeFilePath, hostFile.GetAbsolutePath());
            WriteToHostsFile(string.Empty);
            DrainDispatcher drain = new DrainDispatcher();

            rm = new _MockRM_162(drain, configuration);
            rm.Start();

            MockNM firstNode  = rm.RegisterNode("host1:1234", 5120);
            MockNM secondNode = rm.RegisterNode("host2:5678", 10240);
            MockNM localNode  = rm.RegisterNode("localhost:4433", 1024);
            drain.Await();

            int decommissionedBefore = ClusterMetrics.GetMetrics().GetNumDecommisionedNMs();

            // With an empty exclude file the first two nodes heartbeat normally.
            NodeHeartbeatResponse reply = firstNode.NodeHeartbeat(true);
            bool actionMatches = NodeAction.Normal.Equals(reply.GetNodeAction());
            NUnit.Framework.Assert.IsTrue(actionMatches);

            reply = secondNode.NodeHeartbeat(true);
            actionMatches = NodeAction.Normal.Equals(reply.GetNodeAction());
            NUnit.Framework.Assert.IsTrue(actionMatches);
            drain.Await();

            // To test that IPs also work
            string localAddress = NetUtils.NormalizeHostName("localhost");

            WriteToHostsFile("host2", localAddress);
            rm.GetNodesListManager().RefreshNodes(configuration);
            CheckDecommissionedNMCount(rm, decommissionedBefore + 2);

            reply = firstNode.NodeHeartbeat(true);
            actionMatches = NodeAction.Normal.Equals(reply.GetNodeAction());
            NUnit.Framework.Assert.IsTrue(actionMatches);

            reply = secondNode.NodeHeartbeat(true);
            actionMatches = NodeAction.Shutdown.Equals(reply.GetNodeAction());
            NUnit.Framework.Assert.IsTrue("The decommisioned metrics are not updated", actionMatches);

            reply = localNode.NodeHeartbeat(true);
            actionMatches = NodeAction.Shutdown.Equals(reply.GetNodeAction());
            NUnit.Framework.Assert.IsTrue("The decommisioned metrics are not updated", actionMatches);
            drain.Await();

            // Clear the exclude file and let the localhost node join again.
            WriteToHostsFile(string.Empty);
            rm.GetNodesListManager().RefreshNodes(configuration);
            localNode = rm.RegisterNode("localhost:4433", 1024);
            drain.Await();
            reply = localNode.NodeHeartbeat(true);
            drain.Await();
            actionMatches = NodeAction.Normal.Equals(reply.GetNodeAction());
            NUnit.Framework.Assert.IsTrue(actionMatches);

            // The decommissioned count is baseline + 1 because one node rejoined
            // after the exclude file was updated.
            CheckDecommissionedNMCount(rm, decommissionedBefore + 1);
        }
예제 #5
0
        /// <summary>
        /// Starts with an include file listing all three hosts, then rewrites it
        /// without "host2" and expects host2 to receive a shutdown action while
        /// host1 and localhost (matched through its normalized address) keep
        /// heartbeating normally.
        /// </summary>
        public virtual void TestDecommissionWithIncludeHosts()
        {
            WriteToHostsFile("localhost", "host1", "host2");
            Configuration conf = new Configuration();

            conf.Set(YarnConfiguration.RmNodesIncludeFilePath, hostFile.GetAbsolutePath());
            rm = new MockRM(conf);
            rm.Start();
            MockNM         nm1     = rm.RegisterNode("host1:1234", 5120);
            MockNM         nm2     = rm.RegisterNode("host2:5678", 10240);
            MockNM         nm3     = rm.RegisterNode("localhost:4433", 1024);
            ClusterMetrics metrics = ClusterMetrics.GetMetrics();

            // Use a real test assertion here: Debug.Assert is compiled out of
            // non-DEBUG builds.
            NUnit.Framework.Assert.IsNotNull(metrics);
            int metricCount = metrics.GetNumDecommisionedNMs();

            // While every host is included, all three nodes heartbeat normally.
            NodeHeartbeatResponse nodeHeartbeat = nm1.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));
            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));
            nodeHeartbeat = nm3.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));

            // To test that IPs also work
            string ip = NetUtils.NormalizeHostName("localhost");

            // host2 is dropped from the include list.
            WriteToHostsFile("host1", ip);
            rm.GetNodesListManager().RefreshNodes(conf);
            CheckDecommissionedNMCount(rm, ++metricCount);

            nodeHeartbeat = nm1.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));
            // Compare against the captured baseline + 1 rather than a literal 1 so
            // the check does not depend on the metric starting at zero.
            NUnit.Framework.Assert.AreEqual(metricCount,
                                            ClusterMetrics.GetMetrics().GetNumDecommisionedNMs());

            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue("Node is not decommisioned.",
                                          NodeAction.Shutdown.Equals(nodeHeartbeat.GetNodeAction()));

            nodeHeartbeat = nm3.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));
            NUnit.Framework.Assert.AreEqual(metricCount,
                                            ClusterMetrics.GetMetrics().GetNumDecommisionedNMs());
        }
예제 #6
0
            /// <summary>
            /// Heartbeat loop of the status updater. Until the enclosing object
            /// reports <c>isStopped</c>, each iteration sends one node heartbeat
            /// through <c>resourceTracker</c>, reacts to the action contained in
            /// the response and then waits on <c>heartbeatMonitor</c> for the
            /// next heartbeat interval.
            /// </summary>
            /// <remarks>
            /// The loop is left early when the response carries a Shutdown or
            /// Resync action. A <c>ConnectException</c> dispatches a Shutdown
            /// event and is rethrown wrapped in a <c>YarnRuntimeException</c>;
            /// any other exception is logged and the loop continues.
            /// </remarks>
            public void Run()
            {
                // Response id of the last heartbeat that was processed completely;
                // passed to GetNodeStatus for the next heartbeat.
                int lastHeartBeatID = 0;

                while (!this._enclosing.isStopped)
                {
                    try
                    {
                        NodeHeartbeatResponse response   = null;
                        NodeStatus            nodeStatus = this._enclosing.GetNodeStatus(lastHeartBeatID);
                        // The request carries the node status plus the current keys of the
                        // container-token and NM-token secret managers.
                        NodeHeartbeatRequest  request    = NodeHeartbeatRequest.NewInstance(nodeStatus, this.
                                                                                            _enclosing.context.GetContainerTokenSecretManager().GetCurrentKey(), this._enclosing
                                                                                            .context.GetNMTokenSecretManager().GetCurrentKey());
                        response = this._enclosing.resourceTracker.NodeHeartbeat(request);
                        // Remember the interval from the response; it is used by the
                        // wait in the finally block below.
                        this._enclosing.nextHeartBeatInterval = response.GetNextHeartBeatInterval();
                        this.UpdateMasterKeys(response);
                        // Shutdown: mark the context as decommissioned, raise a Shutdown
                        // event and leave the loop.
                        if (response.GetNodeAction() == NodeAction.Shutdown)
                        {
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat,"
                                                                                                     + " hence shutting down.");
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Message from ResourceManager: "
                                                                                                     + response.GetDiagnosticsMessage());
                            this._enclosing.context.SetDecommissioned(true);
                            this._enclosing.dispatcher.GetEventHandler().Handle(new NodeManagerEvent(NodeManagerEventType
                                                                                                     .Shutdown));
                            break;
                        }
                        // Resync: reset the RM identifier to the invalid marker, raise a
                        // Resync event, drop the pending completed containers and leave
                        // the loop.
                        if (response.GetNodeAction() == NodeAction.Resync)
                        {
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Node is out of sync with ResourceManager,"
                                                                                                     + " hence resyncing.");
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn("Message from ResourceManager: "
                                                                                                     + response.GetDiagnosticsMessage());
                            this._enclosing.rmIdentifier = ResourceManagerConstants.RmInvalidIdentifier;
                            this._enclosing.dispatcher.GetEventHandler().Handle(new NodeManagerEvent(NodeManagerEventType
                                                                                                     .Resync));
                            this._enclosing.pendingCompletedContainers.Clear();
                            break;
                        }
                        // Regular response: hand over the containers the response lists
                        // as "to be removed from NM".
                        this._enclosing.RemoveOrTrackCompletedContainersFromContext(response.GetContainersToBeRemovedFromNM
                                                                                        ());
                        // Only reached when neither Shutdown nor Resync was requested.
                        lastHeartBeatID = response.GetResponseId();
                        // Raise a cleanup event for containers, but only if there are any.
                        IList <ContainerId> containersToCleanup = response.GetContainersToCleanup();
                        if (!containersToCleanup.IsEmpty())
                        {
                            this._enclosing.dispatcher.GetEventHandler().Handle(new CMgrCompletedContainersEvent
                                                                                    (containersToCleanup, CMgrCompletedContainersEvent.Reason.ByResourcemanager));
                        }
                        // Applications to clean up are always passed to
                        // TrackAppsForKeepAlive; the cleanup event is raised only for a
                        // non-empty list.
                        IList <ApplicationId> appsToCleanup = response.GetApplicationsToCleanup();
                        this._enclosing.TrackAppsForKeepAlive(appsToCleanup);
                        if (!appsToCleanup.IsEmpty())
                        {
                            this._enclosing.dispatcher.GetEventHandler().Handle(new CMgrCompletedAppsEvent(appsToCleanup
                                                                                                           , CMgrCompletedAppsEvent.Reason.ByResourcemanager));
                        }
                        // Store parsed system credentials on the context when the
                        // response provides a non-empty map.
                        IDictionary <ApplicationId, ByteBuffer> systemCredentials = response.GetSystemCredentialsForApps
                                                                                        ();
                        if (systemCredentials != null && !systemCredentials.IsEmpty())
                        {
                            ((NodeManager.NMContext) this._enclosing.context).SetSystemCrendentialsForApps(Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl
                                                                                                           .ParseCredentials(systemCredentials));
                        }
                    }
                    catch (ConnectException e)
                    {
                        // A connection failure raises a Shutdown event and is rethrown;
                        // the finally block below still runs first.
                        this._enclosing.dispatcher.GetEventHandler().Handle(new NodeManagerEvent(NodeManagerEventType
                                                                                                 .Shutdown));
                        throw new YarnRuntimeException(e);
                    }
                    catch (Exception e)
                    {
                        // Any other failure is only logged; the loop goes on with the
                        // next iteration after the wait below.
                        Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Error("Caught exception in status-updater"
                                                                                                  , e);
                    }
                    finally
                    {
                        // Runs on every exit path from the try block, including the
                        // break statements above and the rethrow in the ConnectException
                        // handler.
                        lock (this._enclosing.heartbeatMonitor)
                        {
                            // Fall back to the default interval when the stored value is
                            // zero or negative.
                            this._enclosing.nextHeartBeatInterval = this._enclosing.nextHeartBeatInterval <=
                                                                    0 ? YarnConfiguration.DefaultRmNmHeartbeatIntervalMs : this._enclosing.nextHeartBeatInterval;
                            try
                            {
                                Sharpen.Runtime.Wait(this._enclosing.heartbeatMonitor, this._enclosing.nextHeartBeatInterval
                                                     );
                            }
                            catch (Exception)
                            {
                                // Exceptions thrown by the wait are ignored here.
                                // NOTE(review): presumably this is meant to tolerate an
                                // interrupted wait - confirm before narrowing the catch.
                            }
                        }
                    }
                }
            }
예제 #7
0
        /// <summary>
        /// Re-registers already known nodes in several situations (healthy,
        /// unhealthy, unhealthy turned healthy, changed capability with and without
        /// running applications, changed HTTP port) and checks the active-NM count,
        /// the unhealthy-NM count and the available memory of the root queue after
        /// each step.
        /// </summary>
        public virtual void TestReconnectNode()
        {
            DrainDispatcher drain = new DrainDispatcher();

            rm = new _MockRM_567(this, drain);
            rm.Start();

            MockNM nodeA = rm.RegisterNode("host1:1234", 5120);
            MockNM nodeB = rm.RegisterNode("host2:5678", 5120);

            // nodeA reports healthy, nodeB reports unhealthy.
            nodeA.NodeHeartbeat(true);
            nodeB.NodeHeartbeat(false);
            drain.Await();
            CheckUnealthyNMCount(rm, nodeB, true, 1);

            int          activeBefore = ClusterMetrics.GetMetrics().GetNumActiveNMs();
            QueueMetrics rootMetrics  = rm.GetResourceScheduler().GetRootQueueMetrics();

            // TODO Metrics incorrect in case of the FifoScheduler
            NUnit.Framework.Assert.AreEqual(5120, rootMetrics.GetAvailableMB());

            // reconnect of healthy node
            nodeA = rm.RegisterNode("host1:1234", 5120);
            NodeHeartbeatResponse reply = nodeA.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(reply.GetNodeAction()));
            drain.Await();
            NUnit.Framework.Assert.AreEqual(activeBefore,
                                            ClusterMetrics.GetMetrics().GetNumActiveNMs());
            CheckUnealthyNMCount(rm, nodeB, true, 1);

            // reconnect of unhealthy node
            nodeB = rm.RegisterNode("host2:5678", 5120);
            reply = nodeB.NodeHeartbeat(false);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(reply.GetNodeAction()));
            drain.Await();
            NUnit.Framework.Assert.AreEqual(activeBefore,
                                            ClusterMetrics.GetMetrics().GetNumActiveNMs());
            CheckUnealthyNMCount(rm, nodeB, true, 1);

            // unhealthy node changed back to healthy
            nodeB = rm.RegisterNode("host2:5678", 5120);
            drain.Await();
            reply = nodeB.NodeHeartbeat(true);
            reply = nodeB.NodeHeartbeat(true);
            drain.Await();
            NUnit.Framework.Assert.AreEqual(5120 + 5120, rootMetrics.GetAvailableMB());

            // reconnect of node with changed capability
            nodeA = rm.RegisterNode("host2:5678", 10240);
            drain.Await();
            reply = nodeA.NodeHeartbeat(true);
            drain.Await();
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(reply.GetNodeAction()));
            NUnit.Framework.Assert.AreEqual(5120 + 10240, rootMetrics.GetAvailableMB());

            // reconnect of node with changed capability and running applications
            IList <ApplicationId> appsOnNode = new AList <ApplicationId>();
            appsOnNode.AddItem(ApplicationId.NewInstance(1, 0));
            nodeA = rm.RegisterNode("host2:5678", 15360, 2, appsOnNode);
            drain.Await();
            reply = nodeA.NodeHeartbeat(true);
            drain.Await();
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(reply.GetNodeAction()));
            NUnit.Framework.Assert.AreEqual(5120 + 15360, rootMetrics.GetAvailableMB());

            // reconnect healthy node changing http port
            nodeA = new MockNM("host1:1234", 5120, rm.GetResourceTrackerService());
            nodeA.SetHttpPort(3);
            nodeA.RegisterNode();
            drain.Await();
            reply = nodeA.NodeHeartbeat(true);
            reply = nodeA.NodeHeartbeat(true);
            drain.Await();

            RMNode registered = rm.GetRMContext().GetRMNodes()[nodeA.GetNodeId()];
            NUnit.Framework.Assert.AreEqual(3, registered.GetHttpPort());
            NUnit.Framework.Assert.AreEqual(5120, registered.GetTotalCapability().GetMemory());
            NUnit.Framework.Assert.AreEqual(5120 + 15360, rootMetrics.GetAvailableMB());
        }
        // The test verifies six major steps.
        // Step-1 : AMRMClient sends an allocate request for 2 container requests.
        // Step-2 : 2 containers are allocated by the RM.
        // Step-3 : AM sends 1 containerRequest (cRequest3) and 1 release request to
        // the RM.
        // Step-4 : On RM restart, the AM (which does not know the RM was restarted)
        // sends an additional containerRequest (cRequest4) and blacklisted nodes.
        // In turn, the RM sends a resync command.
        // Step-5 : Allocate after the resync command with a new containerRequest (cRequest5).
        // Step-6 : RM allocates containers, i.e. cRequest3, cRequest4 and cRequest5.
        /// <exception cref="System.Exception"/>
        /// <summary>
        /// Runs an AMRMClient against a first ResourceManager, switches it to a
        /// second ResourceManager built on the same state store, and checks that
        /// asks, releases and blacklist changes are sent again so that the
        /// outstanding container requests end up allocated by the second RM.
        /// </summary>
        public virtual void TestAMRMClientResendsRequestsOnRMRestart()
        {
            UserGroupInformation.SetLoginUser(null);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);

            // Phase-1 Start 1st RM
            TestAMRMClientOnRMRestart.MyResourceManager rm1 =
                new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
            rm1.Start();
            DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();

            // Submit the application
            RMApp app = rm1.SubmitApp(1024);
            dispatcher.Await();

            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());
            nm1.RegisterNode();
            // Node heartbeat
            nm1.NodeHeartbeat(true);
            dispatcher.Await();

            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();
            rm1.SendAMLaunched(appAttemptId);
            dispatcher.Await();

            // Make the AMRM token of the attempt known to the current user.
            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token =
                rm1.GetRMContext().GetRMApps()[appAttemptId.GetApplicationId()]
                    .GetRMAppAttempt(appAttemptId).GetAMRMToken();
            UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();
            ugi.AddTokenIdentifier(token.DecodeIdentifier());

            // Step-1 : AMRMClient send allocate request for 2 ContainerRequest
            // cRequest1 = h1 and cRequest2 = h1,h2
            // blacklisted nodes = h2
            AMRMClient <AMRMClient.ContainerRequest> amClient =
                new TestAMRMClientOnRMRestart.MyAMRMClientImpl(rm1);
            amClient.Init(conf);
            amClient.Start();
            amClient.RegisterApplicationMaster("Host", 10000, string.Empty);

            AMRMClient.ContainerRequest cRequest1 = CreateReq(1, 1024, new string[] { "h1" });
            amClient.AddContainerRequest(cRequest1);
            AMRMClient.ContainerRequest cRequest2 = CreateReq(1, 1024, new string[] { "h1", "h2" });
            amClient.AddContainerRequest(cRequest2);

            IList <string> blacklistAdditions = new AList <string>();
            IList <string> blacklistRemoval   = new AList <string>();
            blacklistAdditions.AddItem("h2");
            blacklistRemoval.AddItem("h10");
            amClient.UpdateBlacklist(blacklistAdditions, blacklistRemoval);
            // remove from local list
            blacklistAdditions.Remove("h2");

            AllocateResponse allocateResponse = amClient.Allocate(0.1f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
                                            allocateResponse.GetAllocatedContainers().Count);
            // Why 4 asks and not 3, even though h2 is blacklisted?
            // On blacklisting a host, the application master has to remove the ask
            // from the remote request table. This test does not remove it explicitly.
            AssertAsksAndReleases(4, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(1, 1, rm1);

            // Step-2 : NM heart beat is sent.
            // On 2nd AM allocate request, RM allocates 2 containers to AM
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            allocateResponse = amClient.Allocate(0.2f);
            dispatcher.Await();
            // 2 containers are allocated i.e for cRequest1 and cRequest2.
            // (The message used to claim "must be 0" although 2 is expected.)
            NUnit.Framework.Assert.AreEqual("No of assignments must be 2", 2,
                                            allocateResponse.GetAllocatedContainers().Count);
            AssertAsksAndReleases(0, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            IList <Container> allocatedContainers = allocateResponse.GetAllocatedContainers();

            // removed allocated container requests
            amClient.RemoveContainerRequest(cRequest1);
            amClient.RemoveContainerRequest(cRequest2);
            allocateResponse = amClient.Allocate(0.2f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
                                            allocateResponse.GetAllocatedContainers().Count);
            AssertAsksAndReleases(4, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);

            // Step-3 : Send 1 containerRequest and 1 releaseRequests to RM
            AMRMClient.ContainerRequest cRequest3 = CreateReq(1, 1024, new string[] { "h1" });
            amClient.AddContainerRequest(cRequest3);
            int pendingRelease         = 0;
            IEnumerator <Container> it = allocatedContainers.GetEnumerator();

            // Release exactly one of the allocated containers now; the remaining
            // ones are released after the switch to the second RM.
            if (it.HasNext())
            {
                amClient.ReleaseAssignedContainer(it.Next().GetId());
                pendingRelease++;
                it.Remove();
            }
            allocateResponse = amClient.Allocate(0.3f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
                                            allocateResponse.GetAllocatedContainers().Count);
            AssertAsksAndReleases(3, pendingRelease, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            int completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
            pendingRelease -= completedContainer;

            // Phase-2 start 2nd RM is up
            TestAMRMClientOnRMRestart.MyResourceManager rm2 =
                new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
            rm2.Start();
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
            dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();

            // The first heartbeat of nm1 against rm2 must be answered with resync.
            NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);
            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());

            // new NM to represent NM re-register
            nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            dispatcher.Await();

            blacklistAdditions.AddItem("h3");
            amClient.UpdateBlacklist(blacklistAdditions, null);
            blacklistAdditions.Remove("h3");

            // Release every container that is still held.
            it = allocatedContainers.GetEnumerator();
            while (it.HasNext())
            {
                amClient.ReleaseAssignedContainer(it.Next().GetId());
                pendingRelease++;
                it.Remove();
            }
            AMRMClient.ContainerRequest cRequest4 = CreateReq(1, 1024, new string[] { "h1", "h2" });
            amClient.AddContainerRequest(cRequest4);

            // Step-4 : On RM restart, the AM (which does not know the RM was
            // restarted) sends an additional containerRequest and blacklisted nodes.
            // In turn the RM sends a resync command and AMRMClient resends the
            // allocate request.
            allocateResponse = amClient.Allocate(0.3f);
            dispatcher.Await();
            completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
            pendingRelease    -= completedContainer;
            AssertAsksAndReleases(4, pendingRelease, rm2);
            AssertBlacklistAdditionsAndRemovals(2, 0, rm2);

            AMRMClient.ContainerRequest cRequest5 =
                CreateReq(1, 1024, new string[] { "h1", "h2", "h3" });
            amClient.AddContainerRequest(cRequest5);

            // Step-5 : Allocate after resync command
            allocateResponse = amClient.Allocate(0.5f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
                                            allocateResponse.GetAllocatedContainers().Count);
            AssertAsksAndReleases(5, 0, rm2);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm2);

            // Heartbeat and allocate up to five times, stopping early once three
            // containers have been handed out.
            int noAssignedContainer = 0;
            int count = 5;

            while (count-- > 0)
            {
                nm1.NodeHeartbeat(true);
                dispatcher.Await();
                allocateResponse = amClient.Allocate(0.5f);
                dispatcher.Await();
                noAssignedContainer += allocateResponse.GetAllocatedContainers().Count;
                if (noAssignedContainer == 3)
                {
                    break;
                }
                Sharpen.Thread.Sleep(1000);
            }

            // Step-6 : RM allocates containers i.e cRequest3,cRequest4 and cRequest5
            NUnit.Framework.Assert.AreEqual("Number of container should be 3", 3,
                                            noAssignedContainer);
            amClient.Stop();
            rm1.Stop();
            rm2.Stop();
        }
        // This test verifies the following scenarios:
        // 1. The AM tries to unregister without registering.
        // 2. The AM registers with the RM and tries to unregister immediately after the RM restarts.
        /// <exception cref="System.Exception"/>
        /// <summary>
        /// Registers an AM against a first RM, brings up a second RM on the same
        /// state store, redirects the NM and the AM client to it, and checks that
        /// the AM can unregister and that the attempt and the application reach
        /// the Finished state on the second RM.
        /// </summary>
        public virtual void TestAMRMClientForUnregisterAMOnRMRestart()
        {
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // Phase-1 Start 1st RM
            TestAMRMClientOnRMRestart.MyResourceManager rm1 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            // Declared before the try block so that the finally block can stop
            // whatever was actually created, even when the test fails half-way.
            TestAMRMClientOnRMRestart.MyResourceManager rm2 = null;
            AMRMClient <AMRMClient.ContainerRequest> amClient = null;

            try
            {
                rm1.Start();
                DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();
                // Submit the application
                RMApp app = rm1.SubmitApp(1024);

                dispatcher.Await();
                MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());

                nm1.RegisterNode();
                nm1.NodeHeartbeat(true);
                // Let the dispatcher drain the events triggered by the node heartbeat
                dispatcher.Await();
                ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

                rm1.SendAMLaunched(appAttemptId);
                dispatcher.Await();
                // Add the attempt's AMRM token identifier to the current user before
                // the client starts talking to the RM.
                Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token = rm1.GetRMContext
                                                                                         ().GetRMApps()[appAttemptId.GetApplicationId()].GetRMAppAttempt(appAttemptId).GetAMRMToken
                                                                                         ();
                UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();

                ugi.AddTokenIdentifier(token.DecodeIdentifier());
                amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl(rm1);
                amClient.Init(conf);
                amClient.Start();
                amClient.RegisterApplicationMaster("h1", 10000, string.Empty);
                amClient.Allocate(0.1f);
                // Phase-2: start the 2nd RM on the same state store and point both
                // the NM and the AM client at it.
                rm2 = new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
                rm2.Start();
                nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
                ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
                dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();
                // nm1 registered with rm1 only, so its very first heartbeat to rm2
                // must be answered with a resync command.
                NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);

                NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());
                // new NM to represent NM re-register
                nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
                ContainerId       containerId     = ContainerId.NewContainerId(appAttemptId, 1);
                NMContainerStatus containerReport = NMContainerStatus.NewInstance(containerId, ContainerState
                                                                                  .Running, Resource.NewInstance(1024, 1), "recover container", 0, Priority.NewInstance
                                                                                      (0), 0);

                // Re-register with rm2, reporting container 1 as still running
                nm1.RegisterNode(Arrays.AsList(containerReport), null);
                nm1.NodeHeartbeat(true);
                dispatcher.Await();
                // Unregister against rm2, where this client never called
                // RegisterApplicationMaster itself.
                amClient.UnregisterApplicationMaster(FinalApplicationStatus.Succeeded, null, null
                                                     );
                rm2.WaitForState(appAttemptId, RMAppAttemptState.Finishing);
                // Report container 1 as complete; the attempt and the application
                // are then expected to reach Finished.
                nm1.NodeHeartbeat(appAttemptId, 1, ContainerState.Complete);
                rm2.WaitForState(appAttemptId, RMAppAttemptState.Finished);
                rm2.WaitForState(app.GetApplicationId(), RMAppState.Finished);
            }
            finally
            {
                // Always shut everything down, in the original order, so that a
                // failed assertion does not leak running services into later
                // tests. The nested finally blocks make sure every Stop() is
                // attempted even when an earlier one throws.
                try
                {
                    if (amClient != null)
                    {
                        amClient.Stop();
                    }
                }
                finally
                {
                    try
                    {
                        rm1.Stop();
                    }
                    finally
                    {
                        if (rm2 != null)
                        {
                            rm2.Stop();
                        }
                    }
                }
            }
        }