/// <summary>
/// Verifies that the AMRMClient re-sends its outstanding requests after the
/// ResourceManager has been restarted. The test walks through six steps:
/// <para>Step-1: AMRMClient sends an allocate request for 2 container requests.</para>
/// <para>Step-2: 2 containers are allocated by the RM.</para>
/// <para>Step-3: AM sends 1 container request (cRequest3) and 1 release request to the RM.</para>
/// <para>Step-4: On RM restart, the AM (which does not know the RM was restarted) sends an
/// additional container request (cRequest4) and blacklisted nodes. In turn the RM sends a
/// resync command.</para>
/// <para>Step-5: Allocate after the resync command, plus a new container request (cRequest5).</para>
/// <para>Step-6: RM allocates containers for cRequest3, cRequest4 and cRequest5.</para>
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestAMRMClientResendsRequestsOnRMRestart()
{
    UserGroupInformation.SetLoginUser(null);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    // Phase-1 Start 1st RM
    TestAMRMClientOnRMRestart.MyResourceManager rm1 = new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
    // Declared before the try block so the finally block can stop whatever was created,
    // even when an assertion fails half-way through the scenario.
    TestAMRMClientOnRMRestart.MyResourceManager rm2 = null;
    AMRMClient<AMRMClient.ContainerRequest> amClient = null;
    try
    {
        rm1.Start();
        DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();

        // Submit the application
        RMApp app = rm1.SubmitApp(1024);
        dispatcher.Await();

        MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        nm1.NodeHeartbeat(true); // Node heartbeat
        dispatcher.Await();

        ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();
        rm1.SendAMLaunched(appAttemptId);
        dispatcher.Await();

        // Hand the attempt's AMRM token identifier to the current user.
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> token = rm1.GetRMContext().GetRMApps()[appAttemptId.GetApplicationId()].GetRMAppAttempt(appAttemptId).GetAMRMToken();
        UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();
        ugi.AddTokenIdentifier(token.DecodeIdentifier());

        // Step-1 : AMRMClient sends an allocate request for 2 ContainerRequests
        // cRequest1 = h1 and cRequest2 = h1,h2
        // blacklisted nodes = h2
        amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl(rm1);
        amClient.Init(conf);
        amClient.Start();
        amClient.RegisterApplicationMaster("Host", 10000, string.Empty);

        AMRMClient.ContainerRequest cRequest1 = CreateReq(1, 1024, new string[] { "h1" });
        amClient.AddContainerRequest(cRequest1);
        AMRMClient.ContainerRequest cRequest2 = CreateReq(1, 1024, new string[] { "h1", "h2" });
        amClient.AddContainerRequest(cRequest2);

        IList<string> blacklistAdditions = new AList<string>();
        IList<string> blacklistRemoval = new AList<string>();
        blacklistAdditions.AddItem("h2");
        blacklistRemoval.AddItem("h10");
        amClient.UpdateBlacklist(blacklistAdditions, blacklistRemoval);
        blacklistAdditions.Remove("h2"); // remove from local list

        AllocateResponse allocateResponse = amClient.Allocate(0.1f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse.GetAllocatedContainers().Count);

        // Why 4 asks rather than 3 even though h2 is blacklisted?
        // On blacklisting a host, the application master has to remove the ask
        // from the remote request table itself. This test does not do so explicitly.
        AssertAsksAndReleases(4, 0, rm1);
        AssertBlacklistAdditionsAndRemovals(1, 1, rm1);

        // Step-2 : NM heartbeat is sent.
        // On the 2nd AM allocate request, the RM allocates 2 containers to the AM
        nm1.NodeHeartbeat(true); // Node heartbeat
        dispatcher.Await();

        allocateResponse = amClient.Allocate(0.2f);
        dispatcher.Await();
        // 2 containers are allocated, i.e. for cRequest1 and cRequest2.
        // (The message previously claimed 0 while the expected value is 2.)
        NUnit.Framework.Assert.AreEqual("No of assignments must be 2", 2, allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(0, 0, rm1);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm1);

        IList<Container> allocatedContainers = allocateResponse.GetAllocatedContainers();

        // Remove the container requests that have been satisfied
        amClient.RemoveContainerRequest(cRequest1);
        amClient.RemoveContainerRequest(cRequest2);

        allocateResponse = amClient.Allocate(0.2f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(4, 0, rm1);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm1);

        // Step-3 : Send 1 containerRequest and 1 releaseRequest to RM
        AMRMClient.ContainerRequest cRequest3 = CreateReq(1, 1024, new string[] { "h1" });
        amClient.AddContainerRequest(cRequest3);

        int pendingRelease = 0;
        IEnumerator<Container> it = allocatedContainers.GetEnumerator();
        // Release exactly one container (the first one) and drop it from the local list.
        if (it.HasNext())
        {
            amClient.ReleaseAssignedContainer(it.Next().GetId());
            pendingRelease++;
            it.Remove();
        }

        allocateResponse = amClient.Allocate(0.3f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(3, pendingRelease, rm1);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm1);

        // Releases already reported as completed are no longer pending.
        int completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
        pendingRelease -= completedContainer;

        // Phase-2 Start 2nd RM (sharing the same state store) and point NM and AM client at it
        rm2 = new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
        rm2.Start();
        nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
        ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
        dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();

        // The NM's first heartbeat to the restarted RM (rm2) must be answered with a resync
        NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);
        NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());

        // New NM instance to represent the NM re-registering
        nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
        nm1.RegisterNode();
        nm1.NodeHeartbeat(true);
        dispatcher.Await();

        blacklistAdditions.AddItem("h3");
        amClient.UpdateBlacklist(blacklistAdditions, null);
        blacklistAdditions.Remove("h3");

        // Release every container that is still held locally
        it = allocatedContainers.GetEnumerator();
        while (it.HasNext())
        {
            amClient.ReleaseAssignedContainer(it.Next().GetId());
            pendingRelease++;
            it.Remove();
        }

        AMRMClient.ContainerRequest cRequest4 = CreateReq(1, 1024, new string[] { "h1", "h2" });
        amClient.AddContainerRequest(cRequest4);

        // Step-4 : On RM restart, the AM (which does not know the RM was restarted)
        // sends an additional containerRequest and blacklisted nodes.
        // In turn the RM sends a resync command and the AMRMClient re-sends the allocate request
        allocateResponse = amClient.Allocate(0.3f);
        dispatcher.Await();

        completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
        pendingRelease -= completedContainer;

        AssertAsksAndReleases(4, pendingRelease, rm2);
        AssertBlacklistAdditionsAndRemovals(2, 0, rm2);

        AMRMClient.ContainerRequest cRequest5 = CreateReq(1, 1024, new string[] { "h1", "h2", "h3" });
        amClient.AddContainerRequest(cRequest5);

        // Step-5 : Allocate after the resync command
        allocateResponse = amClient.Allocate(0.5f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(5, 0, rm2);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm2);

        // Heartbeat and allocate up to 5 times, stopping early once 3 containers were assigned
        int noAssignedContainer = 0;
        int count = 5;
        while (count-- > 0)
        {
            nm1.NodeHeartbeat(true);
            dispatcher.Await();

            allocateResponse = amClient.Allocate(0.5f);
            dispatcher.Await();
            noAssignedContainer += allocateResponse.GetAllocatedContainers().Count;
            if (noAssignedContainer == 3)
            {
                break;
            }
            Sharpen.Thread.Sleep(1000);
        }

        // Step-6 : RM allocates containers, i.e. for cRequest3, cRequest4 and cRequest5
        NUnit.Framework.Assert.AreEqual("Number of container should be 3", 3, noAssignedContainer);
    }
    finally
    {
        // Stop in the same order as the original success path; runs on failure too
        // so a failed assertion does not leave the client or the RMs running.
        if (amClient != null)
        {
            amClient.Stop();
        }
        rm1.Stop();
        if (rm2 != null)
        {
            rm2.Stop();
        }
    }
}
/// <summary>
/// Verifies that an AM which registered with the first RM can unregister
/// immediately after the RM has been restarted, and that the application
/// attempt and the application then reach the Finished state on the new RM.
/// </summary>
/// <remarks>
/// NOTE(review): the original header also listed "AM try to unregister without
/// registering" as a verified case, but no such step is present in this method
/// body - confirm whether that scenario is covered elsewhere.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAMRMClientForUnregisterAMOnRMRestart()
{
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    // Phase-1 Start 1st RM
    TestAMRMClientOnRMRestart.MyResourceManager rm1 = new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
    // Declared before the try block so the finally block can stop whatever was created,
    // even when a wait or assertion fails half-way through the scenario.
    TestAMRMClientOnRMRestart.MyResourceManager rm2 = null;
    AMRMClient<AMRMClient.ContainerRequest> amClient = null;
    try
    {
        rm1.Start();
        DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();

        // Submit the application
        RMApp app = rm1.SubmitApp(1024);
        dispatcher.Await();

        MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        nm1.NodeHeartbeat(true); // Node heartbeat
        dispatcher.Await();

        ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();
        rm1.SendAMLaunched(appAttemptId);
        dispatcher.Await();

        // Hand the attempt's AMRM token identifier to the current user.
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> token = rm1.GetRMContext().GetRMApps()[appAttemptId.GetApplicationId()].GetRMAppAttempt(appAttemptId).GetAMRMToken();
        UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();
        ugi.AddTokenIdentifier(token.DecodeIdentifier());

        // Register the AM with the first RM and send one allocate call
        amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl(rm1);
        amClient.Init(conf);
        amClient.Start();
        amClient.RegisterApplicationMaster("h1", 10000, string.Empty);
        amClient.Allocate(0.1f);

        // Phase-2 Start 2nd RM (sharing the same state store) and point NM and AM client at it
        rm2 = new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
        rm2.Start();
        nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
        ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
        dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();

        // The NM's first heartbeat to the restarted RM (rm2) must be answered with a resync
        NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);
        NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());

        // New NM instance to represent the NM re-registering; it reports
        // container 1 of the attempt as still running.
        nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
        ContainerId containerId = ContainerId.NewContainerId(appAttemptId, 1);
        NMContainerStatus containerReport = NMContainerStatus.NewInstance(containerId, ContainerState.Running, Resource.NewInstance(1024, 1), "recover container", 0, Priority.NewInstance(0), 0);
        nm1.RegisterNode(Arrays.AsList(containerReport), null);
        nm1.NodeHeartbeat(true);
        dispatcher.Await();

        // Unregister against the new RM and follow the attempt through Finishing to Finished
        amClient.UnregisterApplicationMaster(FinalApplicationStatus.Succeeded, null, null);
        rm2.WaitForState(appAttemptId, RMAppAttemptState.Finishing);
        // Report container 1 of the attempt as complete
        nm1.NodeHeartbeat(appAttemptId, 1, ContainerState.Complete);
        rm2.WaitForState(appAttemptId, RMAppAttemptState.Finished);
        rm2.WaitForState(app.GetApplicationId(), RMAppState.Finished);
    }
    finally
    {
        // Stop in the same order as the original success path; runs on failure too
        // so a failed step does not leave the client or the RMs running.
        if (amClient != null)
        {
            amClient.Stop();
        }
        rm1.Stop();
        if (rm2 != null)
        {
            rm2.Stop();
        }
    }
}