/// <summary>
/// Checks that an application attempt is rejected with an
/// <c>InvalidContainerReleaseException</c> when it tries to release a
/// container that was allocated to a different application attempt.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestInvalidContainerReleaseRequest()
{
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();

        // Register node1
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);

        // Submit the first application and kick the scheduling
        RMApp app1 = rm.SubmitApp(1024);
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();

        // Send the container request, then kick the scheduler
        am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
        AllocateResponse app1Allocation = am1.Schedule();
        nm1.NodeHeartbeat(true);

        // Poll until app 1 has been handed at least one container
        for (; app1Allocation.GetAllocatedContainers().Count < 1; app1Allocation = am1.Schedule())
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
        }
        NUnit.Framework.Assert.IsTrue(app1Allocation.GetAllocatedContainers().Count > 0);

        // Bring up a second application with its own attempt
        RMApp app2 = rm.SubmitApp(1024);
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
        MockAM am2 = rm.SendAMLaunched(attempt2.GetAppAttemptId());
        am2.RegisterAppAttempt();

        // Now trying to release container allocated for app1 -> appAttempt1.
        ContainerId foreignContainerId = app1Allocation.GetAllocatedContainers()[0].GetId();
        am2.AddContainerToBeReleased(foreignContainerId);
        try
        {
            am2.Schedule();
            NUnit.Framework.Assert.Fail("Exception was expected!!");
        }
        catch (InvalidContainerReleaseException ex)
        {
            string expectedFragment = "Cannot release container : "
                + foreignContainerId.ToString()
                + " not belonging to this application attempt : "
                + attempt2.GetAppAttemptId().ToString();
            NUnit.Framework.Assert.IsTrue(ex.Message.Contains(expectedFragment));
        }
    }
    finally
    {
        // rm was assigned from a constructor call above, so it is never null here.
        rm.Stop();
    }
}
/// <summary>
/// Starts an application on one RM, starts a second RM on the same
/// <c>MemoryRMStateStore</c>, re-registers both node managers with it, and
/// waits for the application to reach <c>RMAppState.Failed</c> and for
/// <c>WaitForAppCleanupMessageRecved</c> to return for both node managers.
/// </summary>
/// <remarks>
/// Both resource managers are stopped in a <c>finally</c> block so they are
/// released even when a wait or assertion in the test body throws.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAppCleanupWhenRMRestartedBeforeAppFinished()
{
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    MockRM rm1 = new MockRM(conf, memStore);
    // Created part-way through the test; stays null if the test fails earlier.
    MockRM rm2 = null;
    try
    {
        // start RM
        rm1.Start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 1024, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        MockNM nm2 = new MockNM("127.0.0.1:5678", 1024, rm1.GetResourceTrackerService());
        nm2.RegisterNode();

        // create app and launch the AM
        RMApp app0 = rm1.SubmitApp(200);
        MockAM am0 = LaunchAM(app0, rm1, nm1);

        // alloc another container on nm2
        AllocateResponse allocResponse = am0.Allocate(
            Arrays.AsList(ResourceRequest.NewInstance(
                Priority.NewInstance(1), "*", Resource.NewInstance(1024, 0), 1)),
            null);
        while (null == allocResponse.GetAllocatedContainers()
            || allocResponse.GetAllocatedContainers().IsEmpty())
        {
            nm2.NodeHeartbeat(true);
            allocResponse = am0.Allocate(null, null);
            Sharpen.Thread.Sleep(1000);
        }

        // start new RM
        rm2 = new MockRM(conf, memStore);
        rm2.Start();

        // nm1/nm2 register to rm2, and do a heartbeat
        nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
        nm1.RegisterNode(
            Arrays.AsList(NMContainerStatus.NewInstance(
                ContainerId.NewContainerId(am0.GetApplicationAttemptId(), 1),
                ContainerState.Complete, Resource.NewInstance(1024, 1), string.Empty, 0,
                Priority.NewInstance(0), 1234)),
            Arrays.AsList(app0.GetApplicationId()));
        nm2.SetResourceTrackerService(rm2.GetResourceTrackerService());
        nm2.RegisterNode(Arrays.AsList(app0.GetApplicationId()));

        // assert app state has been saved.
        rm2.WaitForState(app0.GetApplicationId(), RMAppState.Failed);

        // wait for application cleanup message received on NM1
        WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());

        // wait for application cleanup message received on NM2
        WaitForAppCleanupMessageRecved(nm2, app0.GetApplicationId());
    }
    finally
    {
        // Nested so that a failure while stopping rm1 cannot leave rm2 running.
        try
        {
            rm1.Stop();
        }
        finally
        {
            if (rm2 != null)
            {
                rm2.Stop();
            }
        }
    }
}
/// <summary>
/// Adds <paramref name="num"/> identical container requests (targeting the
/// host and rack of the first node report) to <paramref name="rmClient"/>,
/// then calls allocate at most twice, collecting the allocated containers and
/// storing every NM token returned into the client's NM token cache.
/// </summary>
/// <param name="rmClient">Client used to add the requests and to allocate.</param>
/// <param name="num">Number of container requests to add.</param>
/// <returns>The containers returned by the allocate calls.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
private ICollection<Container> AllocateContainers(AMRMClientImpl<AMRMClient.ContainerRequest> rmClient, int num)
{
    // setup container request
    Resource requestedCapability = Resource.NewInstance(1024, 0);
    Priority requestPriority = Priority.NewInstance(0);
    string[] hosts = new string[] { nodeReports[0].GetNodeId().GetHost() };
    string[] rackNames = new string[] { nodeReports[0].GetRackName() };
    for (int remaining = num; remaining > 0; --remaining)
    {
        rmClient.AddContainerRequest(
            new AMRMClient.ContainerRequest(requestedCapability, hosts, rackNames, requestPriority));
    }

    int requestedTotal = rmClient
        .remoteRequestsTable[requestPriority][ResourceRequest.Any][requestedCapability]
        .remoteRequest.GetNumContainers();

    // RM should allocate container within 2 calls to allocate()
    ICollection<Container> collected = new TreeSet<Container>();
    int receivedTotal = 0;
    for (int attemptsLeft = 2; receivedTotal < requestedTotal && attemptsLeft > 0; --attemptsLeft)
    {
        AllocateResponse response = rmClient.Allocate(0.1f);
        receivedTotal += response.GetAllocatedContainers().Count;
        foreach (Container allocated in response.GetAllocatedContainers())
        {
            collected.AddItem(allocated);
        }

        // Iterating an empty token list is a no-op, so no emptiness check is needed.
        foreach (NMToken nmToken in response.GetNMTokens())
        {
            rmClient.GetNMTokenCache().SetToken(nmToken.GetNodeId().ToString(), nmToken.GetToken());
        }

        if (receivedTotal < requestedTotal)
        {
            // sleep to let NM's heartbeat to RM and trigger allocations
            Sleep(1000);
        }
    }
    return collected;
}
// Test even if AM container is allocated with containerId not equal to 1, the
// following allocate requests from AM should be able to retrieve the
// corresponding NM Token.
/// <summary>
/// Bumps the attempt's container id before the AM is launched, then checks
/// that an allocate call from the AM returns an NM token whose node id is
/// nm1's node id.
/// </summary>
/// <remarks>
/// The resource manager is stopped in a <c>finally</c> block; previously this
/// test never stopped it.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestNMTokenSentForNormalContainer()
{
    conf.Set(YarnConfiguration.RmScheduler, typeof(CapacityScheduler).GetCanonicalName());
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
        RMApp app = rm.SubmitApp(2000);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();

        // Call getNewContainerId to increase container Id so that the AM container
        // Id doesn't equal to one.
        CapacityScheduler cs = (CapacityScheduler)rm.GetResourceScheduler();
        cs.GetApplicationAttempt(attempt.GetAppAttemptId()).GetNewContainerId();

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        MockAM am = MockRM.LaunchAM(app, rm, nm1);

        // am container Id not equal to 1.
        NUnit.Framework.Assert.IsTrue(attempt.GetMasterContainer().GetId().GetContainerId() != 1);

        // NMSecretManager doesn't record the node on which the am is allocated.
        NUnit.Framework.Assert.IsFalse(
            rm.GetRMContext().GetNMTokenSecretManager().IsApplicationAttemptNMTokenPresent(
                attempt.GetAppAttemptId(), nm1.GetNodeId()));
        am.RegisterAppAttempt();
        rm.WaitForState(app.GetApplicationId(), RMAppState.Running);

        int numContainers = 1;
        IList<Container> containers = new AList<Container>();

        // nmTokens keeps track of all the nmTokens issued in the allocate call.
        IList<NMToken> expectedNMTokens = new AList<NMToken>();

        // am1 allocate 1 container on nm1.
        while (true)
        {
            AllocateResponse response = am.Allocate("127.0.0.1", 2000, numContainers, new AList<ContainerId>());
            nm1.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
            Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
            if (containers.Count == numContainers)
            {
                break;
            }
            Sharpen.Thread.Sleep(200);
            System.Console.Out.WriteLine("Waiting for container to be allocated.");
        }

        NodeId nodeId = expectedNMTokens[0].GetNodeId();

        // NMToken is sent for the allocated container.
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), nodeId);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Allocates one container for an application and checks that the RM
/// identifier carried in the container's token equals
/// <c>MockRM.GetClusterTimeStamp()</c>.
/// </summary>
/// <remarks>
/// The resource manager is stopped in a <c>finally</c> block so it is
/// released even when the assertion fails.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestRMIdentifierOnContainerAllocation()
{
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();

        // Register node1
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);

        // Submit an application
        RMApp app1 = rm.SubmitApp(2048);

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();
        am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);

        // send the request
        AllocateResponse alloc1Response = am1.Schedule();

        // kick the scheduler
        nm1.NodeHeartbeat(true);
        while (alloc1Response.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
            alloc1Response = am1.Schedule();
        }

        // assert RMIdentifier is set properly in allocated containers
        Container allocatedContainer = alloc1Response.GetAllocatedContainers()[0];
        ContainerTokenIdentifier tokenId =
            BuilderUtils.NewContainerTokenIdentifier(allocatedContainer.GetContainerToken());
        NUnit.Framework.Assert.AreEqual(MockRM.GetClusterTimeStamp(), tokenId.GetRMIdentifier());
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Sends the pending asks, releases and blacklist changes to the scheduler in
/// one allocate call, records the response id, available resources and
/// cluster node count from the response, and then clears the pending
/// ask/release/blacklist collections.
/// </summary>
/// <returns>The response returned by the scheduler's allocate call.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
protected internal virtual AllocateResponse MakeRemoteRequest()
{
    ApplyRequestLimits();

    // Snapshot the pending collections into fresh lists for the request.
    ResourceBlacklistRequest blacklistSnapshot = ResourceBlacklistRequest.NewInstance(
        new AList<string>(blacklistAdditions),
        new AList<string>(blacklistRemovals));
    AllocateRequest request = AllocateRequest.NewInstance(
        lastResponseID,
        base.GetApplicationProgress(),
        new AList<ResourceRequest>(ask),
        new AList<ContainerId>(release),
        blacklistSnapshot);

    AllocateResponse response = scheduler.Allocate(request);

    // Record what the scheduler reported, keeping the previous node count.
    lastResponseID = response.GetResponseId();
    availableResources = response.GetAvailableResources();
    lastClusterNmCount = clusterNmCount;
    clusterNmCount = response.GetNumClusterNodes();
    int finishedCount = response.GetCompletedContainersStatuses().Count;

    bool sentAsksOrReleases = ask.Count > 0 || release.Count > 0;
    if (sentAsksOrReleases)
    {
        Log.Info("getResources() for " + applicationId + ":"
            + " ask=" + ask.Count
            + " release= " + release.Count
            + " newContainers=" + response.GetAllocatedContainers().Count
            + " finishedContainers=" + finishedCount
            + " resourcelimit=" + availableResources
            + " knownNMs=" + clusterNmCount);
    }
    ask.Clear();
    release.Clear();

    if (finishedCount > 0)
    {
        // re-send limited requests when a container completes to trigger asking
        // for more containers
        Sharpen.Collections.AddAll(requestLimitsToUpdate, requestLimits.Keys);
    }

    bool sentBlacklistChanges = blacklistAdditions.Count > 0 || blacklistRemovals.Count > 0;
    if (sentBlacklistChanges)
    {
        Log.Info("Update the blacklist for " + applicationId
            + ": blacklistAdditions=" + blacklistAdditions.Count
            + " blacklistRemovals=" + blacklistRemovals.Count);
    }
    blacklistAdditions.Clear();
    blacklistRemovals.Clear();

    return response;
}
/// <summary>
/// Fails the application master twice and checks, after each restart, that
/// the register response of the new attempt carries the NM tokens collected
/// by the previous attempts.
/// </summary>
/// <remarks>
/// The resource manager is stopped in a <c>finally</c> block so it is
/// released even when a wait or assertion throws.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestNMTokensRebindOnAMRestart()
{
    YarnConfiguration conf = new YarnConfiguration();
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 3);
    MockRM rm1 = new MockRM(conf);
    try
    {
        rm1.Start();
        RMApp app1 = rm1.SubmitApp(200, "myname", "myuser",
            new Dictionary<ApplicationAccessType, string>(), false, "default", -1, null,
            "MAPREDUCE", false, true);
        MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        MockNM nm2 = new MockNM("127.1.1.1:4321", 8000, rm1.GetResourceTrackerService());
        nm2.RegisterNode();
        MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
        IList<Container> containers = new AList<Container>();

        // nmTokens keeps track of all the nmTokens issued in the allocate call.
        IList<NMToken> expectedNMTokens = new AList<NMToken>();

        // am1 allocates 2 containers on nm1; poll until both have been received.
        while (true)
        {
            AllocateResponse response = am1.Allocate("127.0.0.1", 2000, 2, new AList<ContainerId>());
            nm1.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
            Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
            if (containers.Count == 2)
            {
                break;
            }
            Sharpen.Thread.Sleep(200);
            System.Console.Out.WriteLine("Waiting for container to be allocated.");
        }

        // launch the container-2
        nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 2, ContainerState.Running);
        ContainerId containerId2 = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 2);
        rm1.WaitForState(nm1, containerId2, RMContainerState.Running);

        // launch the container-3
        nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 3, ContainerState.Running);
        ContainerId containerId3 = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 3);
        rm1.WaitForState(nm1, containerId3, RMContainerState.Running);

        // fail am1
        nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am1.WaitForState(RMAppAttemptState.Failed);
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);

        // restart the am
        MockAM am2 = MockRM.LaunchAM(app1, rm1, nm1);
        RegisterApplicationMasterResponse registerResponse = am2.RegisterAppAttempt();
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running);

        // check am2 get the nm token from am1.
        NUnit.Framework.Assert.AreEqual(expectedNMTokens, registerResponse.GetNMTokensFromPreviousAttempts());

        // am2 allocate 1 container on nm2
        containers = new AList<Container>();
        while (true)
        {
            AllocateResponse allocateResponse = am2.Allocate("127.1.1.1", 4000, 1, new AList<ContainerId>());
            nm2.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(containers, allocateResponse.GetAllocatedContainers());
            Sharpen.Collections.AddAll(expectedNMTokens, allocateResponse.GetNMTokens());
            if (containers.Count == 1)
            {
                break;
            }
            Sharpen.Thread.Sleep(200);
            System.Console.Out.WriteLine("Waiting for container to be allocated.");
        }
        nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 2, ContainerState.Running);
        ContainerId am2ContainerId2 = ContainerId.NewContainerId(am2.GetApplicationAttemptId(), 2);
        rm1.WaitForState(nm1, am2ContainerId2, RMContainerState.Running);

        // fail am2.
        nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am2.WaitForState(RMAppAttemptState.Failed);
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);

        // restart am
        MockAM am3 = MockRM.LaunchAM(app1, rm1, nm1);
        registerResponse = am3.RegisterAppAttempt();
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running);

        // check am3 get the NM token from both am1 and am2;
        IList<NMToken> transferredTokens = registerResponse.GetNMTokensFromPreviousAttempts();
        NUnit.Framework.Assert.AreEqual(2, transferredTokens.Count);
        NUnit.Framework.Assert.IsTrue(transferredTokens.ContainsAll(expectedNMTokens));
    }
    finally
    {
        rm1.Stop();
    }
}
/// <summary>
/// Registers a 2 * Gb node and a 3 * Gb node, requests one 2 * Gb container,
/// and checks that nothing is allocated after heartbeating only nm1 while one
/// container is allocated after heartbeating nm2.
/// </summary>
/// <remarks>
/// The resource manager is stopped in a <c>finally</c> block so it is
/// released even when an assertion fails. Counts are compared with
/// <c>AreEqual</c> so a failure reports the actual value.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestExcessReservationThanNodeManagerCapacity()
{
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();

        // Register node1 and node2
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 2 * Gb, 4);
        MockNM nm2 = rm.RegisterNode("127.0.0.1:2234", 3 * Gb, 4);
        nm1.NodeHeartbeat(true);
        nm2.NodeHeartbeat(true);

        // wait (bounded) until both nodes are known to the RM
        int waitCount = 20;
        int size;
        while ((size = rm.GetRMContext().GetRMNodes().Count) != 2 && waitCount-- > 0)
        {
            Log.Info("Waiting for node managers to register : " + size);
            Sharpen.Thread.Sleep(100);
        }
        NUnit.Framework.Assert.AreEqual(2, rm.GetRMContext().GetRMNodes().Count);

        // Submit an application
        RMApp app1 = rm.SubmitApp(128);

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();
        Log.Info("sending container requests ");
        am1.AddRequests(new string[] { "*" }, 2 * Gb, 1, 1);

        // send the request
        AllocateResponse alloc1Response = am1.Schedule();

        // kick the scheduler
        nm1.NodeHeartbeat(true);
        int waitCounter = 20;
        Log.Info("heartbeating nm1");
        while (alloc1Response.GetAllocatedContainers().Count < 1 && waitCounter-- > 0)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(500);
            alloc1Response = am1.Schedule();
        }
        Log.Info("received container : " + alloc1Response.GetAllocatedContainers().Count);

        // No container should be allocated.
        // Internally it should not have been reserved.
        NUnit.Framework.Assert.AreEqual(0, alloc1Response.GetAllocatedContainers().Count);

        Log.Info("heartbeating nm2");
        waitCounter = 20;
        nm2.NodeHeartbeat(true);
        while (alloc1Response.GetAllocatedContainers().Count < 1 && waitCounter-- > 0)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(500);
            alloc1Response = am1.Schedule();
        }
        Log.Info("received container : " + alloc1Response.GetAllocatedContainers().Count);
        NUnit.Framework.Assert.AreEqual(1, alloc1Response.GetAllocatedContainers().Count);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Fills a 4 * Gb node with two 2 * Gb allocations, shrinks the node to
/// 2 * Gb through the admin service, and checks the scheduler's node report:
/// available memory is -2 * Gb while over-committed, and returns to 0 after
/// the non-AM container completes.
/// </summary>
/// <remarks>
/// The resource manager is stopped in a <c>finally</c> block so it is
/// released even when an assertion fails.
/// </remarks>
public virtual void TestResourceOverCommit()
{
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 4 * Gb);
        RMApp app1 = rm.SubmitApp(2048);

        // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();
        SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());

        // check node report, 2 GB used and 2 GB available
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetAvailableResource().GetMemory());

        // add request for containers
        am1.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, 2 * Gb, 1, 1);

        // send the request
        AllocateResponse alloc1Response = am1.Schedule();

        // kick the scheduler, 2 GB given to AM1, resource remaining 0
        nm1.NodeHeartbeat(true);
        while (alloc1Response.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
            alloc1Response = am1.Schedule();
        }
        IList<Container> allocated1 = alloc1Response.GetAllocatedContainers();
        NUnit.Framework.Assert.AreEqual(1, allocated1.Count);
        NUnit.Framework.Assert.AreEqual(2 * Gb, allocated1[0].GetResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated1[0].GetNodeId());
        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());

        // check node report, 4 GB used and 0 GB available
        NUnit.Framework.Assert.AreEqual(0, report_nm1.GetAvailableResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(4 * Gb, report_nm1.GetUsedResource().GetMemory());

        // check container is assigned with 2 GB.
        Container c1 = allocated1[0];
        NUnit.Framework.Assert.AreEqual(2 * Gb, c1.GetResource().GetMemory());

        // update node resource to 2 GB, so resource is over-consumed.
        IDictionary<NodeId, ResourceOption> nodeResourceMap = new Dictionary<NodeId, ResourceOption>();
        nodeResourceMap[nm1.GetNodeId()] = ResourceOption.NewInstance(
            Org.Apache.Hadoop.Yarn.Api.Records.Resource.NewInstance(2 * Gb, 1), -1);
        UpdateNodeResourceRequest request = UpdateNodeResourceRequest.NewInstance(nodeResourceMap);
        AdminService adminService = rm.adminService;
        adminService.UpdateNodeResource(request);

        // Now, the used resource is still 4 GB, and available resource is minus value.
        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        NUnit.Framework.Assert.AreEqual(4 * Gb, report_nm1.GetUsedResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(-2 * Gb, report_nm1.GetAvailableResource().GetMemory());

        // Check container can complete successfully in case of resource over-commitment.
        ContainerStatus containerStatus = BuilderUtils.NewContainerStatus(
            c1.GetId(), ContainerState.Complete, string.Empty, 0);
        nm1.ContainerStatus(containerStatus);
        int waitCount = 0;
        while (attempt1.GetJustFinishedContainers().Count < 1 && waitCount++ != 20)
        {
            Log.Info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
            Sharpen.Thread.Sleep(100);
        }
        NUnit.Framework.Assert.AreEqual(1, attempt1.GetJustFinishedContainers().Count);
        NUnit.Framework.Assert.AreEqual(1, am1.Schedule().GetCompletedContainersStatuses().Count);
        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());

        // As container return 2 GB back, the available resource becomes 0 again.
        NUnit.Framework.Assert.AreEqual(0 * Gb, report_nm1.GetAvailableResource().GetMemory());
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Runs two applications against a 6 * Gb node and a 4 * Gb node and checks
/// the allocated containers and the used/available memory in the scheduler's
/// node reports before and after one container completes.
/// </summary>
/// <remarks>
/// The resource manager is stopped in a <c>finally</c> block so it is
/// released even when an assertion fails. The root logger level set here is
/// not restored afterwards.
/// </remarks>
public virtual void Test()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
        MockNM nm2 = rm.RegisterNode("127.0.0.2:5678", 4 * Gb);
        RMApp app1 = rm.SubmitApp(2048);

        // kick the scheduling, 2 GB given to AM1, remaining 4GB on nm1
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();
        SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
        RMApp app2 = rm.SubmitApp(2048);

        // kick the scheduling, 2GB given to AM, remaining 2 GB on nm2
        nm2.NodeHeartbeat(true);
        RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
        MockAM am2 = rm.SendAMLaunched(attempt2.GetAppAttemptId());
        am2.RegisterAppAttempt();
        SchedulerNodeReport report_nm2 = rm.GetResourceScheduler().GetNodeReport(nm2.GetNodeId());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetUsedResource().GetMemory());

        // add request for containers, then send the request
        am1.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, Gb, 1, 1);
        AllocateResponse alloc1Response = am1.Schedule();

        // add request for containers, then send the request
        am2.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, 3 * Gb, 0, 1);
        AllocateResponse alloc2Response = am2.Schedule();

        // kick the scheduler, 1 GB and 3 GB given to AM1 and AM2, remaining 0
        nm1.NodeHeartbeat(true);
        while (alloc1Response.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
            alloc1Response = am1.Schedule();
        }
        while (alloc2Response.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 2...");
            Sharpen.Thread.Sleep(1000);
            alloc2Response = am2.Schedule();
        }

        // kick the scheduler, nothing given remaining 2 GB.
        nm2.NodeHeartbeat(true);
        IList<Container> allocated1 = alloc1Response.GetAllocatedContainers();
        NUnit.Framework.Assert.AreEqual(1, allocated1.Count);
        NUnit.Framework.Assert.AreEqual(1 * Gb, allocated1[0].GetResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated1[0].GetNodeId());
        IList<Container> allocated2 = alloc2Response.GetAllocatedContainers();
        NUnit.Framework.Assert.AreEqual(1, allocated2.Count);
        NUnit.Framework.Assert.AreEqual(3 * Gb, allocated2[0].GetResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated2[0].GetNodeId());
        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        report_nm2 = rm.GetResourceScheduler().GetNodeReport(nm2.GetNodeId());
        NUnit.Framework.Assert.AreEqual(0, report_nm1.GetAvailableResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetAvailableResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(6 * Gb, report_nm1.GetUsedResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetUsedResource().GetMemory());

        // Complete app 1's container and wait (bounded) for the attempt to see it.
        Container c1 = allocated1[0];
        NUnit.Framework.Assert.AreEqual(Gb, c1.GetResource().GetMemory());
        ContainerStatus containerStatus = BuilderUtils.NewContainerStatus(
            c1.GetId(), ContainerState.Complete, string.Empty, 0);
        nm1.ContainerStatus(containerStatus);
        int waitCount = 0;
        while (attempt1.GetJustFinishedContainers().Count < 1 && waitCount++ != 20)
        {
            Log.Info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
            Sharpen.Thread.Sleep(1000);
        }
        NUnit.Framework.Assert.AreEqual(1, attempt1.GetJustFinishedContainers().Count);
        NUnit.Framework.Assert.AreEqual(1, am1.Schedule().GetCompletedContainersStatuses().Count);
        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        NUnit.Framework.Assert.AreEqual(5 * Gb, report_nm1.GetUsedResource().GetMemory());
    }
    finally
    {
        rm.Stop();
    }
}
// Test does major 6 steps verification.
// Step-1 : AMRMClient sends allocate request for 2 container requests
// Step-2 : 2 containers are allocated by RM.
// Step-3 : AM sends 1 containerRequest(cRequest3) and 1 releaseRequest to
// RM
// Step-4 : On RM restart, AM (does not know RM is restarted) sends additional
// containerRequest(cRequest4) and blacklisted nodes.
// In turn RM sends resync command
// Step-5 : Allocate after resync command & new containerRequest(cRequest5)
// Step-6 : RM allocates containers i.e. cRequest3, cRequest4 and cRequest5
/// <summary>
/// Drives an AMRM client through a resource manager restart and checks, via
/// <c>AssertAsksAndReleases</c> and
/// <c>AssertBlacklistAdditionsAndRemovals</c>, the asks, releases and
/// blacklist changes seen on each RM, and that 3 containers are finally
/// allocated by the second RM.
/// </summary>
/// <remarks>
/// The client and both resource managers are stopped in a <c>finally</c>
/// block so they are released even when an assertion fails.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAMRMClientResendsRequestsOnRMRestart()
{
    UserGroupInformation.SetLoginUser(null);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    TestAMRMClientOnRMRestart.MyResourceManager rm1 =
        new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
    // Both are created part-way through the test; they stay null on early failure.
    TestAMRMClientOnRMRestart.MyResourceManager rm2 = null;
    AMRMClient<AMRMClient.ContainerRequest> amClient = null;
    try
    {
        // Phase-1 Start 1st RM
        rm1.Start();
        DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();

        // Submit the application
        RMApp app = rm1.SubmitApp(1024);
        dispatcher.Await();
        MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());
        nm1.RegisterNode();

        // Node heartbeat
        nm1.NodeHeartbeat(true);
        dispatcher.Await();
        ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();
        rm1.SendAMLaunched(appAttemptId);
        dispatcher.Await();
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> token =
            rm1.GetRMContext().GetRMApps()[appAttemptId.GetApplicationId()]
                .GetRMAppAttempt(appAttemptId).GetAMRMToken();
        UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();
        ugi.AddTokenIdentifier(token.DecodeIdentifier());

        // Step-1 : AMRMClient send allocate request for 2 ContainerRequest
        // cRequest1 = h1 and cRequest2 = h1,h2
        // blacklisted nodes = h2
        amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl(rm1);
        amClient.Init(conf);
        amClient.Start();
        amClient.RegisterApplicationMaster("Host", 10000, string.Empty);
        AMRMClient.ContainerRequest cRequest1 = CreateReq(1, 1024, new string[] { "h1" });
        amClient.AddContainerRequest(cRequest1);
        AMRMClient.ContainerRequest cRequest2 = CreateReq(1, 1024, new string[] { "h1", "h2" });
        amClient.AddContainerRequest(cRequest2);
        IList<string> blacklistAdditions = new AList<string>();
        IList<string> blacklistRemoval = new AList<string>();
        blacklistAdditions.AddItem("h2");
        blacklistRemoval.AddItem("h10");
        amClient.UpdateBlacklist(blacklistAdditions, blacklistRemoval);

        // remove from local list
        blacklistAdditions.Remove("h2");
        AllocateResponse allocateResponse = amClient.Allocate(0.1f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
            allocateResponse.GetAllocatedContainers().Count);

        // Why 4 asks and not 3, even though h2 is blacklisted?
        // On blacklisting a host, the application master has to remove the ask
        // from the remote request table. Here, the test does not remove it explicitly.
        AssertAsksAndReleases(4, 0, rm1);
        AssertBlacklistAdditionsAndRemovals(1, 1, rm1);

        // Step-2 : NM heart beat is sent.
        // On 2nd AM allocate request, RM allocates 2 containers to AM
        nm1.NodeHeartbeat(true);
        dispatcher.Await();
        allocateResponse = amClient.Allocate(0.2f);
        dispatcher.Await();

        // 2 containers are allocated i.e for cRequest1 and cRequest2.
        NUnit.Framework.Assert.AreEqual("No of assignments must be 2", 2,
            allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(0, 0, rm1);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
        IList<Container> allocatedContainers = allocateResponse.GetAllocatedContainers();

        // removed allocated container requests
        amClient.RemoveContainerRequest(cRequest1);
        amClient.RemoveContainerRequest(cRequest2);
        allocateResponse = amClient.Allocate(0.2f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
            allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(4, 0, rm1);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm1);

        // Step-3 : Send 1 containerRequest and 1 releaseRequests to RM
        AMRMClient.ContainerRequest cRequest3 = CreateReq(1, 1024, new string[] { "h1" });
        amClient.AddContainerRequest(cRequest3);
        int pendingRelease = 0;
        IEnumerator<Container> it = allocatedContainers.GetEnumerator();

        // release (and drop from the local list) exactly one container
        if (it.HasNext())
        {
            amClient.ReleaseAssignedContainer(it.Next().GetId());
            pendingRelease++;
            it.Remove();
        }
        allocateResponse = amClient.Allocate(0.3f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
            allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(3, pendingRelease, rm1);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
        int completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
        pendingRelease -= completedContainer;

        // Phase-2 start 2nd RM is up
        rm2 = new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
        rm2.Start();
        nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
        ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
        dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();

        // nm1's first heartbeat to rm2 should be answered with a resync action
        NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);
        NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());

        // new NM to represent NM re-register
        nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
        nm1.RegisterNode();
        nm1.NodeHeartbeat(true);
        dispatcher.Await();
        blacklistAdditions.AddItem("h3");
        amClient.UpdateBlacklist(blacklistAdditions, null);
        blacklistAdditions.Remove("h3");

        // release every container still held locally
        it = allocatedContainers.GetEnumerator();
        while (it.HasNext())
        {
            amClient.ReleaseAssignedContainer(it.Next().GetId());
            pendingRelease++;
            it.Remove();
        }
        AMRMClient.ContainerRequest cRequest4 = CreateReq(1, 1024, new string[] { "h1", "h2" });
        amClient.AddContainerRequest(cRequest4);

        // Step-4 : On RM restart, AM (does not know RM is restarted) sends
        // additional containerRequest and blacklisted nodes.
        // In turn RM sends resync command, AMRMClient resends allocate request
        allocateResponse = amClient.Allocate(0.3f);
        dispatcher.Await();
        completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
        pendingRelease -= completedContainer;
        AssertAsksAndReleases(4, pendingRelease, rm2);
        AssertBlacklistAdditionsAndRemovals(2, 0, rm2);
        AMRMClient.ContainerRequest cRequest5 = CreateReq(1, 1024, new string[] { "h1", "h2", "h3" });
        amClient.AddContainerRequest(cRequest5);

        // Step-5 : Allocate after resync command
        allocateResponse = amClient.Allocate(0.5f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
            allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(5, 0, rm2);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm2);

        // Heartbeat and allocate up to 5 times until 3 containers have arrived.
        int noAssignedContainer = 0;
        int count = 5;
        while (count-- > 0)
        {
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            allocateResponse = amClient.Allocate(0.5f);
            dispatcher.Await();
            noAssignedContainer += allocateResponse.GetAllocatedContainers().Count;
            if (noAssignedContainer == 3)
            {
                break;
            }
            Sharpen.Thread.Sleep(1000);
        }

        // Step-6 : RM allocates containers i.e cRequest3,cRequest4 and cRequest5
        NUnit.Framework.Assert.AreEqual("Number of container should be 3", 3, noAssignedContainer);
    }
    finally
    {
        // Same shutdown order as before: client first, then rm1, then rm2.
        if (amClient != null)
        {
            amClient.Stop();
        }
        rm1.Stop();
        if (rm2 != null)
        {
            rm2.Stop();
        }
    }
}
/// <summary>
/// Checks the NMToken bookkeeping kept by the RM's <c>NMTokenSecretManagerInRM</c>
/// for a single application attempt across four phases: container allocation on two
/// nodes, re-registration of one node, a master-key rollover, and completion of the
/// attempt.
/// </summary>
/// <remarks>
/// The local <c>nmTokens</c> map is filled by
/// <c>AllocateContainersAndValidateNMTokens</c>; the assertions on its size assume
/// that helper records one entry per node that handed out a token.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestNMToken()
{
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("h1:1234", 10000);
        NMTokenSecretManagerInRM nmTokenSecretManager =
            rm.GetRMContext().GetNMTokenSecretManager();

        // Submit a new application.
        RMApp app = rm.SubmitApp(1000);

        // Start scheduling.
        nm1.NodeHeartbeat(true);

        // Start the application attempt and launch the AM.
        // The attempt should get registered with the NMTokenSecretManager.
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
        NUnit.Framework.Assert.IsTrue(
            nmTokenSecretManager.IsApplicationAttemptRegistered(attempt.GetAppAttemptId()));

        // This will register the application master.
        am.RegisterAppAttempt();

        AList<Container> containersReceivedForNM1 = new AList<Container>();
        IList<ContainerId> releaseContainerList = new AList<ContainerId>();
        Dictionary<string, Token> nmTokens = new Dictionary<string, Token>();

        // Initially request 2 containers on h1.
        AllocateResponse response = am.Allocate("h1", 1000, 2, releaseContainerList);
        NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
        AllocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 2, nmTokens, nm1);
        NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);

        // Request 2 more containers on h1; the token count must stay at 1.
        response = am.Allocate("h1", 1000, 2, releaseContainerList);
        NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
        AllocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 4, nmTokens, nm1);
        NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);

        // A second node, h2:1234, now registers. Its restart is simulated further below.
        MockNM nm2 = rm.RegisterNode("h2:1234", 10000);
        nm2.NodeHeartbeat(true);
        AList<Container> containersReceivedForNM2 = new AList<Container>();
        response = am.Allocate("h2", 1000, 2, releaseContainerList);
        NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
        AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 2, nmTokens, nm2);
        NUnit.Framework.Assert.AreEqual(2, nmTokens.Count);

        // Simulate an NM-2 restart by registering the same address again.
        nm2 = rm.RegisterNode("h2:1234", 10000);

        // Wait for the reconnect to make it through the RM and create a new RMNode.
        // NOTE(review): this loop has no retry limit; if the response id never drops
        // back to 0 the test will not terminate on its own.
        IDictionary<NodeId, RMNode> nodes = rm.GetRMContext().GetRMNodes();
        while (nodes[nm2.GetNodeId()].GetLastNodeHeartBeatResponse().GetResponseId() > 0)
        {
            Sharpen.Thread.Sleep(WaitSleepMs);
        }

        // Wait (at most 40 polls) for the NM-2 token to be cleared.
        int interval = 40;
        while (nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(
                   attempt.GetAppAttemptId(), nm2.GetNodeId())
               && interval-- > 0)
        {
            Log.Info("waiting for nmToken to be cleared for : " + nm2.GetNodeId());
            Sharpen.Thread.Sleep(WaitSleepMs);
        }

        // Fail right here if the retry budget ran out with the token still present,
        // instead of continuing as though the wait had succeeded.
        NUnit.Framework.Assert.IsFalse(
            nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(
                attempt.GetAppAttemptId(), nm2.GetNodeId()));

        NUnit.Framework.Assert.IsTrue(
            nmTokenSecretManager.IsApplicationAttemptRegistered(attempt.GetAppAttemptId()));

        // Remove the locally tracked NMToken for h2:1234.
        Sharpen.Collections.Remove(nmTokens, nm2.GetNodeId().ToString());
        NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);

        // We should receive the NMToken for h2 again.
        response = am.Allocate("h2", 1000, 2, releaseContainerList);
        NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
        AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 4, nmTokens, nm2);
        NUnit.Framework.Assert.AreEqual(2, nmTokens.Count);

        // Now roll over the NMToken master key. The RM should resend the NMToken in
        // the next allocate call.
        NUnit.Framework.Assert.IsTrue(
            nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(
                attempt.GetAppAttemptId(), nm1.GetNodeId()));
        NUnit.Framework.Assert.IsTrue(
            nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(
                attempt.GetAppAttemptId(), nm2.GetNodeId()));

        nmTokenSecretManager.RollMasterKey();
        nmTokenSecretManager.ActivateNextMasterKey();

        NUnit.Framework.Assert.IsFalse(
            nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(
                attempt.GetAppAttemptId(), nm1.GetNodeId()));
        NUnit.Framework.Assert.IsFalse(
            nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(
                attempt.GetAppAttemptId(), nm2.GetNodeId()));

        // The rollover should not remove the application attempt entry itself.
        NUnit.Framework.Assert.IsTrue(
            nmTokenSecretManager.IsApplicationAttemptRegistered(attempt.GetAppAttemptId()));

        nmTokens.Clear();
        NUnit.Framework.Assert.AreEqual(0, nmTokens.Count);

        // We should receive the NMToken for h2 again.
        response = am.Allocate("h2", 1000, 1, releaseContainerList);
        NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
        AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 5, nmTokens, nm2);
        NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
        NUnit.Framework.Assert.IsTrue(
            nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(
                attempt.GetAppAttemptId(), nm2.GetNodeId()));

        // The attempt is still registered before the AM unregisters; once the attempt
        // has finished (checked at the end) its entry must be gone.
        NUnit.Framework.Assert.IsTrue(
            nmTokenSecretManager.IsApplicationAttemptRegistered(attempt.GetAppAttemptId()));
        am.UnregisterAppAttempt();

        // Mark all the containers as finished.
        foreach (Container container in containersReceivedForNM1)
        {
            nm1.NodeHeartbeat(attempt.GetAppAttemptId(), container.GetId().GetContainerId(),
                ContainerState.Complete);
        }
        foreach (Container container_1 in containersReceivedForNM2)
        {
            nm2.NodeHeartbeat(attempt.GetAppAttemptId(), container_1.GetId().GetContainerId(),
                ContainerState.Complete);
        }

        // Report container id 1 of this attempt as complete on nm1 as well
        // (presumably the AM's own container; confirm against MockAM/MockNM).
        nm1.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);
        NUnit.Framework.Assert.IsFalse(
            nmTokenSecretManager.IsApplicationAttemptRegistered(attempt.GetAppAttemptId()));
    }
    finally
    {
        rm.Stop();
    }
}