/// <summary>
/// Adds <paramref name="num"/> identical container requests targeted at the
/// host and rack of the first entry in <c>nodeReports</c>, then polls the RM
/// (at most twice) and gathers the containers it hands back.
/// </summary>
/// <param name="rmClient">client used to file the requests and call allocate</param>
/// <param name="num">how many container requests to add</param>
/// <returns>
/// the containers received so far; the polling loop gives up after two
/// allocate calls, so this may hold fewer containers than were requested
/// </returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
private ICollection<Container> AllocateContainers(AMRMClientImpl<AMRMClient.ContainerRequest> rmClient, int num)
{
    // Build the request template: 1024 / 0 resource at priority 0.
    Resource requestedResource = Resource.NewInstance(1024, 0);
    Priority requestPriority = Priority.NewInstance(0);
    string[] targetNodes = { nodeReports[0].GetNodeId().GetHost() };
    string[] targetRacks = { nodeReports[0].GetRackName() };

    int submitted = 0;
    while (submitted < num)
    {
        rmClient.AddContainerRequest(
            new AMRMClient.ContainerRequest(requestedResource, targetNodes, targetRacks, requestPriority));
        submitted++;
    }

    // Read back the outstanding "any location" count from the client's own
    // bookkeeping; that is the number of containers to wait for.
    int expected = rmClient
        .remoteRequestsTable[requestPriority][ResourceRequest.Any][requestedResource]
        .remoteRequest
        .GetNumContainers();

    // RM should allocate container within 2 calls to allocate()
    int received = 0;
    ICollection<Container> granted = new TreeSet<Container>();
    for (int attemptsLeft = 2; attemptsLeft > 0 && received < expected; --attemptsLeft)
    {
        AllocateResponse response = rmClient.Allocate(0.1f);

        received += response.GetAllocatedContainers().Count;
        foreach (Container allocated in response.GetAllocatedContainers())
        {
            granted.AddItem(allocated);
        }

        // Store any NM tokens from this response in the client's token
        // cache, keyed by the node id string.
        if (!response.GetNMTokens().IsEmpty())
        {
            foreach (NMToken nmToken in response.GetNMTokens())
            {
                rmClient.GetNMTokenCache().SetToken(nmToken.GetNodeId().ToString(), nmToken.GetToken());
            }
        }

        if (received < expected)
        {
            // sleep to let NM's heartbeat to RM and trigger allocations
            Sleep(1000);
        }
    }

    return granted;
}
// Test even if AM container is allocated with containerId not equal to 1, the
// following allocate requests from AM should be able to retrieve the
// corresponding NM Token.
/// <summary>
/// Starts a MockRM on the CapacityScheduler, forces the AM container id to be
/// something other than 1, allocates one more container and asserts that the
/// NM token returned by allocate belongs to the node the container was
/// requested on.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestNMTokenSentForNormalContainer()
{
    conf.Set(YarnConfiguration.RmScheduler, typeof(CapacityScheduler).GetCanonicalName());
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
        RMApp app = rm.SubmitApp(2000);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();

        // Call getNewContainerId to increase container Id so that the AM container
        // Id doesn't equal to one.
        CapacityScheduler cs = (CapacityScheduler)rm.GetResourceScheduler();
        cs.GetApplicationAttempt(attempt.GetAppAttemptId()).GetNewContainerId();

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        MockAM am = MockRM.LaunchAM(app, rm, nm1);

        // am container Id not equal to 1.
        NUnit.Framework.Assert.IsTrue(attempt.GetMasterContainer().GetId().GetContainerId() != 1);

        // NMSecretManager doesn't record the node on which the am is allocated.
        NUnit.Framework.Assert.IsFalse(
            rm.GetRMContext().GetNMTokenSecretManager().IsApplicationAttemptNMTokenPresent(
                attempt.GetAppAttemptId(), nm1.GetNodeId()));

        am.RegisterAppAttempt();
        rm.WaitForState(app.GetApplicationId(), RMAppState.Running);

        int numContainers = 1;
        IList<Container> containers = new AList<Container>();
        // nmTokens keeps track of all the nmTokens issued in the allocate call.
        IList<NMToken> expectedNMTokens = new AList<NMToken>();

        // am1 allocate 1 container on nm1.
        // NOTE(review): this loop has no upper bound of its own; it only ends
        // once the container shows up in an allocate response.
        while (true)
        {
            AllocateResponse response = am.Allocate("127.0.0.1", 2000, numContainers, new AList<ContainerId>());
            nm1.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
            Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
            if (containers.Count == numContainers)
            {
                break;
            }
            Sharpen.Thread.Sleep(200);
            System.Console.Out.WriteLine("Waiting for container to be allocated.");
        }

        // Fail with a readable message instead of an index error when the
        // container arrived without any NM token.
        NUnit.Framework.Assert.IsTrue(expectedNMTokens.Count > 0,
            "No NMToken was returned for the allocated container.");
        NodeId nodeId = expectedNMTokens[0].GetNodeId();

        // NMToken is sent for the allocated container.
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), nodeId);
    }
    finally
    {
        // Always shut the RM down so a failing assertion does not leave the
        // started MockRM running.
        rm.Stop();
    }
}
/// <summary>
/// Runs three AM attempts of one application against a MockRM and checks
/// that the NM tokens handed out during earlier attempts are returned to the
/// next attempt by <c>GetNMTokensFromPreviousAttempts()</c> at registration.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestNMTokensRebindOnAMRestart()
{
    YarnConfiguration conf = new YarnConfiguration();
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 3);
    MockRM rm1 = new MockRM(conf);
    rm1.Start();
    try
    {
        RMApp app1 = rm1.SubmitApp(200, "myname", "myuser",
            new Dictionary<ApplicationAccessType, string>(), false, "default", -1, null,
            "MAPREDUCE", false, true);
        MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        MockNM nm2 = new MockNM("127.1.1.1:4321", 8000, rm1.GetResourceTrackerService());
        nm2.RegisterNode();
        MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

        IList<Container> containers = new AList<Container>();
        // nmTokens keeps track of all the nmTokens issued in the allocate call.
        IList<NMToken> expectedNMTokens = new AList<NMToken>();

        // am1 allocate 2 container on nm1.
        // NOTE(review): the polling loops in this test have no upper bound of
        // their own; each ends only when the expected container count is seen.
        while (true)
        {
            AllocateResponse response = am1.Allocate("127.0.0.1", 2000, 2, new AList<ContainerId>());
            nm1.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
            Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
            if (containers.Count == 2)
            {
                break;
            }
            Sharpen.Thread.Sleep(200);
            System.Console.Out.WriteLine("Waiting for container to be allocated.");
        }

        // launch the container-2
        nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 2, ContainerState.Running);
        ContainerId containerId2 = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 2);
        rm1.WaitForState(nm1, containerId2, RMContainerState.Running);

        // launch the container-3
        nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 3, ContainerState.Running);
        ContainerId containerId3 = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 3);
        rm1.WaitForState(nm1, containerId3, RMContainerState.Running);

        // fail am1: report container 1 of the attempt as Complete and wait
        // for the attempt to reach Failed.
        nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am1.WaitForState(RMAppAttemptState.Failed);
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);

        // restart the am
        MockAM am2 = MockRM.LaunchAM(app1, rm1, nm1);
        RegisterApplicationMasterResponse registerResponse = am2.RegisterAppAttempt();
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running);

        // check am2 get the nm token from am1.
        NUnit.Framework.Assert.AreEqual(expectedNMTokens,
            registerResponse.GetNMTokensFromPreviousAttempts());

        // am2 allocate 1 container on nm2
        containers = new AList<Container>();
        while (true)
        {
            AllocateResponse allocateResponse = am2.Allocate("127.1.1.1", 4000, 1, new AList<ContainerId>());
            nm2.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(containers, allocateResponse.GetAllocatedContainers());
            Sharpen.Collections.AddAll(expectedNMTokens, allocateResponse.GetNMTokens());
            if (containers.Count == 1)
            {
                break;
            }
            Sharpen.Thread.Sleep(200);
            System.Console.Out.WriteLine("Waiting for container to be allocated.");
        }

        // NOTE(review): the container above was requested on nm2's address,
        // yet the heartbeat and wait below go through nm1 - kept as-is,
        // confirm this is intended.
        nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 2, ContainerState.Running);
        ContainerId am2ContainerId2 = ContainerId.NewContainerId(am2.GetApplicationAttemptId(), 2);
        rm1.WaitForState(nm1, am2ContainerId2, RMContainerState.Running);

        // fail am2.
        nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am2.WaitForState(RMAppAttemptState.Failed);
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);

        // restart am
        MockAM am3 = MockRM.LaunchAM(app1, rm1, nm1);
        registerResponse = am3.RegisterAppAttempt();
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running);

        // check am3 get the NM token from both am1 and am2;
        IList<NMToken> transferredTokens = registerResponse.GetNMTokensFromPreviousAttempts();
        NUnit.Framework.Assert.AreEqual(2, transferredTokens.Count);
        NUnit.Framework.Assert.IsTrue(transferredTokens.ContainsAll(expectedNMTokens));
    }
    finally
    {
        // Stop the RM even when an assertion or wait above throws, so a
        // failing run does not leave the started MockRM behind.
        rm1.Stop();
    }
}
/// <summary>
/// Sends the currently pending asks, releases and blacklist changes to the
/// ResourceManager in a single allocate call and applies the response to
/// this client's state.
/// </summary>
/// <remarks>
/// The pending collections are snapshotted and cleared under the lock, the
/// RPC itself runs outside the lock, and if no response was obtained the
/// snapshot is merged back so the next call can resend it. When the RM
/// reports that the ApplicationMaster is not registered, the full request
/// state is re-queued, the AM re-registers and the allocate call is retried.
/// </remarks>
/// <param name="progressIndicator">progress value forwarded to the RM; must not be negative</param>
/// <returns>
/// the response of the allocate RPC, or of the retried call on the resync path
/// </returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public override AllocateResponse Allocate(float progressIndicator)
{
    Preconditions.CheckArgument(progressIndicator >= 0, "Progress indicator should not be negative");
    AllocateResponse allocateResponse = null;
    IList<ResourceRequest> askList = null;
    IList<ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
    IList<string> blacklistToAdd = new AList<string>();
    IList<string> blacklistToRemove = new AList<string>();
    try
    {
        lock (this)
        {
            askList = new AList<ResourceRequest>(ask.Count);
            foreach (ResourceRequest r in ask)
            {
                // create a copy of ResourceRequest as we might change it while the
                // RPC layer is using it to send info across
                askList.AddItem(ResourceRequest.NewInstance(r.GetPriority(), r.GetResourceName(),
                    r.GetCapability(), r.GetNumContainers(), r.GetRelaxLocality(),
                    r.GetNodeLabelExpression()));
            }
            releaseList = new AList<ContainerId>(release);

            // optimistically clear this collection assuming no RPC failure
            ask.Clear();
            release.Clear();

            Sharpen.Collections.AddAll(blacklistToAdd, blacklistAdditions);
            Sharpen.Collections.AddAll(blacklistToRemove, blacklistRemovals);
            ResourceBlacklistRequest blacklistRequest =
                ResourceBlacklistRequest.NewInstance(blacklistToAdd, blacklistToRemove);
            allocateRequest = AllocateRequest.NewInstance(lastResponseId, progressIndicator,
                askList, releaseList, blacklistRequest);

            // clear blacklistAdditions and blacklistRemovals before
            // unsynchronized part
            blacklistAdditions.Clear();
            blacklistRemovals.Clear();
        }

        try
        {
            allocateResponse = rmClient.Allocate(allocateRequest);
        }
        catch (ApplicationMasterNotRegisteredException)
        {
            Log.Warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
            lock (this)
            {
                // Re-queue everything this client is tracking: pending
                // releases, blacklisted nodes and every outstanding request.
                Sharpen.Collections.AddAll(release, this.pendingRelease);
                Sharpen.Collections.AddAll(blacklistAdditions, this.blacklistedNodes);
                foreach (IDictionary<string, SortedDictionary<Resource, AMRMClientImpl.ResourceRequestInfo>> rr
                    in remoteRequestsTable.Values)
                {
                    foreach (IDictionary<Resource, AMRMClientImpl.ResourceRequestInfo> capabilities in rr.Values)
                    {
                        foreach (AMRMClientImpl.ResourceRequestInfo request in capabilities.Values)
                        {
                            AddResourceRequestToAsk(request.remoteRequest);
                        }
                    }
                }
            }

            // re register with RM
            RegisterApplicationMaster();
            allocateResponse = Allocate(progressIndicator);
            return allocateResponse;
        }

        lock (this)
        {
            // update these on successful RPC
            clusterNodeCount = allocateResponse.GetNumClusterNodes();
            lastResponseId = allocateResponse.GetResponseId();
            clusterAvailableResources = allocateResponse.GetAvailableResources();
            if (!allocateResponse.GetNMTokens().IsEmpty())
            {
                PopulateNMTokens(allocateResponse.GetNMTokens());
            }
            if (allocateResponse.GetAMRMToken() != null)
            {
                UpdateAMRMToken(allocateResponse.GetAMRMToken());
            }
            if (!pendingRelease.IsEmpty() && !allocateResponse.GetCompletedContainersStatuses().IsEmpty())
            {
                RemovePendingReleaseRequests(allocateResponse.GetCompletedContainersStatuses());
            }
        }
    }
    finally
    {
        // TODO how to differentiate remote yarn exception vs error in rpc
        if (allocateResponse == null)
        {
            // we hit an exception in allocate()
            // preserve ask and release for next call to allocate()
            lock (this)
            {
                // The snapshots stay null when the failure happened before
                // they were taken; touching them here would then throw a
                // NullReferenceException that hides the original exception.
                if (releaseList != null)
                {
                    Sharpen.Collections.AddAll(release, releaseList);
                }

                // requests could have been added or deleted during call to allocate
                // If requests were added/removed then there is nothing to do since
                // the ResourceRequest object in ask would have the actual new value.
                // If ask does not have this ResourceRequest then it was unchanged and
                // so we can add the value back safely.
                // This assumes that there will no concurrent calls to allocate() and
                // so we dont have to worry about ask being changed in the
                // synchronized block at the beginning of this method.
                if (askList != null)
                {
                    foreach (ResourceRequest oldAsk in askList)
                    {
                        if (!ask.Contains(oldAsk))
                        {
                            ask.AddItem(oldAsk);
                        }
                    }
                }

                Sharpen.Collections.AddAll(blacklistAdditions, blacklistToAdd);
                Sharpen.Collections.AddAll(blacklistRemovals, blacklistToRemove);
            }
        }
    }
    return allocateResponse;
}