// To avoid using cached client
/// <summary>
/// Verifies when the ResourceManager hands a refreshed AMRMToken back to an
/// ApplicationMaster through the allocate response:
/// no token before the master key is rolled, a token right after the roll,
/// the already generated token (not a newly created one) when the AM still
/// presents the old token, and no token once the AM uses the updated one or
/// after the next master key is activated.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestAMRMMasterKeysUpdate()
{
    AtomicReference<AMRMTokenSecretManager> spySecretMgrRef = new AtomicReference<AMRMTokenSecretManager>();
    // Skip the login.
    // NOTE(review): _MockRM_349 is declared outside this method; it receives
    // spySecretMgrRef, which is read back below once the RM is running.
    MockRM rm = new _MockRM_349(this, spySecretMgrRef, conf);
    try
    {
        rm.Start();
        MockNM nm = rm.RegisterNode("127.0.0.1:1234", 8000);
        RMApp app = rm.SubmitApp(200);
        MockAM am = MockRM.LaunchAndRegisterAM(app, rm, nm);
        AMRMTokenSecretManager spySecretMgr = spySecretMgrRef.Get();

        // Do allocate. Should not update AMRMToken
        AllocateResponse response = am.Allocate(Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
        NUnit.Framework.Assert.IsNull(response.GetAMRMToken());

        // Remember the token issued before the roll-over so it can be replayed later.
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> oldToken =
            rm.GetRMContext().GetRMApps()[app.GetApplicationId()]
                .GetRMAppAttempt(am.GetApplicationAttemptId()).GetAMRMToken();

        // roll over the master key
        rm.GetRMContext().GetAMRMTokenSecretManager().RollMasterKey();

        // Do allocate again. the AM should get the latest AMRMToken
        response = am.Allocate(Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
        NUnit.Framework.Assert.IsNotNull(response.GetAMRMToken());
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> amrmToken =
            ConverterUtils.ConvertFromYarn(response.GetAMRMToken(), new Text(response.GetAMRMToken().GetService()));

        // NUnit's AreEqual takes (expected, actual): the key id reported by the
        // secret manager is the expectation, the id inside the returned token
        // is the value under test.
        NUnit.Framework.Assert.AreEqual(
            rm.GetRMContext().GetAMRMTokenSecretManager().GetMasterKey().GetMasterKey().GetKeyId(),
            amrmToken.DecodeIdentifier().GetKeyId());

        // Do allocate again with the same old token and verify the RM sends
        // back the last generated token instead of generating it again.
        Org.Mockito.Mockito.Reset(spySecretMgr);
        UserGroupInformation ugi = UserGroupInformation.CreateUserForTesting(
            am.GetApplicationAttemptId().ToString(), new string[0]);
        ugi.AddTokenIdentifier(oldToken.DecodeIdentifier());
        response = am.DoAllocateAs(ugi, Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
        NUnit.Framework.Assert.IsNotNull(response.GetAMRMToken());
        Org.Mockito.Mockito.Verify(spySecretMgr, Org.Mockito.Mockito.Never())
            .CreateAndGetAMRMToken(Matchers.IsA<ApplicationAttemptId>());

        // Do allocate again with the updated token and verify we do not
        // receive a new token to use.
        response = am.Allocate(Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
        NUnit.Framework.Assert.IsNull(response.GetAMRMToken());

        // Activate the next master key. Since no new master key is generated
        // in AMRMTokenSecretManager by this step, the AMRMToken will not get
        // updated for the AM.
        rm.GetRMContext().GetAMRMTokenSecretManager().ActivateNextMasterKey();
        response = am.Allocate(Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
        NUnit.Framework.Assert.IsNull(response.GetAMRMToken());
    }
    finally
    {
        // Always shut the mock RM down, even when an assertion above fails,
        // so a failing run does not leave the RM running.
        rm.Stop();
    }
}
/// <summary>
/// Sends one empty allocate request (no asks, no releases) to the scheduler
/// and processes the outcome: records the response id, applies a refreshed
/// AMRM token when one is returned, re-registers when the RM reports the AM
/// as not registered, and escalates to a job event plus
/// <c>YarnRuntimeException</c> when the attempt is unknown to the RM or the
/// RM has been unreachable for at least <c>retryInterval</c> milliseconds.
/// </summary>
/// <exception cref="System.Exception"/>
protected internal override void Heartbeat()
{
    // The monitor stays on "this" so the method keeps excluding whatever other
    // code in the class synchronizes on the same instance.
    lock (this)
    {
        AllocateRequest allocateRequest = AllocateRequest.NewInstance(
            this.lastResponseID,
            base.GetApplicationProgress(),
            new AList<ResourceRequest>(),
            new AList<ContainerId>(),
            null);
        AllocateResponse allocateResponse = null;
        try
        {
            allocateResponse = scheduler.Allocate(allocateRequest);
            // Reset retry count if no exception occurred.
            retrystartTime = Runtime.CurrentTimeMillis();
        }
        catch (ApplicationAttemptNotFoundException e)
        {
            Log.Info("Event from RM: shutting down Application Master");
            // This can happen if the RM has been restarted. If it is in that state,
            // this application must clean itself up.
            eventHandler.Handle(new JobEvent(this.GetJob().GetID(), JobEventType.JobAmReboot));
            throw new YarnRuntimeException(
                "Resource Manager doesn't recognize AttemptId: " + this.GetContext().GetApplicationID(), e);
        }
        catch (ApplicationMasterNotRegisteredException)
        {
            Log.Info("ApplicationMaster is out of sync with ResourceManager," + " hence resync and send outstanding requests.");
            // Start the response sequence over and register again; allocateResponse
            // stays null, so the bookkeeping below is skipped for this heartbeat.
            this.lastResponseID = 0;
            Register();
        }
        catch (Exception e)
        {
            // This can happen when the connection to the RM has gone down. Keep
            // re-trying until the retryInterval has expired.
            if (Runtime.CurrentTimeMillis() - retrystartTime >= retryInterval)
            {
                Log.Error("Could not contact RM after " + retryInterval + " milliseconds.");
                eventHandler.Handle(new JobEvent(this.GetJob().GetID(), JobEventType.InternalError));
                // Attach the triggering exception as the cause so the reason the
                // RM could not be reached is not lost when giving up.
                throw new YarnRuntimeException(
                    "Could not contact RM after " + retryInterval + " milliseconds.", e);
            }
            // Throw this up to the caller, which may decide to ignore it and
            // continue to attempt to contact the RM.
            throw;
        }
        if (allocateResponse != null)
        {
            this.lastResponseID = allocateResponse.GetResponseId();
            Token token = allocateResponse.GetAMRMToken();
            if (token != null)
            {
                UpdateAMRMToken(token);
            }
        }
    }
}
/// <summary>
/// Sends the pending asks, releases and blacklist changes to the
/// ResourceManager in one allocate call and returns its response.
/// The pending collections are snapshotted and cleared under the lock, the
/// RPC itself runs outside the lock, and the snapshot is merged back into the
/// pending collections if no response was obtained. When the RM reports the
/// AM as not registered, outstanding state is re-queued, the AM registers
/// again and the allocate call is retried.
/// </summary>
/// <param name="progressIndicator">Application progress; must not be negative.</param>
/// <returns>The response from the ResourceManager.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public override AllocateResponse Allocate(float progressIndicator)
{
    Preconditions.CheckArgument(progressIndicator >= 0, "Progress indicator should not be negative");
    AllocateResponse allocateResponse = null;
    IList<ResourceRequest> askList = null;
    IList<ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
    IList<string> blacklistToAdd = new AList<string>();
    IList<string> blacklistToRemove = new AList<string>();
    try
    {
        lock (this)
        {
            askList = new AList<ResourceRequest>(ask.Count);
            foreach (ResourceRequest r in ask)
            {
                // create a copy of ResourceRequest as we might change it while the
                // RPC layer is using it to send info across
                askList.AddItem(ResourceRequest.NewInstance(
                    r.GetPriority(),
                    r.GetResourceName(),
                    r.GetCapability(),
                    r.GetNumContainers(),
                    r.GetRelaxLocality(),
                    r.GetNodeLabelExpression()));
            }
            releaseList = new AList<ContainerId>(release);
            // optimistically clear this collection assuming no RPC failure
            ask.Clear();
            release.Clear();
            Sharpen.Collections.AddAll(blacklistToAdd, blacklistAdditions);
            Sharpen.Collections.AddAll(blacklistToRemove, blacklistRemovals);
            ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.NewInstance(blacklistToAdd, blacklistToRemove);
            allocateRequest = AllocateRequest.NewInstance(lastResponseId, progressIndicator, askList, releaseList, blacklistRequest);
            // clear blacklistAdditions and blacklistRemovals before
            // unsynchronized part
            blacklistAdditions.Clear();
            blacklistRemovals.Clear();
        }
        try
        {
            allocateResponse = rmClient.Allocate(allocateRequest);
        }
        catch (ApplicationMasterNotRegisteredException)
        {
            Log.Warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing.");
            lock (this)
            {
                // Re-queue everything still outstanding so the retried allocate
                // call sends it again: pending releases, blacklisted nodes and
                // every remote request recorded in remoteRequestsTable.
                Sharpen.Collections.AddAll(release, this.pendingRelease);
                Sharpen.Collections.AddAll(blacklistAdditions, this.blacklistedNodes);
                foreach (IDictionary<string, SortedDictionary<Resource, AMRMClientImpl.ResourceRequestInfo>> rr in remoteRequestsTable.Values)
                {
                    foreach (IDictionary<Resource, AMRMClientImpl.ResourceRequestInfo> capabilities in rr.Values)
                    {
                        foreach (AMRMClientImpl.ResourceRequestInfo request in capabilities.Values)
                        {
                            AddResourceRequestToAsk(request.remoteRequest);
                        }
                    }
                }
            }
            // re register with RM
            RegisterApplicationMaster();
            allocateResponse = Allocate(progressIndicator);
            return allocateResponse;
        }
        lock (this)
        {
            // update these on successful RPC
            clusterNodeCount = allocateResponse.GetNumClusterNodes();
            lastResponseId = allocateResponse.GetResponseId();
            clusterAvailableResources = allocateResponse.GetAvailableResources();
            if (!allocateResponse.GetNMTokens().IsEmpty())
            {
                PopulateNMTokens(allocateResponse.GetNMTokens());
            }
            if (allocateResponse.GetAMRMToken() != null)
            {
                UpdateAMRMToken(allocateResponse.GetAMRMToken());
            }
            if (!pendingRelease.IsEmpty() && !allocateResponse.GetCompletedContainersStatuses().IsEmpty())
            {
                RemovePendingReleaseRequests(allocateResponse.GetCompletedContainersStatuses());
            }
        }
    }
    finally
    {
        // TODO how to differentiate remote yarn exception vs error in rpc
        if (allocateResponse == null)
        {
            // we hit an exception in allocate()
            // preserve ask and release for next call to allocate()
            lock (this)
            {
                // releaseList/askList are still null when the failure happened
                // before the snapshot above was taken; guard so this recovery
                // path does not throw and hide the original exception.
                if (releaseList != null)
                {
                    Sharpen.Collections.AddAll(release, releaseList);
                }
                // requests could have been added or deleted during call to allocate
                // If requests were added/removed then there is nothing to do since
                // the ResourceRequest object in ask would have the actual new value.
                // If ask does not have this ResourceRequest then it was unchanged and
                // so we can add the value back safely.
                // This assumes that there will no concurrent calls to allocate() and
                // so we dont have to worry about ask being changed in the
                // synchronized block at the beginning of this method.
                if (askList != null)
                {
                    foreach (ResourceRequest oldAsk in askList)
                    {
                        if (!ask.Contains(oldAsk))
                        {
                            ask.AddItem(oldAsk);
                        }
                    }
                }
                Sharpen.Collections.AddAll(blacklistAdditions, blacklistToAdd);
                Sharpen.Collections.AddAll(blacklistRemovals, blacklistToRemove);
            }
        }
    }
    return allocateResponse;
}