public virtual IList <Container> GetTransferredContainers(ApplicationAttemptId currentAttempt ) { ApplicationId appId = currentAttempt.GetApplicationId(); SchedulerApplication <T> app = applications[appId]; IList <Container> containerList = new AList <Container>(); RMApp appImpl = this.rmContext.GetRMApps()[appId]; if (appImpl.GetApplicationSubmissionContext().GetUnmanagedAM()) { return(containerList); } if (app == null) { return(containerList); } ICollection <RMContainer> liveContainers = app.GetCurrentAppAttempt().GetLiveContainers (); ContainerId amContainerId = rmContext.GetRMApps()[appId].GetCurrentAppAttempt().GetMasterContainer ().GetId(); foreach (RMContainer rmContainer in liveContainers) { if (!rmContainer.GetContainerId().Equals(amContainerId)) { containerList.AddItem(rmContainer.GetContainer()); } } return(containerList); }
/// <exception cref="System.Exception"/> protected internal virtual RMApp StoreApp(RMStateStore store, ApplicationId appId , long submitTime, long startTime) { ApplicationSubmissionContext context = new ApplicationSubmissionContextPBImpl(); context.SetApplicationId(appId); RMApp mockApp = Org.Mockito.Mockito.Mock <RMApp>(); Org.Mockito.Mockito.When(mockApp.GetApplicationId()).ThenReturn(appId); Org.Mockito.Mockito.When(mockApp.GetSubmitTime()).ThenReturn(submitTime); Org.Mockito.Mockito.When(mockApp.GetStartTime()).ThenReturn(startTime); Org.Mockito.Mockito.When(mockApp.GetApplicationSubmissionContext()).ThenReturn(context ); Org.Mockito.Mockito.When(mockApp.GetUser()).ThenReturn("test"); store.StoreNewApplication(mockApp); return(mockApp); }
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/> /// <exception cref="System.IO.IOException"/> public virtual FinishApplicationMasterResponse FinishApplicationMaster(FinishApplicationMasterRequest request) { ApplicationAttemptId applicationAttemptId = AuthorizeRequest().GetApplicationAttemptId (); ApplicationId appId = applicationAttemptId.GetApplicationId(); RMApp rmApp = rmContext.GetRMApps()[applicationAttemptId.GetApplicationId()]; // checking whether the app exits in RMStateStore at first not to throw // ApplicationDoesNotExistInCacheException before and after // RM work-preserving restart. if (rmApp.IsAppFinalStateStored()) { Log.Info(rmApp.GetApplicationId() + " unregistered successfully. "); return(FinishApplicationMasterResponse.NewInstance(true)); } ApplicationMasterService.AllocateResponseLock Lock = responseMap[applicationAttemptId ]; if (Lock == null) { ThrowApplicationDoesNotExistInCacheException(applicationAttemptId); } // Allow only one thread in AM to do finishApp at a time. lock (Lock) { if (!HasApplicationMasterRegistered(applicationAttemptId)) { string message = "Application Master is trying to unregister before registering for: " + appId; Log.Error(message); RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appId].GetUser(), RMAuditLogger.AuditConstants .UnregisterAm, string.Empty, "ApplicationMasterService", message, appId, applicationAttemptId ); throw new ApplicationMasterNotRegisteredException(message); } this.amLivelinessMonitor.ReceivedPing(applicationAttemptId); rmContext.GetDispatcher().GetEventHandler().Handle(new RMAppAttemptUnregistrationEvent (applicationAttemptId, request.GetTrackingUrl(), request.GetFinalApplicationStatus (), request.GetDiagnostics())); // For UnmanagedAMs, return true so they don't retry return(FinishApplicationMasterResponse.NewInstance(rmApp.GetApplicationSubmissionContext ().GetUnmanagedAM())); } }
internal virtual void HandleNMContainerStatus(NMContainerStatus containerStatus, NodeId nodeId) { ApplicationAttemptId appAttemptId = containerStatus.GetContainerId().GetApplicationAttemptId (); RMApp rmApp = rmContext.GetRMApps()[appAttemptId.GetApplicationId()]; if (rmApp == null) { Log.Error("Received finished container : " + containerStatus.GetContainerId() + " for unknown application " + appAttemptId.GetApplicationId() + " Skipping."); return; } if (rmApp.GetApplicationSubmissionContext().GetUnmanagedAM()) { if (Log.IsDebugEnabled()) { Log.Debug("Ignoring container completion status for unmanaged AM " + rmApp.GetApplicationId ()); } return; } RMAppAttempt rmAppAttempt = rmApp.GetRMAppAttempt(appAttemptId); Container masterContainer = rmAppAttempt.GetMasterContainer(); if (masterContainer.GetId().Equals(containerStatus.GetContainerId()) && containerStatus .GetContainerState() == ContainerState.Complete) { ContainerStatus status = ContainerStatus.NewInstance(containerStatus.GetContainerId (), containerStatus.GetContainerState(), containerStatus.GetDiagnostics(), containerStatus .GetContainerExitStatus()); // sending master container finished event. RMAppAttemptContainerFinishedEvent evt = new RMAppAttemptContainerFinishedEvent(appAttemptId , status, nodeId); rmContext.GetDispatcher().GetEventHandler().Handle(evt); } }
public virtual void RecoverContainersOnNode(IList <NMContainerStatus> containerReports , RMNode nm) { lock (this) { if (!rmContext.IsWorkPreservingRecoveryEnabled() || containerReports == null || ( containerReports != null && containerReports.IsEmpty())) { return; } foreach (NMContainerStatus container in containerReports) { ApplicationId appId = container.GetContainerId().GetApplicationAttemptId().GetApplicationId (); RMApp rmApp = rmContext.GetRMApps()[appId]; if (rmApp == null) { Log.Error("Skip recovering container " + container + " for unknown application."); KillOrphanContainerOnNode(nm, container); continue; } // Unmanaged AM recovery is addressed in YARN-1815 if (rmApp.GetApplicationSubmissionContext().GetUnmanagedAM()) { Log.Info("Skip recovering container " + container + " for unmanaged AM." + rmApp. GetApplicationId()); KillOrphanContainerOnNode(nm, container); continue; } SchedulerApplication <T> schedulerApp = applications[appId]; if (schedulerApp == null) { Log.Info("Skip recovering container " + container + " for unknown SchedulerApplication. Application current state is " + rmApp.GetState()); KillOrphanContainerOnNode(nm, container); continue; } Log.Info("Recovering container " + container); SchedulerApplicationAttempt schedulerAttempt = schedulerApp.GetCurrentAppAttempt( ); if (!rmApp.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts ()) { // Do not recover containers for stopped attempt or previous attempt. if (schedulerAttempt.IsStopped() || !schedulerAttempt.GetApplicationAttemptId().Equals (container.GetContainerId().GetApplicationAttemptId())) { Log.Info("Skip recovering container " + container + " for already stopped attempt." ); KillOrphanContainerOnNode(nm, container); continue; } } // create container RMContainer rmContainer = RecoverAndCreateContainer(container, nm); // recover RMContainer rmContainer.Handle(new RMContainerRecoverEvent(container.GetContainerId(), container )); // recover scheduler node nodes[nm.GetNodeID()].RecoverContainer(rmContainer); // recover queue: update headroom etc. Queue queue = schedulerAttempt.GetQueue(); queue.RecoverContainer(clusterResource, schedulerAttempt, rmContainer); // recover scheduler attempt schedulerAttempt.RecoverContainer(rmContainer); // set master container for the current running AMContainer for this // attempt. RMAppAttempt appAttempt = rmApp.GetCurrentAppAttempt(); if (appAttempt != null) { Container masterContainer = appAttempt.GetMasterContainer(); // Mark current running AMContainer's RMContainer based on the master // container ID stored in AppAttempt. if (masterContainer != null && masterContainer.GetId().Equals(rmContainer.GetContainerId ())) { ((RMContainerImpl)rmContainer).SetAMContainer(true); } } lock (schedulerAttempt) { ICollection <ContainerId> releases = schedulerAttempt.GetPendingRelease(); if (releases.Contains(container.GetContainerId())) { // release the container rmContainer.Handle(new RMContainerFinishedEvent(container.GetContainerId(), SchedulerUtils .CreateAbnormalContainerStatus(container.GetContainerId(), SchedulerUtils.ReleasedContainer ), RMContainerEventType.Released)); releases.Remove(container.GetContainerId()); Log.Info(container.GetContainerId() + " is released by application."); } } } } }
public virtual int Run(string[] args) { Log.Info("Starting ZKRMStateStorePerf ver." + version); int numApp = ZkPerfNumAppDefault; int numAppAttemptPerApp = ZkPerfNumAppattemptPerApp; string hostPort = null; bool launchLocalZK = true; if (args.Length == 0) { System.Console.Error.WriteLine("Missing arguments."); return(-1); } for (int i = 0; i < args.Length; i++) { // parse command line if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-appsize")) { numApp = System.Convert.ToInt32(args[++i]); } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-appattemptsize")) { numAppAttemptPerApp = System.Convert.ToInt32(args[++i]); } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-hostPort")) { hostPort = args[++i]; launchLocalZK = false; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-workingZnode")) { workingZnode = args[++i]; } else { System.Console.Error.WriteLine("Illegal argument: " + args[i]); return(-1); } } } } } if (launchLocalZK) { try { SetUp(); } catch (Exception e) { System.Console.Error.WriteLine("failed to setup. : " + e.Message); return(-1); } } InitStore(hostPort); long submitTime = Runtime.CurrentTimeMillis(); long startTime = Runtime.CurrentTimeMillis() + 1234; AList <ApplicationId> applicationIds = new AList <ApplicationId>(); AList <RMApp> rmApps = new AList <RMApp>(); AList <ApplicationAttemptId> attemptIds = new AList <ApplicationAttemptId>(); Dictionary <ApplicationId, ICollection <ApplicationAttemptId> > appIdsToAttemptId = new Dictionary <ApplicationId, ICollection <ApplicationAttemptId> >(); RMStateStoreTestBase.TestDispatcher dispatcher = new RMStateStoreTestBase.TestDispatcher (); store.SetRMDispatcher(dispatcher); for (int i_1 = 0; i_1 < numApp; i_1++) { ApplicationId appId = ApplicationId.NewInstance(clusterTimeStamp, i_1); applicationIds.AddItem(appId); AList <ApplicationAttemptId> attemptIdsForThisApp = new AList <ApplicationAttemptId >(); for (int j = 0; j < numAppAttemptPerApp; j++) { ApplicationAttemptId attemptId = ApplicationAttemptId.NewInstance(appId, j); attemptIdsForThisApp.AddItem(attemptId); } appIdsToAttemptId[appId] = new LinkedHashSet(attemptIdsForThisApp); Sharpen.Collections.AddAll(attemptIds, attemptIdsForThisApp); } foreach (ApplicationId appId_1 in applicationIds) { RMApp app = null; try { app = StoreApp(store, appId_1, submitTime, startTime); } catch (Exception e) { System.Console.Error.WriteLine("failed to create Application Znode. : " + e.Message ); return(-1); } WaitNotify(dispatcher); rmApps.AddItem(app); } foreach (ApplicationAttemptId attemptId_1 in attemptIds) { Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> tokenId = GenerateAMRMToken (attemptId_1, appTokenMgr); SecretKey clientTokenKey = clientToAMTokenMgr.CreateMasterKey(attemptId_1); try { StoreAttempt(store, attemptId_1, ContainerId.NewContainerId(attemptId_1, 0L).ToString (), tokenId, clientTokenKey, dispatcher); } catch (Exception e) { System.Console.Error.WriteLine("failed to create AppAttempt Znode. : " + e.Message ); return(-1); } } long storeStart = Runtime.CurrentTimeMillis(); try { store.LoadState(); } catch (Exception e) { System.Console.Error.WriteLine("failed to locaState from ZKRMStateStore. : " + e. Message); return(-1); } long storeEnd = Runtime.CurrentTimeMillis(); long loadTime = storeEnd - storeStart; string resultMsg = "ZKRMStateStore takes " + loadTime + " msec to loadState."; Log.Info(resultMsg); System.Console.Out.WriteLine(resultMsg); // cleanup try { foreach (RMApp app in rmApps) { ApplicationStateData appState = ApplicationStateData.NewInstance(app.GetSubmitTime (), app.GetStartTime(), app.GetApplicationSubmissionContext(), app.GetUser()); ApplicationId appId = app.GetApplicationId(); IDictionary m = Org.Mockito.Mockito.Mock <IDictionary>(); Org.Mockito.Mockito.When(m.Keys).ThenReturn(appIdsToAttemptId[appId_1]); appState.attempts = m; store.RemoveApplicationStateInternal(appState); } } catch (Exception e) { System.Console.Error.WriteLine("failed to cleanup. : " + e.Message); return(-1); } return(0); }
public virtual void TestFencedState() { TestZKRMStateStore.TestZKRMStateStoreTester zkTester = new TestZKRMStateStore.TestZKRMStateStoreTester (this); RMStateStore store = zkTester.GetRMStateStore(); // Move state to FENCED from ACTIVE store.UpdateFencedState(); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); long submitTime = Runtime.CurrentTimeMillis(); long startTime = submitTime + 1000; // Add a new app RMApp mockApp = Org.Mockito.Mockito.Mock <RMApp>(); ApplicationSubmissionContext context = new ApplicationSubmissionContextPBImpl(); Org.Mockito.Mockito.When(mockApp.GetSubmitTime()).ThenReturn(submitTime); Org.Mockito.Mockito.When(mockApp.GetStartTime()).ThenReturn(startTime); Org.Mockito.Mockito.When(mockApp.GetApplicationSubmissionContext()).ThenReturn(context ); Org.Mockito.Mockito.When(mockApp.GetUser()).ThenReturn("test"); store.StoreNewApplication(mockApp); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); // Add a new attempt ClientToAMTokenSecretManagerInRM clientToAMTokenMgr = new ClientToAMTokenSecretManagerInRM (); ApplicationAttemptId attemptId = ConverterUtils.ToApplicationAttemptId("appattempt_1234567894321_0001_000001" ); SecretKey clientTokenMasterKey = clientToAMTokenMgr.CreateMasterKey(attemptId); RMAppAttemptMetrics mockRmAppAttemptMetrics = Org.Mockito.Mockito.Mock <RMAppAttemptMetrics >(); Container container = new ContainerPBImpl(); container.SetId(ConverterUtils.ToContainerId("container_1234567891234_0001_01_000001" )); RMAppAttempt mockAttempt = Org.Mockito.Mockito.Mock <RMAppAttempt>(); Org.Mockito.Mockito.When(mockAttempt.GetAppAttemptId()).ThenReturn(attemptId); Org.Mockito.Mockito.When(mockAttempt.GetMasterContainer()).ThenReturn(container); Org.Mockito.Mockito.When(mockAttempt.GetClientTokenMasterKey()).ThenReturn(clientTokenMasterKey ); Org.Mockito.Mockito.When(mockAttempt.GetRMAppAttemptMetrics()).ThenReturn(mockRmAppAttemptMetrics ); Org.Mockito.Mockito.When(mockRmAppAttemptMetrics.GetAggregateAppResourceUsage()). ThenReturn(new AggregateAppResourceUsage(0, 0)); store.StoreNewApplicationAttempt(mockAttempt); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); long finishTime = submitTime + 1000; // Update attempt ApplicationAttemptStateData newAttemptState = ApplicationAttemptStateData.NewInstance (attemptId, container, store.GetCredentialsFromAppAttempt(mockAttempt), startTime , RMAppAttemptState.Finished, "testUrl", "test", FinalApplicationStatus.Succeeded , 100, finishTime, 0, 0); store.UpdateApplicationAttemptState(newAttemptState); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); // Update app ApplicationStateData appState = ApplicationStateData.NewInstance(submitTime, startTime , context, "test"); store.UpdateApplicationState(appState); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); // Remove app store.RemoveApplication(mockApp); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); // store RM delegation token; RMDelegationTokenIdentifier dtId1 = new RMDelegationTokenIdentifier(new Text("owner1" ), new Text("renewer1"), new Text("realuser1")); long renewDate1 = Runtime.CurrentTimeMillis(); dtId1.SetSequenceNumber(1111); store.StoreRMDelegationToken(dtId1, renewDate1); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); store.UpdateRMDelegationToken(dtId1, renewDate1); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); // remove delegation key; store.RemoveRMDelegationToken(dtId1); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); // store delegation master key; DelegationKey key = new DelegationKey(1234, 4321, Sharpen.Runtime.GetBytesForString ("keyBytes")); store.StoreRMDTMasterKey(key); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); // remove delegation master key; store.RemoveRMDTMasterKey(key); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); // store or update AMRMToken; store.StoreOrUpdateAMRMTokenSecretManager(null, false); NUnit.Framework.Assert.AreEqual("RMStateStore should have been in fenced state", true, store.IsFencedState()); store.Close(); }
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/> /// <exception cref="System.IO.IOException"/> public virtual AllocateResponse Allocate(AllocateRequest request) { AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest(); ApplicationAttemptId appAttemptId = amrmTokenIdentifier.GetApplicationAttemptId(); ApplicationId applicationId = appAttemptId.GetApplicationId(); this.amLivelinessMonitor.ReceivedPing(appAttemptId); /* check if its in cache */ ApplicationMasterService.AllocateResponseLock Lock = responseMap[appAttemptId]; if (Lock == null) { string message = "Application attempt " + appAttemptId + " doesn't exist in ApplicationMasterService cache."; Log.Error(message); throw new ApplicationAttemptNotFoundException(message); } lock (Lock) { AllocateResponse lastResponse = Lock.GetAllocateResponse(); if (!HasApplicationMasterRegistered(appAttemptId)) { string message = "AM is not registered for known application attempt: " + appAttemptId + " or RM had restarted after AM registered . AM should re-register."; Log.Info(message); RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appAttemptId.GetApplicationId ()].GetUser(), RMAuditLogger.AuditConstants.AmAllocate, string.Empty, "ApplicationMasterService" , message, applicationId, appAttemptId); throw new ApplicationMasterNotRegisteredException(message); } if ((request.GetResponseId() + 1) == lastResponse.GetResponseId()) { /* old heartbeat */ return(lastResponse); } else { if (request.GetResponseId() + 1 < lastResponse.GetResponseId()) { string message = "Invalid responseId in AllocateRequest from application attempt: " + appAttemptId + ", expect responseId to be " + (lastResponse.GetResponseId() + 1); throw new InvalidApplicationMasterRequestException(message); } } //filter illegal progress values float filteredProgress = request.GetProgress(); if (float.IsNaN(filteredProgress) || filteredProgress == float.NegativeInfinity || filteredProgress < 0) { request.SetProgress(0); } else { if (filteredProgress > 1 || filteredProgress == float.PositiveInfinity) { request.SetProgress(1); } } // Send the status update to the appAttempt. this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMAppAttemptStatusupdateEvent (appAttemptId, request.GetProgress())); IList <ResourceRequest> ask = request.GetAskList(); IList <ContainerId> release = request.GetReleaseList(); ResourceBlacklistRequest blacklistRequest = request.GetResourceBlacklistRequest(); IList <string> blacklistAdditions = (blacklistRequest != null) ? blacklistRequest. GetBlacklistAdditions() : Sharpen.Collections.EmptyList; IList <string> blacklistRemovals = (blacklistRequest != null) ? blacklistRequest.GetBlacklistRemovals () : Sharpen.Collections.EmptyList; RMApp app = this.rmContext.GetRMApps()[applicationId]; // set label expression for Resource Requests if resourceName=ANY ApplicationSubmissionContext asc = app.GetApplicationSubmissionContext(); foreach (ResourceRequest req in ask) { if (null == req.GetNodeLabelExpression() && ResourceRequest.Any.Equals(req.GetResourceName ())) { req.SetNodeLabelExpression(asc.GetNodeLabelExpression()); } } // sanity check try { RMServerUtils.NormalizeAndValidateRequests(ask, rScheduler.GetMaximumResourceCapability (), app.GetQueue(), rScheduler, rmContext); } catch (InvalidResourceRequestException e) { Log.Warn("Invalid resource ask by application " + appAttemptId, e); throw; } try { RMServerUtils.ValidateBlacklistRequest(blacklistRequest); } catch (InvalidResourceBlacklistRequestException e) { Log.Warn("Invalid blacklist request by application " + appAttemptId, e); throw; } // In the case of work-preserving AM restart, it's possible for the // AM to release containers from the earlier attempt. if (!app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts ()) { try { RMServerUtils.ValidateContainerReleaseRequest(release, appAttemptId); } catch (InvalidContainerReleaseException e) { Log.Warn("Invalid container release by application " + appAttemptId, e); throw; } } // Send new requests to appAttempt. Allocation allocation = this.rScheduler.Allocate(appAttemptId, ask, release, blacklistAdditions , blacklistRemovals); if (!blacklistAdditions.IsEmpty() || !blacklistRemovals.IsEmpty()) { Log.Info("blacklist are updated in Scheduler." + "blacklistAdditions: " + blacklistAdditions + ", " + "blacklistRemovals: " + blacklistRemovals); } RMAppAttempt appAttempt = app.GetRMAppAttempt(appAttemptId); AllocateResponse allocateResponse = recordFactory.NewRecordInstance <AllocateResponse >(); if (!allocation.GetContainers().IsEmpty()) { allocateResponse.SetNMTokens(allocation.GetNMTokens()); } // update the response with the deltas of node status changes IList <RMNode> updatedNodes = new AList <RMNode>(); if (app.PullRMNodeUpdates(updatedNodes) > 0) { IList <NodeReport> updatedNodeReports = new AList <NodeReport>(); foreach (RMNode rmNode in updatedNodes) { SchedulerNodeReport schedulerNodeReport = rScheduler.GetNodeReport(rmNode.GetNodeID ()); Resource used = BuilderUtils.NewResource(0, 0); int numContainers = 0; if (schedulerNodeReport != null) { used = schedulerNodeReport.GetUsedResource(); numContainers = schedulerNodeReport.GetNumContainers(); } NodeId nodeId = rmNode.GetNodeID(); NodeReport report = BuilderUtils.NewNodeReport(nodeId, rmNode.GetState(), rmNode. GetHttpAddress(), rmNode.GetRackName(), used, rmNode.GetTotalCapability(), numContainers , rmNode.GetHealthReport(), rmNode.GetLastHealthReportTime(), rmNode.GetNodeLabels ()); updatedNodeReports.AddItem(report); } allocateResponse.SetUpdatedNodes(updatedNodeReports); } allocateResponse.SetAllocatedContainers(allocation.GetContainers()); allocateResponse.SetCompletedContainersStatuses(appAttempt.PullJustFinishedContainers ()); allocateResponse.SetResponseId(lastResponse.GetResponseId() + 1); allocateResponse.SetAvailableResources(allocation.GetResourceLimit()); allocateResponse.SetNumClusterNodes(this.rScheduler.GetNumClusterNodes()); // add preemption to the allocateResponse message (if any) allocateResponse.SetPreemptionMessage(GeneratePreemptionMessage(allocation)); // update AMRMToken if the token is rolled-up MasterKeyData nextMasterKey = this.rmContext.GetAMRMTokenSecretManager().GetNextMasterKeyData (); if (nextMasterKey != null && nextMasterKey.GetMasterKey().GetKeyId() != amrmTokenIdentifier .GetKeyId()) { RMAppAttemptImpl appAttemptImpl = (RMAppAttemptImpl)appAttempt; Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> amrmToken = appAttempt .GetAMRMToken(); if (nextMasterKey.GetMasterKey().GetKeyId() != appAttemptImpl.GetAMRMTokenKeyId()) { Log.Info("The AMRMToken has been rolled-over. Send new AMRMToken back" + " to application: " + applicationId); amrmToken = rmContext.GetAMRMTokenSecretManager().CreateAndGetAMRMToken(appAttemptId ); appAttemptImpl.SetAMRMToken(amrmToken); } allocateResponse.SetAMRMToken(Org.Apache.Hadoop.Yarn.Api.Records.Token.NewInstance (amrmToken.GetIdentifier(), amrmToken.GetKind().ToString(), amrmToken.GetPassword (), amrmToken.GetService().ToString())); } /* * As we are updating the response inside the lock object so we don't * need to worry about unregister call occurring in between (which * removes the lock object). */ Lock.SetAllocateResponse(allocateResponse); return(allocateResponse); } }
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/> /// <exception cref="System.IO.IOException"/> public virtual RegisterApplicationMasterResponse RegisterApplicationMaster(RegisterApplicationMasterRequest request) { AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest(); ApplicationAttemptId applicationAttemptId = amrmTokenIdentifier.GetApplicationAttemptId (); ApplicationId appID = applicationAttemptId.GetApplicationId(); ApplicationMasterService.AllocateResponseLock Lock = responseMap[applicationAttemptId ]; if (Lock == null) { RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(), RMAuditLogger.AuditConstants .RegisterAm, "Application doesn't exist in cache " + applicationAttemptId, "ApplicationMasterService" , "Error in registering application master", appID, applicationAttemptId); ThrowApplicationDoesNotExistInCacheException(applicationAttemptId); } // Allow only one thread in AM to do registerApp at a time. lock (Lock) { AllocateResponse lastResponse = Lock.GetAllocateResponse(); if (HasApplicationMasterRegistered(applicationAttemptId)) { string message = "Application Master is already registered : " + appID; Log.Warn(message); RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(), RMAuditLogger.AuditConstants .RegisterAm, string.Empty, "ApplicationMasterService", message, appID, applicationAttemptId ); throw new InvalidApplicationMasterRequestException(message); } this.amLivelinessMonitor.ReceivedPing(applicationAttemptId); RMApp app = this.rmContext.GetRMApps()[appID]; // Setting the response id to 0 to identify if the // application master is register for the respective attemptid lastResponse.SetResponseId(0); Lock.SetAllocateResponse(lastResponse); Log.Info("AM registration " + applicationAttemptId); this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMAppAttemptRegistrationEvent (applicationAttemptId, request.GetHost(), request.GetRpcPort(), request.GetTrackingUrl ())); RMAuditLogger.LogSuccess(app.GetUser(), RMAuditLogger.AuditConstants.RegisterAm, "ApplicationMasterService", appID, applicationAttemptId); // Pick up min/max resource from scheduler... RegisterApplicationMasterResponse response = recordFactory.NewRecordInstance <RegisterApplicationMasterResponse >(); response.SetMaximumResourceCapability(rScheduler.GetMaximumResourceCapability(app .GetQueue())); response.SetApplicationACLs(app.GetRMAppAttempt(applicationAttemptId).GetSubmissionContext ().GetAMContainerSpec().GetApplicationACLs()); response.SetQueue(app.GetQueue()); if (UserGroupInformation.IsSecurityEnabled()) { Log.Info("Setting client token master key"); response.SetClientToAMTokenMasterKey(ByteBuffer.Wrap(rmContext.GetClientToAMTokenSecretManager ().GetMasterKey(applicationAttemptId).GetEncoded())); } // For work-preserving AM restart, retrieve previous attempts' containers // and corresponding NM tokens. if (app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts ()) { IList <Container> transferredContainers = ((AbstractYarnScheduler)rScheduler).GetTransferredContainers (applicationAttemptId); if (!transferredContainers.IsEmpty()) { response.SetContainersFromPreviousAttempts(transferredContainers); IList <NMToken> nmTokens = new AList <NMToken>(); foreach (Container container in transferredContainers) { try { NMToken token = rmContext.GetNMTokenSecretManager().CreateAndGetNMToken(app.GetUser (), applicationAttemptId, container); if (null != token) { nmTokens.AddItem(token); } } catch (ArgumentException e) { // if it's a DNS issue, throw UnknowHostException directly and // that // will be automatically retried by RMProxy in RPC layer. if (e.InnerException is UnknownHostException) { throw (UnknownHostException)e.InnerException; } } } response.SetNMTokensFromPreviousAttempts(nmTokens); Log.Info("Application " + appID + " retrieved " + transferredContainers.Count + " containers from previous" + " attempts and " + nmTokens.Count + " NM tokens."); } } response.SetSchedulerResourceTypes(rScheduler.GetSchedulingResourceTypes()); return(response); } }
/// <exception cref="System.Exception"/> internal virtual void TestRMAppStateStore(RMStateStoreTestBase.RMStateStoreHelper stateStoreHelper, RMStateStoreTestBase.StoreStateVerifier verifier) { long submitTime = Runtime.CurrentTimeMillis(); long startTime = Runtime.CurrentTimeMillis() + 1234; Configuration conf = new YarnConfiguration(); RMStateStore store = stateStoreHelper.GetRMStateStore(); RMStateStoreTestBase.TestDispatcher dispatcher = new RMStateStoreTestBase.TestDispatcher (); store.SetRMDispatcher(dispatcher); RMContext rmContext = Org.Mockito.Mockito.Mock <RMContext>(); Org.Mockito.Mockito.When(rmContext.GetStateStore()).ThenReturn(store); AMRMTokenSecretManager appTokenMgr = Org.Mockito.Mockito.Spy(new AMRMTokenSecretManager (conf, rmContext)); MasterKeyData masterKeyData = appTokenMgr.CreateNewMasterKey(); Org.Mockito.Mockito.When(appTokenMgr.GetMasterKey()).ThenReturn(masterKeyData); ClientToAMTokenSecretManagerInRM clientToAMTokenMgr = new ClientToAMTokenSecretManagerInRM (); ApplicationAttemptId attemptId1 = ConverterUtils.ToApplicationAttemptId("appattempt_1352994193343_0001_000001" ); ApplicationId appId1 = attemptId1.GetApplicationId(); StoreApp(store, appId1, submitTime, startTime); verifier.AfterStoreApp(store, appId1); // create application token and client token key for attempt1 Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> appAttemptToken1 = GenerateAMRMToken (attemptId1, appTokenMgr); SecretKey clientTokenKey1 = clientToAMTokenMgr.CreateMasterKey(attemptId1); ContainerId containerId1 = StoreAttempt(store, attemptId1, "container_1352994193343_0001_01_000001" , appAttemptToken1, clientTokenKey1, dispatcher); string appAttemptIdStr2 = "appattempt_1352994193343_0001_000002"; ApplicationAttemptId attemptId2 = ConverterUtils.ToApplicationAttemptId(appAttemptIdStr2 ); // create application token and client token key for attempt2 Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> appAttemptToken2 = GenerateAMRMToken (attemptId2, appTokenMgr); SecretKey clientTokenKey2 = clientToAMTokenMgr.CreateMasterKey(attemptId2); ContainerId containerId2 = StoreAttempt(store, attemptId2, "container_1352994193343_0001_02_000001" , appAttemptToken2, clientTokenKey2, dispatcher); ApplicationAttemptId attemptIdRemoved = ConverterUtils.ToApplicationAttemptId("appattempt_1352994193343_0002_000001" ); ApplicationId appIdRemoved = attemptIdRemoved.GetApplicationId(); StoreApp(store, appIdRemoved, submitTime, startTime); StoreAttempt(store, attemptIdRemoved, "container_1352994193343_0002_01_000001", null , null, dispatcher); verifier.AfterStoreAppAttempt(store, attemptIdRemoved); RMApp mockRemovedApp = Org.Mockito.Mockito.Mock <RMApp>(); RMAppAttemptMetrics mockRmAppAttemptMetrics = Org.Mockito.Mockito.Mock <RMAppAttemptMetrics >(); Dictionary <ApplicationAttemptId, RMAppAttempt> attempts = new Dictionary <ApplicationAttemptId , RMAppAttempt>(); ApplicationSubmissionContext context = new ApplicationSubmissionContextPBImpl(); context.SetApplicationId(appIdRemoved); Org.Mockito.Mockito.When(mockRemovedApp.GetSubmitTime()).ThenReturn(submitTime); Org.Mockito.Mockito.When(mockRemovedApp.GetApplicationSubmissionContext()).ThenReturn (context); Org.Mockito.Mockito.When(mockRemovedApp.GetAppAttempts()).ThenReturn(attempts); Org.Mockito.Mockito.When(mockRemovedApp.GetUser()).ThenReturn("user1"); RMAppAttempt mockRemovedAttempt = Org.Mockito.Mockito.Mock <RMAppAttempt>(); Org.Mockito.Mockito.When(mockRemovedAttempt.GetAppAttemptId()).ThenReturn(attemptIdRemoved ); Org.Mockito.Mockito.When(mockRemovedAttempt.GetRMAppAttemptMetrics()).ThenReturn( mockRmAppAttemptMetrics); Org.Mockito.Mockito.When(mockRmAppAttemptMetrics.GetAggregateAppResourceUsage()). ThenReturn(new AggregateAppResourceUsage(0, 0)); attempts[attemptIdRemoved] = mockRemovedAttempt; store.RemoveApplication(mockRemovedApp); // remove application directory recursively. StoreApp(store, appIdRemoved, submitTime, startTime); StoreAttempt(store, attemptIdRemoved, "container_1352994193343_0002_01_000001", null , null, dispatcher); store.RemoveApplication(mockRemovedApp); // let things settle down Sharpen.Thread.Sleep(1000); store.Close(); // give tester a chance to modify app state in the store ModifyAppState(); // load state store = stateStoreHelper.GetRMStateStore(); store.SetRMDispatcher(dispatcher); RMStateStore.RMState state = store.LoadState(); IDictionary <ApplicationId, ApplicationStateData> rmAppState = state.GetApplicationState (); ApplicationStateData appState = rmAppState[appId1]; // app is loaded NUnit.Framework.Assert.IsNotNull(appState); // app is loaded correctly NUnit.Framework.Assert.AreEqual(submitTime, appState.GetSubmitTime()); NUnit.Framework.Assert.AreEqual(startTime, appState.GetStartTime()); // submission context is loaded correctly NUnit.Framework.Assert.AreEqual(appId1, appState.GetApplicationSubmissionContext( ).GetApplicationId()); ApplicationAttemptStateData attemptState = appState.GetAttempt(attemptId1); // attempt1 is loaded correctly NUnit.Framework.Assert.IsNotNull(attemptState); NUnit.Framework.Assert.AreEqual(attemptId1, attemptState.GetAttemptId()); NUnit.Framework.Assert.AreEqual(-1000, attemptState.GetAMContainerExitStatus()); // attempt1 container is loaded correctly NUnit.Framework.Assert.AreEqual(containerId1, attemptState.GetMasterContainer().GetId ()); // attempt1 client token master key is loaded correctly Assert.AssertArrayEquals(clientTokenKey1.GetEncoded(), attemptState.GetAppAttemptTokens ().GetSecretKey(RMStateStore.AmClientTokenMasterKeyName)); attemptState = appState.GetAttempt(attemptId2); // attempt2 is loaded correctly NUnit.Framework.Assert.IsNotNull(attemptState); NUnit.Framework.Assert.AreEqual(attemptId2, attemptState.GetAttemptId()); // attempt2 container is loaded correctly NUnit.Framework.Assert.AreEqual(containerId2, attemptState.GetMasterContainer().GetId ()); // attempt2 client token master key is loaded correctly Assert.AssertArrayEquals(clientTokenKey2.GetEncoded(), attemptState.GetAppAttemptTokens ().GetSecretKey(RMStateStore.AmClientTokenMasterKeyName)); //******* update application/attempt state *******// ApplicationStateData appState2 = ApplicationStateData.NewInstance(appState.GetSubmitTime (), appState.GetStartTime(), appState.GetUser(), appState.GetApplicationSubmissionContext (), RMAppState.Finished, "appDiagnostics", 1234); appState2.attempts.PutAll(appState.attempts); store.UpdateApplicationState(appState2); ApplicationAttemptStateData oldAttemptState = attemptState; ApplicationAttemptStateData newAttemptState = ApplicationAttemptStateData.NewInstance (oldAttemptState.GetAttemptId(), oldAttemptState.GetMasterContainer(), oldAttemptState .GetAppAttemptTokens(), oldAttemptState.GetStartTime(), RMAppAttemptState.Finished , "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.Succeeded, 100, oldAttemptState.GetFinishTime(), 0, 0); store.UpdateApplicationAttemptState(newAttemptState); // test updating the state of an app/attempt whose initial state was not // saved. ApplicationId dummyAppId = ApplicationId.NewInstance(1234, 10); ApplicationSubmissionContext dummyContext = new ApplicationSubmissionContextPBImpl (); dummyContext.SetApplicationId(dummyAppId); ApplicationStateData dummyApp = ApplicationStateData.NewInstance(appState.GetSubmitTime (), appState.GetStartTime(), appState.GetUser(), dummyContext, RMAppState.Finished , "appDiagnostics", 1234); store.UpdateApplicationState(dummyApp); ApplicationAttemptId dummyAttemptId = ApplicationAttemptId.NewInstance(dummyAppId , 6); ApplicationAttemptStateData dummyAttempt = ApplicationAttemptStateData.NewInstance (dummyAttemptId, oldAttemptState.GetMasterContainer(), oldAttemptState.GetAppAttemptTokens (), oldAttemptState.GetStartTime(), RMAppAttemptState.Finished, "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.Succeeded, 111, oldAttemptState.GetFinishTime (), 0, 0); store.UpdateApplicationAttemptState(dummyAttempt); // let things settle down Sharpen.Thread.Sleep(1000); store.Close(); // check updated application state. store = stateStoreHelper.GetRMStateStore(); store.SetRMDispatcher(dispatcher); RMStateStore.RMState newRMState = store.LoadState(); IDictionary <ApplicationId, ApplicationStateData> newRMAppState = newRMState.GetApplicationState (); NUnit.Framework.Assert.IsNotNull(newRMAppState[dummyApp.GetApplicationSubmissionContext ().GetApplicationId()]); ApplicationStateData updatedAppState = newRMAppState[appId1]; NUnit.Framework.Assert.AreEqual(appState.GetApplicationSubmissionContext().GetApplicationId (), updatedAppState.GetApplicationSubmissionContext().GetApplicationId()); NUnit.Framework.Assert.AreEqual(appState.GetSubmitTime(), updatedAppState.GetSubmitTime ()); NUnit.Framework.Assert.AreEqual(appState.GetStartTime(), updatedAppState.GetStartTime ()); NUnit.Framework.Assert.AreEqual(appState.GetUser(), updatedAppState.GetUser()); // new app state fields NUnit.Framework.Assert.AreEqual(RMAppState.Finished, updatedAppState.GetState()); NUnit.Framework.Assert.AreEqual("appDiagnostics", updatedAppState.GetDiagnostics( )); NUnit.Framework.Assert.AreEqual(1234, updatedAppState.GetFinishTime()); // check updated attempt state NUnit.Framework.Assert.IsNotNull(newRMAppState[dummyApp.GetApplicationSubmissionContext ().GetApplicationId()].GetAttempt(dummyAttemptId)); ApplicationAttemptStateData updatedAttemptState = updatedAppState.GetAttempt(newAttemptState .GetAttemptId()); NUnit.Framework.Assert.AreEqual(oldAttemptState.GetAttemptId(), updatedAttemptState .GetAttemptId()); NUnit.Framework.Assert.AreEqual(containerId2, updatedAttemptState.GetMasterContainer ().GetId()); Assert.AssertArrayEquals(clientTokenKey2.GetEncoded(), attemptState.GetAppAttemptTokens ().GetSecretKey(RMStateStore.AmClientTokenMasterKeyName)); // new attempt state fields NUnit.Framework.Assert.AreEqual(RMAppAttemptState.Finished, updatedAttemptState.GetState ()); NUnit.Framework.Assert.AreEqual("myTrackingUrl", updatedAttemptState.GetFinalTrackingUrl ()); NUnit.Framework.Assert.AreEqual("attemptDiagnostics", updatedAttemptState.GetDiagnostics ()); NUnit.Framework.Assert.AreEqual(100, updatedAttemptState.GetAMContainerExitStatus ()); NUnit.Framework.Assert.AreEqual(FinalApplicationStatus.Succeeded, updatedAttemptState .GetFinalApplicationStatus()); // assert store is in expected state after everything is cleaned NUnit.Framework.Assert.IsTrue(stateStoreHelper.IsFinalStateValid()); store.Close(); }