protected override void GenerateApplicationTable(HtmlBlock.Block html, UserGroupInformation callerUGI, ICollection <ApplicationAttemptReport> attempts) { // Application Attempt Table Hamlet.TBODY <Hamlet.TABLE <Org.Apache.Hadoop.Yarn.Webapp.Hamlet.Hamlet> > tbody = html .Table("#attempts").Thead().Tr().Th(".id", "Attempt ID").Th(".started", "Started" ).Th(".node", "Node").Th(".logs", "Logs").Th(".blacklistednodes", "Blacklisted Nodes" ).().().Tbody(); RMApp rmApp = this.rm.GetRMContext().GetRMApps()[this.appID]; if (rmApp == null) { return; } StringBuilder attemptsTableData = new StringBuilder("[\n"); foreach (ApplicationAttemptReport appAttemptReport in attempts) { RMAppAttempt rmAppAttempt = rmApp.GetRMAppAttempt(appAttemptReport.GetApplicationAttemptId ()); if (rmAppAttempt == null) { continue; } AppAttemptInfo attemptInfo = new AppAttemptInfo(this.rm, rmAppAttempt, rmApp.GetUser (), WebAppUtils.GetHttpSchemePrefix(conf)); string blacklistedNodesCount = "N/A"; ICollection <string> nodes = RMAppAttemptBlock.GetBlacklistedNodes(rm, rmAppAttempt .GetAppAttemptId()); if (nodes != null) { blacklistedNodesCount = nodes.Count.ToString(); } string nodeLink = attemptInfo.GetNodeHttpAddress(); if (nodeLink != null) { nodeLink = WebAppUtils.GetHttpSchemePrefix(conf) + nodeLink; } string logsLink = attemptInfo.GetLogsLink(); attemptsTableData.Append("[\"<a href='").Append(Url("appattempt", rmAppAttempt.GetAppAttemptId ().ToString())).Append("'>").Append(rmAppAttempt.GetAppAttemptId().ToString()).Append ("</a>\",\"").Append(attemptInfo.GetStartTime()).Append("\",\"<a ").Append(nodeLink == null ? "#" : "href='" + nodeLink).Append("'>").Append(nodeLink == null ? "N/A" : StringEscapeUtils.EscapeJavaScript(StringEscapeUtils.EscapeHtml(nodeLink))).Append ("</a>\",\"<a ").Append(logsLink == null ? "#" : "href='" + logsLink).Append("'>" ).Append(logsLink == null ? "N/A" : "Logs").Append("</a>\",").Append("\"").Append (blacklistedNodesCount).Append("\"],\n"); } if (attemptsTableData[attemptsTableData.Length - 2] == ',') { attemptsTableData.Delete(attemptsTableData.Length - 2, attemptsTableData.Length - 1); } attemptsTableData.Append("]"); html.Script().$type("text/javascript").("var attemptsTableData=" + attemptsTableData ).(); tbody.().(); }
public virtual void TestAMLaunchAndCleanup() { Logger rootLogger = LogManager.GetRootLogger(); rootLogger.SetLevel(Level.Debug); TestApplicationMasterLauncher.MyContainerManagerImpl containerManager = new TestApplicationMasterLauncher.MyContainerManagerImpl (); MockRMWithCustomAMLauncher rm = new MockRMWithCustomAMLauncher(containerManager); rm.Start(); MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5120); RMApp app = rm.SubmitApp(2000); // kick the scheduling nm1.NodeHeartbeat(true); int waitCount = 0; while (containerManager.launched == false && waitCount++ < 20) { Log.Info("Waiting for AM Launch to happen.."); Sharpen.Thread.Sleep(1000); } NUnit.Framework.Assert.IsTrue(containerManager.launched); RMAppAttempt attempt = app.GetCurrentAppAttempt(); ApplicationAttemptId appAttemptId = attempt.GetAppAttemptId(); NUnit.Framework.Assert.AreEqual(appAttemptId.ToString(), containerManager.attemptIdAtContainerManager ); NUnit.Framework.Assert.AreEqual(app.GetSubmitTime(), containerManager.submitTimeAtContainerManager ); NUnit.Framework.Assert.AreEqual(app.GetRMAppAttempt(appAttemptId).GetMasterContainer ().GetId().ToString(), containerManager.containerIdAtContainerManager); NUnit.Framework.Assert.AreEqual(nm1.GetNodeId().ToString(), containerManager.nmHostAtContainerManager ); NUnit.Framework.Assert.AreEqual(YarnConfiguration.DefaultRmAmMaxAttempts, containerManager .maxAppAttempts); MockAM am = new MockAM(rm.GetRMContext(), rm.GetApplicationMasterService(), appAttemptId ); am.RegisterAppAttempt(); am.UnregisterAppAttempt(); //complete the AM container to finish the app normally nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete); am.WaitForState(RMAppAttemptState.Finished); waitCount = 0; while (containerManager.cleanedup == false && waitCount++ < 20) { Log.Info("Waiting for AM Cleanup to happen.."); Sharpen.Thread.Sleep(1000); } NUnit.Framework.Assert.IsTrue(containerManager.cleanedup); am.WaitForState(RMAppAttemptState.Finished); rm.Stop(); }
/// <exception cref="System.Exception"/> public virtual void WaitForState(RMAppAttemptState finalState) { RMApp app = context.GetRMApps()[attemptId.GetApplicationId()]; RMAppAttempt attempt = app.GetRMAppAttempt(attemptId); int timeoutSecs = 0; while (!finalState.Equals(attempt.GetAppAttemptState()) && timeoutSecs++ < 40) { System.Console.Out.WriteLine("AppAttempt : " + attemptId + " State is : " + attempt .GetAppAttemptState() + " Waiting for state : " + finalState); Sharpen.Thread.Sleep(1000); } System.Console.Out.WriteLine("AppAttempt State is : " + attempt.GetAppAttemptState ()); NUnit.Framework.Assert.AreEqual("AppAttempt state is not correct (timedout)", finalState , attempt.GetAppAttemptState()); }
public virtual void SetUp() { CapacityScheduler spyCs = new CapacityScheduler(); cs = Org.Mockito.Mockito.Spy(spyCs); scheduler = cs; rmContext = TestUtils.GetMockRMContext(); spyRMContext = Org.Mockito.Mockito.Spy(rmContext); ConcurrentMap <ApplicationId, RMApp> spyApps = Org.Mockito.Mockito.Spy(new ConcurrentHashMap <ApplicationId, RMApp>()); RMApp rmApp = Org.Mockito.Mockito.Mock <RMApp>(); Org.Mockito.Mockito.When(rmApp.GetRMAppAttempt((ApplicationAttemptId)Matchers.Any ())).ThenReturn(null); Org.Mockito.Mockito.DoReturn(rmApp).When(spyApps)[(ApplicationId)Matchers.Any()]; Org.Mockito.Mockito.When(spyRMContext.GetRMApps()).ThenReturn(spyApps); Org.Mockito.Mockito.When(spyRMContext.GetScheduler()).ThenReturn(scheduler); CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(); ReservationSystemTestUtil.SetupQueueConfiguration(csConf); cs.SetConf(csConf); csContext = Org.Mockito.Mockito.Mock <CapacitySchedulerContext>(); Org.Mockito.Mockito.When(csContext.GetConfiguration()).ThenReturn(csConf); Org.Mockito.Mockito.When(csContext.GetConf()).ThenReturn(csConf); Org.Mockito.Mockito.When(csContext.GetMinimumResourceCapability()).ThenReturn(minAlloc ); Org.Mockito.Mockito.When(csContext.GetMaximumResourceCapability()).ThenReturn(maxAlloc ); Org.Mockito.Mockito.When(csContext.GetClusterResource()).ThenReturn(Resources.CreateResource (100 * 16 * Gb, 100 * 32)); Org.Mockito.Mockito.When(scheduler.GetClusterResource()).ThenReturn(Resources.CreateResource (125 * Gb, 125)); Org.Mockito.Mockito.When(csContext.GetResourceCalculator()).ThenReturn(new DefaultResourceCalculator ()); RMContainerTokenSecretManager containerTokenSecretManager = new RMContainerTokenSecretManager (csConf); containerTokenSecretManager.RollMasterKey(); Org.Mockito.Mockito.When(csContext.GetContainerTokenSecretManager()).ThenReturn(containerTokenSecretManager ); cs.SetRMContext(spyRMContext); cs.Init(csConf); cs.Start(); SetupPlanFollower(); }
public virtual void SetUp() { conf = CreateConfiguration(); ReservationSystemTestUtil.SetupFSAllocationFile(AllocFile); ReservationSystemTestUtil testUtil = new ReservationSystemTestUtil(); // Setup rmContext = TestUtils.GetMockRMContext(); spyRMContext = Org.Mockito.Mockito.Spy(rmContext); fs = ReservationSystemTestUtil.SetupFairScheduler(testUtil, spyRMContext, conf, 125 ); scheduler = fs; ConcurrentMap <ApplicationId, RMApp> spyApps = Org.Mockito.Mockito.Spy(new ConcurrentHashMap <ApplicationId, RMApp>()); RMApp rmApp = Org.Mockito.Mockito.Mock <RMApp>(); Org.Mockito.Mockito.When(rmApp.GetRMAppAttempt((ApplicationAttemptId)Matchers.Any ())).ThenReturn(null); Org.Mockito.Mockito.DoReturn(rmApp).When(spyApps)[(ApplicationId)Matchers.Any()]; Org.Mockito.Mockito.When(spyRMContext.GetRMApps()).ThenReturn(spyApps); ReservationSystemTestUtil.SetupFSAllocationFile(AllocFile); SetupPlanFollower(); }
internal virtual void HandleNMContainerStatus(NMContainerStatus containerStatus, NodeId nodeId) { ApplicationAttemptId appAttemptId = containerStatus.GetContainerId().GetApplicationAttemptId (); RMApp rmApp = rmContext.GetRMApps()[appAttemptId.GetApplicationId()]; if (rmApp == null) { Log.Error("Received finished container : " + containerStatus.GetContainerId() + " for unknown application " + appAttemptId.GetApplicationId() + " Skipping."); return; } if (rmApp.GetApplicationSubmissionContext().GetUnmanagedAM()) { if (Log.IsDebugEnabled()) { Log.Debug("Ignoring container completion status for unmanaged AM " + rmApp.GetApplicationId ()); } return; } RMAppAttempt rmAppAttempt = rmApp.GetRMAppAttempt(appAttemptId); Container masterContainer = rmAppAttempt.GetMasterContainer(); if (masterContainer.GetId().Equals(containerStatus.GetContainerId()) && containerStatus .GetContainerState() == ContainerState.Complete) { ContainerStatus status = ContainerStatus.NewInstance(containerStatus.GetContainerId (), containerStatus.GetContainerState(), containerStatus.GetDiagnostics(), containerStatus .GetContainerExitStatus()); // sending master container finished event. RMAppAttemptContainerFinishedEvent evt = new RMAppAttemptContainerFinishedEvent(appAttemptId , status, nodeId); rmContext.GetDispatcher().GetEventHandler().Handle(evt); } }
/// <exception cref="System.Exception"/> public virtual void TestAMRestartWithExistingContainers() { YarnConfiguration conf = new YarnConfiguration(); conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 2); MockRM rm1 = new MockRM(conf); rm1.Start(); RMApp app1 = rm1.SubmitApp(200, "name", "user", new Dictionary <ApplicationAccessType , string>(), false, "default", -1, null, "MAPREDUCE", false, true); MockNM nm1 = new MockNM("127.0.0.1:1234", 10240, rm1.GetResourceTrackerService()); nm1.RegisterNode(); MockNM nm2 = new MockNM("127.0.0.1:2351", 4089, rm1.GetResourceTrackerService()); nm2.RegisterNode(); MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1); int NumContainers = 3; // allocate NUM_CONTAINERS containers am1.Allocate("127.0.0.1", 1024, NumContainers, new AList <ContainerId>()); nm1.NodeHeartbeat(true); // wait for containers to be allocated. IList <Container> containers = am1.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>()).GetAllocatedContainers(); while (containers.Count != NumContainers) { nm1.NodeHeartbeat(true); Sharpen.Collections.AddAll(containers, am1.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>()).GetAllocatedContainers()); Sharpen.Thread.Sleep(200); } // launch the 2nd container, for testing running container transferred. nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 2, ContainerState.Running); ContainerId containerId2 = ContainerId.NewContainerId(am1.GetApplicationAttemptId (), 2); rm1.WaitForState(nm1, containerId2, RMContainerState.Running); // launch the 3rd container, for testing container allocated by previous // attempt is completed by the next new attempt/ nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 3, ContainerState.Running); ContainerId containerId3 = ContainerId.NewContainerId(am1.GetApplicationAttemptId (), 3); rm1.WaitForState(nm1, containerId3, RMContainerState.Running); // 4th container still in AQUIRED state. for testing Acquired container is // always killed. ContainerId containerId4 = ContainerId.NewContainerId(am1.GetApplicationAttemptId (), 4); rm1.WaitForState(nm1, containerId4, RMContainerState.Acquired); // 5th container is in Allocated state. for testing allocated container is // always killed. am1.Allocate("127.0.0.1", 1024, 1, new AList <ContainerId>()); nm1.NodeHeartbeat(true); ContainerId containerId5 = ContainerId.NewContainerId(am1.GetApplicationAttemptId (), 5); rm1.WaitForContainerAllocated(nm1, containerId5); rm1.WaitForState(nm1, containerId5, RMContainerState.Allocated); // 6th container is in Reserved state. am1.Allocate("127.0.0.1", 6000, 1, new AList <ContainerId>()); ContainerId containerId6 = ContainerId.NewContainerId(am1.GetApplicationAttemptId (), 6); nm1.NodeHeartbeat(true); SchedulerApplicationAttempt schedulerAttempt = ((AbstractYarnScheduler)rm1.GetResourceScheduler ()).GetCurrentAttemptForContainer(containerId6); while (schedulerAttempt.GetReservedContainers().IsEmpty()) { System.Console.Out.WriteLine("Waiting for container " + containerId6 + " to be reserved." ); nm1.NodeHeartbeat(true); Sharpen.Thread.Sleep(200); } // assert containerId6 is reserved. NUnit.Framework.Assert.AreEqual(containerId6, schedulerAttempt.GetReservedContainers ()[0].GetContainerId()); // fail the AM by sending CONTAINER_FINISHED event without registering. nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 1, ContainerState.Complete); am1.WaitForState(RMAppAttemptState.Failed); // wait for some time. previous AM's running containers should still remain // in scheduler even though am failed Sharpen.Thread.Sleep(3000); rm1.WaitForState(nm1, containerId2, RMContainerState.Running); // acquired/allocated containers are cleaned up. NUnit.Framework.Assert.IsNull(rm1.GetResourceScheduler().GetRMContainer(containerId4 )); NUnit.Framework.Assert.IsNull(rm1.GetResourceScheduler().GetRMContainer(containerId5 )); // wait for app to start a new attempt. rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted); // assert this is a new AM. ApplicationAttemptId newAttemptId = app1.GetCurrentAppAttempt().GetAppAttemptId(); NUnit.Framework.Assert.IsFalse(newAttemptId.Equals(am1.GetApplicationAttemptId()) ); // launch the new AM RMAppAttempt attempt2 = app1.GetCurrentAppAttempt(); nm1.NodeHeartbeat(true); MockAM am2 = rm1.SendAMLaunched(attempt2.GetAppAttemptId()); RegisterApplicationMasterResponse registerResponse = am2.RegisterAppAttempt(); // Assert two containers are running: container2 and container3; NUnit.Framework.Assert.AreEqual(2, registerResponse.GetContainersFromPreviousAttempts ().Count); bool containerId2Exists = false; bool containerId3Exists = false; foreach (Container container in registerResponse.GetContainersFromPreviousAttempts ()) { if (container.GetId().Equals(containerId2)) { containerId2Exists = true; } if (container.GetId().Equals(containerId3)) { containerId3Exists = true; } } NUnit.Framework.Assert.IsTrue(containerId2Exists && containerId3Exists); rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running); // complete container by sending the container complete event which has earlier // attempt's attemptId nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 3, ContainerState.Complete); // Even though the completed container containerId3 event was sent to the // earlier failed attempt, new RMAppAttempt can also capture this container // info. // completed containerId4 is also transferred to the new attempt. RMAppAttempt newAttempt = app1.GetRMAppAttempt(am2.GetApplicationAttemptId()); // 4 containers finished, acquired/allocated/reserved/completed. WaitForContainersToFinish(4, newAttempt); bool container3Exists = false; bool container4Exists = false; bool container5Exists = false; bool container6Exists = false; foreach (ContainerStatus status in newAttempt.GetJustFinishedContainers()) { if (status.GetContainerId().Equals(containerId3)) { // containerId3 is the container ran by previous attempt but finished by the // new attempt. container3Exists = true; } if (status.GetContainerId().Equals(containerId4)) { // containerId4 is the Acquired Container killed by the previous attempt, // it's now inside new attempt's finished container list. container4Exists = true; } if (status.GetContainerId().Equals(containerId5)) { // containerId5 is the Allocated container killed by previous failed attempt. container5Exists = true; } if (status.GetContainerId().Equals(containerId6)) { // containerId6 is the reserved container killed by previous failed attempt. container6Exists = true; } } NUnit.Framework.Assert.IsTrue(container3Exists && container4Exists && container5Exists && container6Exists); // New SchedulerApplicationAttempt also has the containers info. rm1.WaitForState(nm1, containerId2, RMContainerState.Running); // record the scheduler attempt for testing. SchedulerApplicationAttempt schedulerNewAttempt = ((AbstractYarnScheduler)rm1.GetResourceScheduler ()).GetCurrentAttemptForContainer(containerId2); // finish this application MockRM.FinishAMAndVerifyAppState(app1, rm1, nm1, am2); // the 2nd attempt released the 1st attempt's running container, when the // 2nd attempt finishes. NUnit.Framework.Assert.IsFalse(schedulerNewAttempt.GetLiveContainers().Contains(containerId2 )); // all 4 normal containers finished. System.Console.Out.WriteLine("New attempt's just finished containers: " + newAttempt .GetJustFinishedContainers()); WaitForContainersToFinish(5, newAttempt); rm1.Stop(); }
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/> /// <exception cref="System.IO.IOException"/> public virtual AllocateResponse Allocate(AllocateRequest request) { AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest(); ApplicationAttemptId appAttemptId = amrmTokenIdentifier.GetApplicationAttemptId(); ApplicationId applicationId = appAttemptId.GetApplicationId(); this.amLivelinessMonitor.ReceivedPing(appAttemptId); /* check if its in cache */ ApplicationMasterService.AllocateResponseLock Lock = responseMap[appAttemptId]; if (Lock == null) { string message = "Application attempt " + appAttemptId + " doesn't exist in ApplicationMasterService cache."; Log.Error(message); throw new ApplicationAttemptNotFoundException(message); } lock (Lock) { AllocateResponse lastResponse = Lock.GetAllocateResponse(); if (!HasApplicationMasterRegistered(appAttemptId)) { string message = "AM is not registered for known application attempt: " + appAttemptId + " or RM had restarted after AM registered . AM should re-register."; Log.Info(message); RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appAttemptId.GetApplicationId ()].GetUser(), RMAuditLogger.AuditConstants.AmAllocate, string.Empty, "ApplicationMasterService" , message, applicationId, appAttemptId); throw new ApplicationMasterNotRegisteredException(message); } if ((request.GetResponseId() + 1) == lastResponse.GetResponseId()) { /* old heartbeat */ return(lastResponse); } else { if (request.GetResponseId() + 1 < lastResponse.GetResponseId()) { string message = "Invalid responseId in AllocateRequest from application attempt: " + appAttemptId + ", expect responseId to be " + (lastResponse.GetResponseId() + 1); throw new InvalidApplicationMasterRequestException(message); } } //filter illegal progress values float filteredProgress = request.GetProgress(); if (float.IsNaN(filteredProgress) || filteredProgress == float.NegativeInfinity || filteredProgress < 0) { request.SetProgress(0); } else { if (filteredProgress > 1 || filteredProgress == float.PositiveInfinity) { request.SetProgress(1); } } // Send the status update to the appAttempt. this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMAppAttemptStatusupdateEvent (appAttemptId, request.GetProgress())); IList <ResourceRequest> ask = request.GetAskList(); IList <ContainerId> release = request.GetReleaseList(); ResourceBlacklistRequest blacklistRequest = request.GetResourceBlacklistRequest(); IList <string> blacklistAdditions = (blacklistRequest != null) ? blacklistRequest. GetBlacklistAdditions() : Sharpen.Collections.EmptyList; IList <string> blacklistRemovals = (blacklistRequest != null) ? blacklistRequest.GetBlacklistRemovals () : Sharpen.Collections.EmptyList; RMApp app = this.rmContext.GetRMApps()[applicationId]; // set label expression for Resource Requests if resourceName=ANY ApplicationSubmissionContext asc = app.GetApplicationSubmissionContext(); foreach (ResourceRequest req in ask) { if (null == req.GetNodeLabelExpression() && ResourceRequest.Any.Equals(req.GetResourceName ())) { req.SetNodeLabelExpression(asc.GetNodeLabelExpression()); } } // sanity check try { RMServerUtils.NormalizeAndValidateRequests(ask, rScheduler.GetMaximumResourceCapability (), app.GetQueue(), rScheduler, rmContext); } catch (InvalidResourceRequestException e) { Log.Warn("Invalid resource ask by application " + appAttemptId, e); throw; } try { RMServerUtils.ValidateBlacklistRequest(blacklistRequest); } catch (InvalidResourceBlacklistRequestException e) { Log.Warn("Invalid blacklist request by application " + appAttemptId, e); throw; } // In the case of work-preserving AM restart, it's possible for the // AM to release containers from the earlier attempt. if (!app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts ()) { try { RMServerUtils.ValidateContainerReleaseRequest(release, appAttemptId); } catch (InvalidContainerReleaseException e) { Log.Warn("Invalid container release by application " + appAttemptId, e); throw; } } // Send new requests to appAttempt. Allocation allocation = this.rScheduler.Allocate(appAttemptId, ask, release, blacklistAdditions , blacklistRemovals); if (!blacklistAdditions.IsEmpty() || !blacklistRemovals.IsEmpty()) { Log.Info("blacklist are updated in Scheduler." + "blacklistAdditions: " + blacklistAdditions + ", " + "blacklistRemovals: " + blacklistRemovals); } RMAppAttempt appAttempt = app.GetRMAppAttempt(appAttemptId); AllocateResponse allocateResponse = recordFactory.NewRecordInstance <AllocateResponse >(); if (!allocation.GetContainers().IsEmpty()) { allocateResponse.SetNMTokens(allocation.GetNMTokens()); } // update the response with the deltas of node status changes IList <RMNode> updatedNodes = new AList <RMNode>(); if (app.PullRMNodeUpdates(updatedNodes) > 0) { IList <NodeReport> updatedNodeReports = new AList <NodeReport>(); foreach (RMNode rmNode in updatedNodes) { SchedulerNodeReport schedulerNodeReport = rScheduler.GetNodeReport(rmNode.GetNodeID ()); Resource used = BuilderUtils.NewResource(0, 0); int numContainers = 0; if (schedulerNodeReport != null) { used = schedulerNodeReport.GetUsedResource(); numContainers = schedulerNodeReport.GetNumContainers(); } NodeId nodeId = rmNode.GetNodeID(); NodeReport report = BuilderUtils.NewNodeReport(nodeId, rmNode.GetState(), rmNode. GetHttpAddress(), rmNode.GetRackName(), used, rmNode.GetTotalCapability(), numContainers , rmNode.GetHealthReport(), rmNode.GetLastHealthReportTime(), rmNode.GetNodeLabels ()); updatedNodeReports.AddItem(report); } allocateResponse.SetUpdatedNodes(updatedNodeReports); } allocateResponse.SetAllocatedContainers(allocation.GetContainers()); allocateResponse.SetCompletedContainersStatuses(appAttempt.PullJustFinishedContainers ()); allocateResponse.SetResponseId(lastResponse.GetResponseId() + 1); allocateResponse.SetAvailableResources(allocation.GetResourceLimit()); allocateResponse.SetNumClusterNodes(this.rScheduler.GetNumClusterNodes()); // add preemption to the allocateResponse message (if any) allocateResponse.SetPreemptionMessage(GeneratePreemptionMessage(allocation)); // update AMRMToken if the token is rolled-up MasterKeyData nextMasterKey = this.rmContext.GetAMRMTokenSecretManager().GetNextMasterKeyData (); if (nextMasterKey != null && nextMasterKey.GetMasterKey().GetKeyId() != amrmTokenIdentifier .GetKeyId()) { RMAppAttemptImpl appAttemptImpl = (RMAppAttemptImpl)appAttempt; Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> amrmToken = appAttempt .GetAMRMToken(); if (nextMasterKey.GetMasterKey().GetKeyId() != appAttemptImpl.GetAMRMTokenKeyId()) { Log.Info("The AMRMToken has been rolled-over. Send new AMRMToken back" + " to application: " + applicationId); amrmToken = rmContext.GetAMRMTokenSecretManager().CreateAndGetAMRMToken(appAttemptId ); appAttemptImpl.SetAMRMToken(amrmToken); } allocateResponse.SetAMRMToken(Org.Apache.Hadoop.Yarn.Api.Records.Token.NewInstance (amrmToken.GetIdentifier(), amrmToken.GetKind().ToString(), amrmToken.GetPassword (), amrmToken.GetService().ToString())); } /* * As we are updating the response inside the lock object so we don't * need to worry about unregister call occurring in between (which * removes the lock object). */ Lock.SetAllocateResponse(allocateResponse); return(allocateResponse); } }
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/> /// <exception cref="System.IO.IOException"/> public virtual RegisterApplicationMasterResponse RegisterApplicationMaster(RegisterApplicationMasterRequest request) { AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest(); ApplicationAttemptId applicationAttemptId = amrmTokenIdentifier.GetApplicationAttemptId (); ApplicationId appID = applicationAttemptId.GetApplicationId(); ApplicationMasterService.AllocateResponseLock Lock = responseMap[applicationAttemptId ]; if (Lock == null) { RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(), RMAuditLogger.AuditConstants .RegisterAm, "Application doesn't exist in cache " + applicationAttemptId, "ApplicationMasterService" , "Error in registering application master", appID, applicationAttemptId); ThrowApplicationDoesNotExistInCacheException(applicationAttemptId); } // Allow only one thread in AM to do registerApp at a time. lock (Lock) { AllocateResponse lastResponse = Lock.GetAllocateResponse(); if (HasApplicationMasterRegistered(applicationAttemptId)) { string message = "Application Master is already registered : " + appID; Log.Warn(message); RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(), RMAuditLogger.AuditConstants .RegisterAm, string.Empty, "ApplicationMasterService", message, appID, applicationAttemptId ); throw new InvalidApplicationMasterRequestException(message); } this.amLivelinessMonitor.ReceivedPing(applicationAttemptId); RMApp app = this.rmContext.GetRMApps()[appID]; // Setting the response id to 0 to identify if the // application master is register for the respective attemptid lastResponse.SetResponseId(0); Lock.SetAllocateResponse(lastResponse); Log.Info("AM registration " + applicationAttemptId); this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMAppAttemptRegistrationEvent (applicationAttemptId, request.GetHost(), request.GetRpcPort(), request.GetTrackingUrl ())); RMAuditLogger.LogSuccess(app.GetUser(), RMAuditLogger.AuditConstants.RegisterAm, "ApplicationMasterService", appID, applicationAttemptId); // Pick up min/max resource from scheduler... RegisterApplicationMasterResponse response = recordFactory.NewRecordInstance <RegisterApplicationMasterResponse >(); response.SetMaximumResourceCapability(rScheduler.GetMaximumResourceCapability(app .GetQueue())); response.SetApplicationACLs(app.GetRMAppAttempt(applicationAttemptId).GetSubmissionContext ().GetAMContainerSpec().GetApplicationACLs()); response.SetQueue(app.GetQueue()); if (UserGroupInformation.IsSecurityEnabled()) { Log.Info("Setting client token master key"); response.SetClientToAMTokenMasterKey(ByteBuffer.Wrap(rmContext.GetClientToAMTokenSecretManager ().GetMasterKey(applicationAttemptId).GetEncoded())); } // For work-preserving AM restart, retrieve previous attempts' containers // and corresponding NM tokens. if (app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts ()) { IList <Container> transferredContainers = ((AbstractYarnScheduler)rScheduler).GetTransferredContainers (applicationAttemptId); if (!transferredContainers.IsEmpty()) { response.SetContainersFromPreviousAttempts(transferredContainers); IList <NMToken> nmTokens = new AList <NMToken>(); foreach (Container container in transferredContainers) { try { NMToken token = rmContext.GetNMTokenSecretManager().CreateAndGetNMToken(app.GetUser (), applicationAttemptId, container); if (null != token) { nmTokens.AddItem(token); } } catch (ArgumentException e) { // if it's a DNS issue, throw UnknowHostException directly and // that // will be automatically retried by RMProxy in RPC layer. if (e.InnerException is UnknownHostException) { throw (UnknownHostException)e.InnerException; } } } response.SetNMTokensFromPreviousAttempts(nmTokens); Log.Info("Application " + appID + " retrieved " + transferredContainers.Count + " containers from previous" + " attempts and " + nmTokens.Count + " NM tokens."); } } response.SetSchedulerResourceTypes(rScheduler.GetSchedulingResourceTypes()); return(response); } }
public virtual void TestReleaseWhileRunning() { DrainDispatcher drainDispatcher = new DrainDispatcher(); EventHandler <RMAppAttemptEvent> appAttemptEventHandler = Org.Mockito.Mockito.Mock <EventHandler>(); EventHandler generic = Org.Mockito.Mockito.Mock <EventHandler>(); drainDispatcher.Register(typeof(RMAppAttemptEventType), appAttemptEventHandler); drainDispatcher.Register(typeof(RMNodeEventType), generic); drainDispatcher.Init(new YarnConfiguration()); drainDispatcher.Start(); NodeId nodeId = BuilderUtils.NewNodeId("host", 3425); ApplicationId appId = BuilderUtils.NewApplicationId(1, 1); ApplicationAttemptId appAttemptId = BuilderUtils.NewApplicationAttemptId(appId, 1 ); ContainerId containerId = BuilderUtils.NewContainerId(appAttemptId, 1); ContainerAllocationExpirer expirer = Org.Mockito.Mockito.Mock <ContainerAllocationExpirer >(); Resource resource = BuilderUtils.NewResource(512, 1); Priority priority = BuilderUtils.NewPriority(5); Container container = BuilderUtils.NewContainer(containerId, nodeId, "host:3465", resource, priority, null); ConcurrentMap <ApplicationId, RMApp> rmApps = Org.Mockito.Mockito.Spy(new ConcurrentHashMap <ApplicationId, RMApp>()); RMApp rmApp = Org.Mockito.Mockito.Mock <RMApp>(); Org.Mockito.Mockito.When(rmApp.GetRMAppAttempt((ApplicationAttemptId)Matchers.Any ())).ThenReturn(null); Org.Mockito.Mockito.DoReturn(rmApp).When(rmApps)[(ApplicationId)Matchers.Any()]; RMApplicationHistoryWriter writer = Org.Mockito.Mockito.Mock <RMApplicationHistoryWriter >(); SystemMetricsPublisher publisher = Org.Mockito.Mockito.Mock <SystemMetricsPublisher >(); RMContext rmContext = Org.Mockito.Mockito.Mock <RMContext>(); Org.Mockito.Mockito.When(rmContext.GetDispatcher()).ThenReturn(drainDispatcher); Org.Mockito.Mockito.When(rmContext.GetContainerAllocationExpirer()).ThenReturn(expirer ); Org.Mockito.Mockito.When(rmContext.GetRMApplicationHistoryWriter()).ThenReturn(writer ); Org.Mockito.Mockito.When(rmContext.GetRMApps()).ThenReturn(rmApps); Org.Mockito.Mockito.When(rmContext.GetSystemMetricsPublisher()).ThenReturn(publisher ); Org.Mockito.Mockito.When(rmContext.GetYarnConfiguration()).ThenReturn(new YarnConfiguration ()); RMContainer rmContainer = new RMContainerImpl(container, appAttemptId, nodeId, "user" , rmContext); NUnit.Framework.Assert.AreEqual(RMContainerState.New, rmContainer.GetState()); NUnit.Framework.Assert.AreEqual(resource, rmContainer.GetAllocatedResource()); NUnit.Framework.Assert.AreEqual(nodeId, rmContainer.GetAllocatedNode()); NUnit.Framework.Assert.AreEqual(priority, rmContainer.GetAllocatedPriority()); Org.Mockito.Mockito.Verify(writer).ContainerStarted(Matchers.Any <RMContainer>()); Org.Mockito.Mockito.Verify(publisher).ContainerCreated(Matchers.Any <RMContainer>( ), Matchers.AnyLong()); rmContainer.Handle(new RMContainerEvent(containerId, RMContainerEventType.Start)); drainDispatcher.Await(); NUnit.Framework.Assert.AreEqual(RMContainerState.Allocated, rmContainer.GetState( )); rmContainer.Handle(new RMContainerEvent(containerId, RMContainerEventType.Acquired )); drainDispatcher.Await(); NUnit.Framework.Assert.AreEqual(RMContainerState.Acquired, rmContainer.GetState() ); rmContainer.Handle(new RMContainerEvent(containerId, RMContainerEventType.Launched )); drainDispatcher.Await(); NUnit.Framework.Assert.AreEqual(RMContainerState.Running, rmContainer.GetState()); NUnit.Framework.Assert.AreEqual("http://host:3465/node/containerlogs/container_1_0001_01_000001/user" , rmContainer.GetLogURL()); // In RUNNING state. Verify RELEASED and associated actions. Org.Mockito.Mockito.Reset(appAttemptEventHandler); ContainerStatus containerStatus = SchedulerUtils.CreateAbnormalContainerStatus(containerId , SchedulerUtils.ReleasedContainer); rmContainer.Handle(new RMContainerFinishedEvent(containerId, containerStatus, RMContainerEventType .Released)); drainDispatcher.Await(); NUnit.Framework.Assert.AreEqual(RMContainerState.Released, rmContainer.GetState() ); NUnit.Framework.Assert.AreEqual(SchedulerUtils.ReleasedContainer, rmContainer.GetDiagnosticsInfo ()); NUnit.Framework.Assert.AreEqual(ContainerExitStatus.Aborted, rmContainer.GetContainerExitStatus ()); NUnit.Framework.Assert.AreEqual(ContainerState.Complete, rmContainer.GetContainerState ()); Org.Mockito.Mockito.Verify(writer).ContainerFinished(Matchers.Any <RMContainer>()); Org.Mockito.Mockito.Verify(publisher).ContainerFinished(Matchers.Any <RMContainer> (), Matchers.AnyLong()); ArgumentCaptor <RMAppAttemptContainerFinishedEvent> captor = ArgumentCaptor.ForClass <RMAppAttemptContainerFinishedEvent>(); Org.Mockito.Mockito.Verify(appAttemptEventHandler).Handle(captor.Capture()); RMAppAttemptContainerFinishedEvent cfEvent = captor.GetValue(); NUnit.Framework.Assert.AreEqual(appAttemptId, cfEvent.GetApplicationAttemptId()); NUnit.Framework.Assert.AreEqual(containerStatus, cfEvent.GetContainerStatus()); NUnit.Framework.Assert.AreEqual(RMAppAttemptEventType.ContainerFinished, cfEvent. GetType()); // In RELEASED state. A FINIHSED event may come in. rmContainer.Handle(new RMContainerFinishedEvent(containerId, SchedulerUtils.CreateAbnormalContainerStatus (containerId, "FinishedContainer"), RMContainerEventType.Finished)); NUnit.Framework.Assert.AreEqual(RMContainerState.Released, rmContainer.GetState() ); }