/// <summary>
/// Processes a release request for every container id in
/// <paramref name="containers"/> on behalf of <paramref name="attempt"/>.
/// </summary>
/// <remarks>
/// When a container id cannot be resolved to an <c>RMContainer</c>, one of two
/// things happens: if less than <c>nmExpireInterval</c> has elapsed since the
/// cluster timestamp, the id is added to the attempt's pending-release set;
/// otherwise an audit failure is logged. In every case the method finishes the
/// iteration by calling <c>CompletedContainer</c> with a released status.
/// </remarks>
/// <param name="containers">Ids of the containers to release.</param>
/// <param name="attempt">The attempt that issued the release request.</param>
protected internal virtual void ReleaseContainers(IList<ContainerId> containers, SchedulerApplicationAttempt attempt)
{
    foreach (ContainerId id in containers)
    {
        RMContainer target = GetRMContainer(id);
        if (target == null)
        {
            var elapsedSinceClusterStart = Runtime.CurrentTimeMillis() - ResourceManager.GetClusterTimeStamp();
            if (elapsedSinceClusterStart >= nmExpireInterval)
            {
                // Too late for the container to still show up: record the bad request.
                RMAuditLogger.LogFailure(
                    attempt.GetUser(),
                    RMAuditLogger.AuditConstants.ReleaseContainer,
                    "Unauthorized access or invalid container",
                    "Scheduler",
                    "Trying to release container not owned by app or with invalid id.",
                    attempt.GetApplicationId(),
                    id);
            }
            else
            {
                // The container may simply not have been recovered yet, so
                // remember the request on the attempt.
                Log.Info(id + " doesn't exist. Add the container" + " to the release request cache as it maybe on recovery.");
                lock (attempt)
                {
                    attempt.GetPendingRelease().AddItem(id);
                }
            }
        }

        // NOTE(review): target can be null here; this relies on
        // CompletedContainer tolerating a null container - confirm.
        CompletedContainer(
            target,
            SchedulerUtils.CreateAbnormalContainerStatus(id, SchedulerUtils.ReleasedContainer),
            RMContainerEventType.Released);
    }
}
/// <summary>
/// Verifies that moving an application attempt between two sibling queues
/// moves its metrics from the old queue to the new one while the shared
/// parent queue's metrics stay unchanged.
/// </summary>
public virtual void TestMove()
{
    string user = "******";

    // Queue hierarchy: one parent with an "old" and a "new" child.
    Queue parent = CreateQueue("parent", null);
    Queue source = CreateQueue("old", parent);
    Queue target = CreateQueue("new", parent);
    QueueMetrics parentMetrics = parent.GetMetrics();
    QueueMetrics sourceMetrics = source.GetMetrics();
    QueueMetrics targetMetrics = target.GetMetrics();

    // Attempt submitted to the old queue, with a mocked context reporting epoch 3.
    ApplicationAttemptId attemptId = CreateAppAttemptId(0, 0);
    RMContext context = Org.Mockito.Mockito.Mock<RMContext>();
    Org.Mockito.Mockito.When(context.GetEpoch()).ThenReturn(3L);
    SchedulerApplicationAttempt attempt = new SchedulerApplicationAttempt(
        attemptId, user, source, source.GetActiveUsersManager(), context);
    sourceMetrics.SubmitApp(user);

    // The first container id is expected to carry the epoch in its high bits.
    NUnit.Framework.Assert.AreEqual(unchecked((long)(0x30000000001L)), attempt.GetNewContainerId());

    // Outstanding resource request.
    Resource askedResource = Resource.NewInstance(1536, 2);
    Priority askedPriority = Priority.NewInstance(2);
    ResourceRequest ask = ResourceRequest.NewInstance(askedPriority, ResourceRequest.Any, askedResource, 3);
    attempt.UpdateResourceRequests(Arrays.AsList(ask));

    // One allocated (live) container.
    RMContainer allocated = CreateRMContainer(attemptId, 1, askedResource);
    attempt.liveContainers[allocated.GetContainerId()] = allocated;
    SchedulerNode node = CreateNode();
    attempt.appSchedulingInfo.Allocate(NodeType.OffSwitch, node, askedPriority, ask, allocated.GetContainer());

    // One reserved container on the same node.
    Priority reservedPriority = Priority.NewInstance(1);
    Resource reservedResource = Resource.NewInstance(2048, 3);
    RMContainer reserved = CreateReservedRMContainer(attemptId, 1, reservedResource, node.GetNodeID(), reservedPriority);
    IDictionary<NodeId, RMContainer> reservationsByNode = new Dictionary<NodeId, RMContainer>();
    reservationsByNode[node.GetNodeID()] = reserved;
    attempt.reservedContainers[reservedPriority] = reservationsByNode;
    sourceMetrics.ReserveResource(user, reservedResource);

    // Before the move: everything is accounted to the old queue and the parent.
    CheckQueueMetrics(sourceMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4);
    CheckQueueMetrics(targetMetrics, 0, 0, 0, 0, 0, 0, 0, 0);
    CheckQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4);

    attempt.Move(target);

    // After the move: the new queue holds the numbers, the parent is unchanged.
    CheckQueueMetrics(sourceMetrics, 0, 0, 0, 0, 0, 0, 0, 0);
    CheckQueueMetrics(targetMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4);
    CheckQueueMetrics(parentMetrics, 1, 1, 1536, 2, 2048, 3, 3072, 4);
}
/// <summary>
/// Returns all resource requests held by the scheduling info of the given
/// application attempt.
/// </summary>
/// <param name="attemptId">Id of the attempt to look up.</param>
/// <returns>
/// The attempt's resource requests, or <c>null</c> when no attempt is found
/// for <paramref name="attemptId"/>.
/// </returns>
public virtual IList<ResourceRequest> GetPendingResourceRequestsForAttempt(ApplicationAttemptId attemptId)
{
    SchedulerApplicationAttempt found = GetApplicationAttempt(attemptId);
    if (found == null)
    {
        return null;
    }

    return found.GetAppSchedulingInfo().GetAllResourceRequests();
}
/// <summary>
/// Returns the resource usage report of the given application attempt.
/// </summary>
/// <param name="appAttemptId">Id of the attempt to report on.</param>
/// <returns>
/// The attempt's usage report, or <c>null</c> when the attempt is unknown
/// (a debug message is logged in that case if debug logging is enabled).
/// </returns>
public virtual ApplicationResourceUsageReport GetAppResourceUsageReport(ApplicationAttemptId appAttemptId)
{
    SchedulerApplicationAttempt known = GetApplicationAttempt(appAttemptId);
    if (known != null)
    {
        return known.GetResourceUsageReport();
    }

    if (Log.IsDebugEnabled())
    {
        Log.Debug("Request for appInfo of unknown attempt " + appAttemptId);
    }

    return null;
}
/// <summary>
/// Copies scheduling state from a previous attempt into this one while
/// holding the lock on this instance.
/// </summary>
/// <remarks>
/// Carried over: live containers map, current consumption, resource limit,
/// last scheduled container, and the app scheduling info state. Deliberately
/// NOT carried over (left as they are on this attempt): re-reservations,
/// current reservation, newly allocated containers and scheduling
/// opportunities.
/// </remarks>
/// <param name="appAttempt">The previous attempt to take state from.</param>
public virtual void TransferStateFromPreviousAttempt(SchedulerApplicationAttempt appAttempt)
{
    lock (this)
    {
        liveContainers = appAttempt.GetLiveContainersMap();
        currentConsumption = appAttempt.GetCurrentConsumption();
        resourceLimit = appAttempt.GetResourceLimit();
        lastScheduledContainer = appAttempt.GetLastScheduledContainer();

        appSchedulingInfo.TransferStateFromPreviousAppSchedulingInfo(appAttempt.appSchedulingInfo);
    }
}
/// <summary>
/// Hands the resource requests stored on a container back to the scheduler
/// attempt that currently owns that container.
/// </summary>
/// <remarks>
/// Intended for containers preempted before the AM pulled them. Nothing
/// happens when the container carries no requests (per the original note,
/// that is the case once the container has moved to ACQUIRED) or when no
/// current attempt is found for the container.
/// </remarks>
/// <param name="rmContainer">Container whose resource requests are recovered.</param>
protected internal virtual void RecoverResourceRequestForContainer(RMContainer rmContainer)
{
    IList<ResourceRequest> toRestore = rmContainer.GetResourceRequests();
    if (toRestore != null)
    {
        SchedulerApplicationAttempt owner = GetCurrentAttemptForContainer(rmContainer.GetContainerId());
        if (owner != null)
        {
            // Put the requests back so they can be scheduled again.
            owner.RecoverResourceRequests(toRestore);
        }
    }
}
/// <summary>
/// Notifies the owning application attempt that a container has been
/// launched on a node; runs under the lock on this instance.
/// </summary>
/// <remarks>
/// When no current attempt is found for the container, the event is logged
/// and an <c>RMNodeCleanContainerEvent</c> for that node and container is
/// dispatched instead.
/// </remarks>
/// <param name="containerId">Id of the launched container.</param>
/// <param name="node">Node on which the container was launched.</param>
protected internal virtual void ContainerLaunchedOnNode(ContainerId containerId, SchedulerNode node)
{
    lock (this)
    {
        // Look up the attempt that owns the launched container.
        SchedulerApplicationAttempt owner = GetCurrentAttemptForContainer(containerId);
        if (owner != null)
        {
            owner.ContainerLaunchedOnNode(containerId, node.GetNodeID());
            return;
        }

        Log.Info("Unknown application " + containerId.GetApplicationAttemptId().GetApplicationId()
            + " launched container " + containerId + " on node: " + node);
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMNodeCleanContainerEvent(node.GetNodeID(), containerId));
    }
}
/// <summary>
/// Tells whether the application has blacklisted the given node, either by
/// its node name or by its rack name.
/// </summary>
/// <param name="application">Application attempt whose blacklist is consulted.</param>
/// <param name="node">Node being considered.</param>
/// <param name="Log">Logger that receives a debug message when the node is skipped.</param>
/// <returns>
/// <c>true</c> if the node name or (checked second) the rack name is
/// blacklisted; <c>false</c> otherwise.
/// </returns>
public static bool IsBlacklisted(SchedulerApplicationAttempt application, SchedulerNode node, Log Log)
{
    bool hostBlocked = application.IsBlacklisted(node.GetNodeName());
    if (!hostBlocked)
    {
        // The rack is only consulted once the host itself is known to be allowed.
        bool rackBlocked = application.IsBlacklisted(node.GetRackName());
        if (!rackBlocked)
        {
            return false;
        }

        if (Log.IsDebugEnabled())
        {
            Log.Debug("Skipping 'rack' " + node.GetRackName() + " for " + application.GetApplicationId()
                + " since it has been blacklisted");
        }

        return true;
    }

    if (Log.IsDebugEnabled())
    {
        Log.Debug("Skipping 'host' " + node.GetNodeName() + " for " + application.GetApplicationId()
            + " since it has been blacklisted");
    }

    return true;
}
/// <summary>
/// Rebuilds scheduler state for the containers a node manager reports,
/// while holding the lock on this instance.
/// </summary>
/// <remarks>
/// Does nothing unless work-preserving recovery is enabled and at least one
/// report is supplied. A reported container is killed as an orphan (and
/// skipped) when its application is unknown, uses an unmanaged AM, has no
/// scheduler application, or - if containers are not kept across attempts -
/// belongs to a stopped or non-current attempt. Every other container is
/// recreated and recovered on the node, the queue and the attempt, flagged as
/// the AM container when its id matches the current attempt's master
/// container, and finally released if a release was already pending for it.
/// </remarks>
/// <param name="containerReports">Container statuses reported by the node manager.</param>
/// <param name="nm">The node the reports came from.</param>
public virtual void RecoverContainersOnNode(IList<NMContainerStatus> containerReports, RMNode nm)
{
    lock (this)
    {
        bool nothingToRecover = !rmContext.IsWorkPreservingRecoveryEnabled()
            || containerReports == null
            || containerReports.IsEmpty();
        if (nothingToRecover)
        {
            return;
        }

        foreach (NMContainerStatus report in containerReports)
        {
            ContainerId reportedId = report.GetContainerId();
            ApplicationId appId = reportedId.GetApplicationAttemptId().GetApplicationId();

            RMApp rmApp = rmContext.GetRMApps()[appId];
            if (rmApp == null)
            {
                Log.Error("Skip recovering container " + report + " for unknown application.");
                KillOrphanContainerOnNode(nm, report);
                continue;
            }

            // Unmanaged AM recovery is addressed in YARN-1815
            if (rmApp.GetApplicationSubmissionContext().GetUnmanagedAM())
            {
                Log.Info("Skip recovering container " + report + " for unmanaged AM." + rmApp.GetApplicationId());
                KillOrphanContainerOnNode(nm, report);
                continue;
            }

            SchedulerApplication<T> schedulerApp = applications[appId];
            if (schedulerApp == null)
            {
                Log.Info("Skip recovering container " + report
                    + " for unknown SchedulerApplication. Application current state is " + rmApp.GetState());
                KillOrphanContainerOnNode(nm, report);
                continue;
            }

            Log.Info("Recovering container " + report);
            SchedulerApplicationAttempt schedulerAttempt = schedulerApp.GetCurrentAppAttempt();

            // Unless containers survive across attempts, do not recover containers
            // that belong to a stopped attempt or to a previous attempt.
            if (!rmApp.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts()
                && (schedulerAttempt.IsStopped()
                    || !schedulerAttempt.GetApplicationAttemptId().Equals(reportedId.GetApplicationAttemptId())))
            {
                Log.Info("Skip recovering container " + report + " for already stopped attempt.");
                KillOrphanContainerOnNode(nm, report);
                continue;
            }

            // Create the container and replay the recover event on it.
            RMContainer recovered = RecoverAndCreateContainer(report, nm);
            recovered.Handle(new RMContainerRecoverEvent(reportedId, report));

            // Recover the scheduler node.
            nodes[nm.GetNodeID()].RecoverContainer(recovered);

            // Recover the queue: update headroom etc.
            Queue attemptQueue = schedulerAttempt.GetQueue();
            attemptQueue.RecoverContainer(clusterResource, schedulerAttempt, recovered);

            // Recover the scheduler attempt.
            schedulerAttempt.RecoverContainer(recovered);

            // Mark the recovered container as the AM container when its id matches
            // the master container id stored in the current app attempt.
            RMAppAttempt currentAppAttempt = rmApp.GetCurrentAppAttempt();
            if (currentAppAttempt != null)
            {
                Container master = currentAppAttempt.GetMasterContainer();
                if (master != null && master.GetId().Equals(recovered.GetContainerId()))
                {
                    ((RMContainerImpl)recovered).SetAMContainer(true);
                }
            }

            lock (schedulerAttempt)
            {
                ICollection<ContainerId> pendingReleases = schedulerAttempt.GetPendingRelease();
                if (pendingReleases.Contains(reportedId))
                {
                    // The application already asked for this container to be released.
                    recovered.Handle(new RMContainerFinishedEvent(
                        reportedId,
                        SchedulerUtils.CreateAbnormalContainerStatus(reportedId, SchedulerUtils.ReleasedContainer),
                        RMContainerEventType.Released));
                    pendingReleases.Remove(reportedId);
                    Log.Info(reportedId + " is released by application.");
                }
            }
        }
    }
}
/// <summary>
/// Resolves a container id to its <c>RMContainer</c> through the attempt
/// that currently owns the container.
/// </summary>
/// <param name="containerId">Id of the container to look up.</param>
/// <returns>
/// Whatever the owning attempt returns for the id, or <c>null</c> when no
/// current attempt is found for the container.
/// </returns>
public virtual RMContainer GetRMContainer(ContainerId containerId)
{
    SchedulerApplicationAttempt owner = GetCurrentAttemptForContainer(containerId);
    if (owner == null)
    {
        return null;
    }

    return owner.GetRMContainer(containerId);
}
/// <summary>
/// Builds a report by capturing the live containers, reserved containers and
/// pending flag of the given application attempt at construction time.
/// </summary>
/// <param name="app">The application attempt to report on.</param>
public SchedulerAppReport(SchedulerApplicationAttempt app)
{
    live = app.GetLiveContainers();
    reserved = app.GetReservedContainers();
    pending = app.IsPending();
}
/// <summary>Unreserve resources on this node.</summary>
/// <param name="attempt">
/// The scheduler application attempt supplied by the caller; presumably the
/// attempt whose reservation is being dropped (confirm against implementations).
/// </param>
public abstract void UnreserveResource(SchedulerApplicationAttempt attempt);
/// <summary>Reserve container for the attempt on this node.</summary>
/// <param name="attempt">The scheduler application attempt the reservation is made for.</param>
/// <param name="priority">
/// Priority associated with the reservation; presumably the priority of the
/// request being reserved (confirm against implementations).
/// </param>
/// <param name="container">The container being reserved.</param>
public abstract void ReserveResource(SchedulerApplicationAttempt attempt, Priority priority, RMContainer container);