/// <summary>
/// Sends the currently pending resource asks to the ResourceManager's scheduler
/// and returns the containers it handed back for this application attempt.
/// </summary>
/// <remarks>
/// The pending ask list is cleared after the scheduler call, so every request is
/// submitted at most once by this method.
/// </remarks>
/// <returns>The containers contained in the scheduler's allocation.</returns>
/// <exception cref="System.IO.IOException"/>
public virtual IList<Container> GetResources()
{
    // NOTE(review): lock (this) is kept as-is; other members of this class may
    // synchronize on the same monitor, so it cannot be swapped for a private
    // gate object from inside this method alone.
    lock (this)
    {
        if (Log.IsDebugEnabled())
        {
            Log.Debug("getResources begin:" + " application=" + applicationId + " #ask=" + ask.Count);
            foreach (ResourceRequest request in ask)
            {
                Log.Debug("getResources:" + " application=" + applicationId + " ask-request=" + request);
            }
        }

        // Get resources from the ResourceManager
        Allocation allocation = resourceManager.GetResourceScheduler().Allocate(
            applicationAttemptId,
            new AList<ResourceRequest>(ask),
            new AList<ContainerId>(),
            null,
            null);

        // Diagnostics that used to be written unconditionally to stdout; they are
        // now emitted through the logger and only evaluated when debug is enabled,
        // so the RMApp lookup below can no longer fail a non-debug run.
        if (Log.IsDebugEnabled())
        {
            Log.Debug("-=======" + applicationAttemptId);
            Log.Debug("----------" + resourceManager.GetRMContext().GetRMApps()[applicationId]
                .GetRMAppAttempt(applicationAttemptId));
        }

        IList<Container> containers = allocation.GetContainers();

        // Capture the number of asks that were sent before the list is cleared;
        // reading ask.Count after Clear() would always report 0.
        int submittedAskCount = ask.Count;

        // Clear state for next interaction with ResourceManager
        ask.Clear();

        if (Log.IsDebugEnabled())
        {
            Log.Debug("getResources() for " + applicationId + ":" + " ask=" + submittedAskCount
                + " recieved=" + containers.Count);
        }

        return containers;
    }
}
/// <summary>
/// Verifies that the FIFO scheduler honours blacklisted hosts and racks:
/// no container is assigned on a blacklisted node or on a node of a
/// blacklisted rack, while a non-blacklisted node still receives the container.
/// </summary>
/// <remarks>
/// The MockRM started by this test is stopped in a <c>finally</c> block so that
/// a failing assertion does not leave it running.
/// NOTE(review): the assertions keep the message-first argument order of the
/// converted source; confirm it matches the Assert overloads available to this project.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestBlackListNodes()
{
    Configuration conf = new Configuration();
    conf.SetClass(YarnConfiguration.RmScheduler, typeof(FifoScheduler), typeof(ResourceScheduler));
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        FifoScheduler fs = (FifoScheduler)rm.GetResourceScheduler();
        int rack_num_0 = 0;
        int rack_num_1 = 1;

        // Add 4 nodes in 2 racks
        // host_0_0 in rack0
        string host_0_0 = "127.0.0.1";
        RMNode n1 = MockNodes.NewNodeInfo(rack_num_0, MockNodes.NewResource(4 * Gb), 1, host_0_0);
        fs.Handle(new NodeAddedSchedulerEvent(n1));

        // host_0_1 in rack0
        string host_0_1 = "127.0.0.2";
        RMNode n2 = MockNodes.NewNodeInfo(rack_num_0, MockNodes.NewResource(4 * Gb), 1, host_0_1);
        fs.Handle(new NodeAddedSchedulerEvent(n2));

        // host_1_0 in rack1
        string host_1_0 = "127.0.0.3";
        RMNode n3 = MockNodes.NewNodeInfo(rack_num_1, MockNodes.NewResource(4 * Gb), 1, host_1_0);
        fs.Handle(new NodeAddedSchedulerEvent(n3));

        // host_1_1 in rack1
        string host_1_1 = "127.0.0.4";
        RMNode n4 = MockNodes.NewNodeInfo(rack_num_1, MockNodes.NewResource(4 * Gb), 1, host_1_1);
        fs.Handle(new NodeAddedSchedulerEvent(n4));

        // Add one application
        ApplicationId appId1 = BuilderUtils.NewApplicationId(100, 1);
        ApplicationAttemptId appAttemptId1 = BuilderUtils.NewApplicationAttemptId(appId1, 1);
        CreateMockRMApp(appAttemptId1, rm.GetRMContext());
        SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId1, "queue", "user");
        fs.Handle(appEvent);
        SchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId1, false);
        fs.Handle(attemptEvent);

        IList<ContainerId> emptyId = new AList<ContainerId>();
        IList<ResourceRequest> emptyAsk = new AList<ResourceRequest>();

        // Allow rack-locality for rack_1, but blacklist host_1_0
        // Set up resource requests
        // Ask for a 1 GB container for app 1
        IList<ResourceRequest> ask1 = new AList<ResourceRequest>();
        ask1.AddItem(BuilderUtils.NewResourceRequest(BuilderUtils.NewPriority(0), "rack1",
            BuilderUtils.NewResource(Gb, 1), 1));
        ask1.AddItem(BuilderUtils.NewResourceRequest(BuilderUtils.NewPriority(0), ResourceRequest.Any,
            BuilderUtils.NewResource(Gb, 1), 1));
        fs.Allocate(appAttemptId1, ask1, emptyId, Sharpen.Collections.SingletonList(host_1_0), null);

        // Trigger container assignment
        fs.Handle(new NodeUpdateSchedulerEvent(n3));

        // Get the allocation for the application and verify no allocation on blacklist node
        Allocation allocation1 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);
        NUnit.Framework.Assert.AreEqual("allocation1", 0, allocation1.GetContainers().Count);

        // verify host_1_1 can get allocated as not in blacklist
        fs.Handle(new NodeUpdateSchedulerEvent(n4));
        Allocation allocation2 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);
        NUnit.Framework.Assert.AreEqual("allocation2", 1, allocation2.GetContainers().Count);
        IList<Container> containerList = allocation2.GetContainers();
        foreach (Container container in containerList)
        {
            NUnit.Framework.Assert.AreEqual("Container is allocated on n4", container.GetNodeId(),
                n4.GetNodeID());
        }

        // Ask for a 1 GB container again for app 1
        IList<ResourceRequest> ask2 = new AList<ResourceRequest>();

        // this time, rack0 is also in blacklist, so only host_1_1 is available to
        // be assigned
        ask2.AddItem(BuilderUtils.NewResourceRequest(BuilderUtils.NewPriority(0), ResourceRequest.Any,
            BuilderUtils.NewResource(Gb, 1), 1));
        fs.Allocate(appAttemptId1, ask2, emptyId, Sharpen.Collections.SingletonList("rack0"), null);

        // verify n1 is not qualified to be allocated
        fs.Handle(new NodeUpdateSchedulerEvent(n1));
        Allocation allocation3 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);
        NUnit.Framework.Assert.AreEqual("allocation3", 0, allocation3.GetContainers().Count);

        // verify n2 is not qualified to be allocated
        fs.Handle(new NodeUpdateSchedulerEvent(n2));
        Allocation allocation4 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);
        NUnit.Framework.Assert.AreEqual("allocation4", 0, allocation4.GetContainers().Count);

        // verify n3 is not qualified to be allocated
        fs.Handle(new NodeUpdateSchedulerEvent(n3));
        Allocation allocation5 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);
        NUnit.Framework.Assert.AreEqual("allocation5", 0, allocation5.GetContainers().Count);

        // n4 (host_1_1) is the only node left outside the blacklist
        fs.Handle(new NodeUpdateSchedulerEvent(n4));
        Allocation allocation6 = fs.Allocate(appAttemptId1, emptyAsk, emptyId, null, null);
        NUnit.Framework.Assert.AreEqual("allocation6", 1, allocation6.GetContainers().Count);
        containerList = allocation6.GetContainers();
        foreach (Container container_1 in containerList)
        {
            NUnit.Framework.Assert.AreEqual("Container is allocated on n4", container_1.GetNodeId(),
                n4.GetNodeID());
        }
    }
    finally
    {
        // Always shut the mock ResourceManager down, even when an assertion fails.
        rm.Stop();
    }
}
/// <summary>
/// Handles an allocate call for the application attempt identified by the
/// authorized AMRM token: validates the request, forwards asks, releases and
/// blacklist changes to the scheduler, and builds the response.
/// </summary>
/// <remarks>
/// Processing steps, in order:
/// <list type="number">
/// <item>Authorize the caller, ping the liveliness monitor and look up the cached response lock.</item>
/// <item>Under that lock: reject unregistered AMs, return the cached response for a repeated
/// heartbeat, and reject response ids that are older than that.</item>
/// <item>Clamp the reported progress and publish it as a status-update event.</item>
/// <item>Validate asks, the blacklist request and (unless containers are kept across attempts) releases.</item>
/// <item>Call the scheduler and assemble the response, including node updates and a rolled-over AMRM token.</item>
/// </list>
/// Throws <c>ApplicationAttemptNotFoundException</c> when the attempt is not in the response cache,
/// <c>ApplicationMasterNotRegisteredException</c> when the AM has not registered, and
/// <c>InvalidApplicationMasterRequestException</c> for a stale response id. Validation exceptions
/// are logged and rethrown unchanged.
/// </remarks>
/// <param name="request">The allocate request sent by the application master.</param>
/// <returns>The newly built response, or the cached previous response for a repeated heartbeat.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual AllocateResponse Allocate(AllocateRequest request)
{
    AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest();
    ApplicationAttemptId appAttemptId = amrmTokenIdentifier.GetApplicationAttemptId();
    ApplicationId applicationId = appAttemptId.GetApplicationId();
    this.amLivelinessMonitor.ReceivedPing(appAttemptId);

    /* check if its in cache */
    ApplicationMasterService.AllocateResponseLock responseLock = responseMap[appAttemptId];
    if (responseLock == null)
    {
        string message = "Application attempt " + appAttemptId
            + " doesn't exist in ApplicationMasterService cache.";
        Log.Error(message);
        throw new ApplicationAttemptNotFoundException(message);
    }

    lock (responseLock)
    {
        AllocateResponse lastResponse = responseLock.GetAllocateResponse();
        if (!HasApplicationMasterRegistered(appAttemptId))
        {
            string message = "AM is not registered for known application attempt: " + appAttemptId
                + " or RM had restarted after AM registered . AM should re-register.";
            Log.Info(message);
            RMAuditLogger.LogFailure(
                this.rmContext.GetRMApps()[appAttemptId.GetApplicationId()].GetUser(),
                RMAuditLogger.AuditConstants.AmAllocate, string.Empty, "ApplicationMasterService",
                message, applicationId, appAttemptId);
            throw new ApplicationMasterNotRegisteredException(message);
        }

        if ((request.GetResponseId() + 1) == lastResponse.GetResponseId())
        {
            /* old heartbeat */
            return lastResponse;
        }

        if (request.GetResponseId() + 1 < lastResponse.GetResponseId())
        {
            // NOTE(review): the message quotes lastResponse id + 1, while the checks here
            // let a request id equal to the last response id proceed - confirm the wording.
            string message = "Invalid responseId in AllocateRequest from application attempt: "
                + appAttemptId + ", expect responseId to be " + (lastResponse.GetResponseId() + 1);
            throw new InvalidApplicationMasterRequestException(message);
        }

        //filter illegal progress values
        ClampProgress(request);

        // Send the status update to the appAttempt.
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMAppAttemptStatusupdateEvent(appAttemptId, request.GetProgress()));

        IList<ResourceRequest> ask = request.GetAskList();
        IList<ContainerId> release = request.GetReleaseList();
        ResourceBlacklistRequest blacklistRequest = request.GetResourceBlacklistRequest();
        IList<string> blacklistAdditions = (blacklistRequest != null)
            ? blacklistRequest.GetBlacklistAdditions()
            : Sharpen.Collections.EmptyList;
        IList<string> blacklistRemovals = (blacklistRequest != null)
            ? blacklistRequest.GetBlacklistRemovals()
            : Sharpen.Collections.EmptyList;
        RMApp app = this.rmContext.GetRMApps()[applicationId];

        // set label expression for Resource Requests if resourceName=ANY
        ApplicationSubmissionContext asc = app.GetApplicationSubmissionContext();
        foreach (ResourceRequest req in ask)
        {
            if (null == req.GetNodeLabelExpression() && ResourceRequest.Any.Equals(req.GetResourceName()))
            {
                req.SetNodeLabelExpression(asc.GetNodeLabelExpression());
            }
        }

        // sanity check
        try
        {
            RMServerUtils.NormalizeAndValidateRequests(ask, rScheduler.GetMaximumResourceCapability(),
                app.GetQueue(), rScheduler, rmContext);
        }
        catch (InvalidResourceRequestException e)
        {
            Log.Warn("Invalid resource ask by application " + appAttemptId, e);
            throw;
        }

        try
        {
            RMServerUtils.ValidateBlacklistRequest(blacklistRequest);
        }
        catch (InvalidResourceBlacklistRequestException e)
        {
            Log.Warn("Invalid blacklist request by application " + appAttemptId, e);
            throw;
        }

        // In the case of work-preserving AM restart, it's possible for the
        // AM to release containers from the earlier attempt.
        if (!asc.GetKeepContainersAcrossApplicationAttempts())
        {
            try
            {
                RMServerUtils.ValidateContainerReleaseRequest(release, appAttemptId);
            }
            catch (InvalidContainerReleaseException e)
            {
                Log.Warn("Invalid container release by application " + appAttemptId, e);
                throw;
            }
        }

        // Send new requests to appAttempt.
        Allocation allocation = this.rScheduler.Allocate(appAttemptId, ask, release,
            blacklistAdditions, blacklistRemovals);
        if (!blacklistAdditions.IsEmpty() || !blacklistRemovals.IsEmpty())
        {
            Log.Info("blacklist are updated in Scheduler." + "blacklistAdditions: " + blacklistAdditions
                + ", " + "blacklistRemovals: " + blacklistRemovals);
        }

        RMAppAttempt appAttempt = app.GetRMAppAttempt(appAttemptId);
        AllocateResponse allocateResponse = recordFactory.NewRecordInstance<AllocateResponse>();
        if (!allocation.GetContainers().IsEmpty())
        {
            allocateResponse.SetNMTokens(allocation.GetNMTokens());
        }

        // update the response with the deltas of node status changes
        IList<RMNode> updatedNodes = new AList<RMNode>();
        if (app.PullRMNodeUpdates(updatedNodes) > 0)
        {
            allocateResponse.SetUpdatedNodes(BuildUpdatedNodeReports(updatedNodes));
        }

        allocateResponse.SetAllocatedContainers(allocation.GetContainers());
        allocateResponse.SetCompletedContainersStatuses(appAttempt.PullJustFinishedContainers());
        allocateResponse.SetResponseId(lastResponse.GetResponseId() + 1);
        allocateResponse.SetAvailableResources(allocation.GetResourceLimit());
        allocateResponse.SetNumClusterNodes(this.rScheduler.GetNumClusterNodes());

        // add preemption to the allocateResponse message (if any)
        allocateResponse.SetPreemptionMessage(GeneratePreemptionMessage(allocation));

        // update AMRMToken if the token is rolled-up
        AttachRolledOverAMRMToken(allocateResponse, amrmTokenIdentifier, appAttempt, appAttemptId,
            applicationId);

        /*
         * As we are updating the response inside the lock object so we don't
         * need to worry about unregister call occurring in between (which
         * removes the lock object).
         */
        responseLock.SetAllocateResponse(allocateResponse);
        return allocateResponse;
    }
}

/// <summary>
/// Replaces an illegal progress value on the request: NaN and negative values
/// (including negative infinity) become 0, values above 1 (including positive
/// infinity) become 1. Values already within [0, 1] are left untouched.
/// </summary>
private static void ClampProgress(AllocateRequest request)
{
    float progress = request.GetProgress();

    // Every comparison with NaN is false, so NaN needs its own check.
    if (float.IsNaN(progress) || progress < 0)
    {
        request.SetProgress(0);
    }
    else if (progress > 1)
    {
        request.SetProgress(1);
    }
}

/// <summary>
/// Builds one node report per updated node, using the scheduler's node report
/// for used resources and container count when the scheduler has one, and
/// zero usage otherwise.
/// </summary>
private IList<NodeReport> BuildUpdatedNodeReports(IList<RMNode> updatedNodes)
{
    IList<NodeReport> updatedNodeReports = new AList<NodeReport>();
    foreach (RMNode rmNode in updatedNodes)
    {
        SchedulerNodeReport schedulerNodeReport = rScheduler.GetNodeReport(rmNode.GetNodeID());
        Resource used = BuilderUtils.NewResource(0, 0);
        int numContainers = 0;
        if (schedulerNodeReport != null)
        {
            used = schedulerNodeReport.GetUsedResource();
            numContainers = schedulerNodeReport.GetNumContainers();
        }

        NodeId nodeId = rmNode.GetNodeID();
        NodeReport report = BuilderUtils.NewNodeReport(nodeId, rmNode.GetState(),
            rmNode.GetHttpAddress(), rmNode.GetRackName(), used, rmNode.GetTotalCapability(),
            numContainers, rmNode.GetHealthReport(), rmNode.GetLastHealthReportTime(),
            rmNode.GetNodeLabels());
        updatedNodeReports.AddItem(report);
    }

    return updatedNodeReports;
}

/// <summary>
/// Attaches an AMRM token to the response when a next master key exists whose key id
/// differs from the key id of the caller's token identifier. A new token is created
/// and stored on the attempt only if the attempt's current token key id also differs
/// from the next master key; otherwise the attempt's existing token is sent.
/// </summary>
private void AttachRolledOverAMRMToken(AllocateResponse allocateResponse,
    AMRMTokenIdentifier amrmTokenIdentifier, RMAppAttempt appAttempt,
    ApplicationAttemptId appAttemptId, ApplicationId applicationId)
{
    MasterKeyData nextMasterKey = this.rmContext.GetAMRMTokenSecretManager().GetNextMasterKeyData();
    if (nextMasterKey == null
        || nextMasterKey.GetMasterKey().GetKeyId() == amrmTokenIdentifier.GetKeyId())
    {
        return;
    }

    RMAppAttemptImpl appAttemptImpl = (RMAppAttemptImpl)appAttempt;
    Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> amrmToken = appAttempt.GetAMRMToken();
    if (nextMasterKey.GetMasterKey().GetKeyId() != appAttemptImpl.GetAMRMTokenKeyId())
    {
        Log.Info("The AMRMToken has been rolled-over. Send new AMRMToken back" + " to application: "
            + applicationId);
        amrmToken = rmContext.GetAMRMTokenSecretManager().CreateAndGetAMRMToken(appAttemptId);
        appAttemptImpl.SetAMRMToken(amrmToken);
    }

    allocateResponse.SetAMRMToken(Org.Apache.Hadoop.Yarn.Api.Records.Token.NewInstance(
        amrmToken.GetIdentifier(), amrmToken.GetKind().ToString(), amrmToken.GetPassword(),
        amrmToken.GetService().ToString()));
}