/// <summary>
/// Stores the blacklist request on this record. When the supplied value is
/// null, the blacklist request held by the builder is cleared as well.
/// </summary>
/// <param name="blacklistRequest">the new blacklist request, or null to clear it.</param>
public override void SetResourceBlacklistRequest(ResourceBlacklistRequest blacklistRequest)
{
    MaybeInitBuilder();

    bool clearing = blacklistRequest == null;
    if (clearing)
    {
        // Nothing to carry any more: drop whatever the builder still holds.
        builder.ClearBlacklistRequest();
    }

    this.blacklistRequest = blacklistRequest;
}
/// <summary>
/// Verifies that an allocate call whose blacklist additions contain
/// <c>ResourceRequest.Any</c> fails with
/// <c>InvalidResourceBlacklistRequestException</c>.
/// </summary>
/// <remarks>
/// The mock resource manager is stopped in a <c>finally</c> block so that it
/// is shut down even when setup or the allocate call fails unexpectedly.
/// </remarks>
public virtual void TestValidateResourceBlacklistRequest()
{
    TestAMAuthorization.MyContainerManager containerManager = new TestAMAuthorization.MyContainerManager();
    TestAMAuthorization.MockRMWithAMS rm = new TestAMAuthorization.MockRMWithAMS(
        new YarnConfiguration(), containerManager);
    rm.Start();

    bool error = false;
    try
    {
        MockNM nm1 = rm.RegisterNode("localhost:1234", 5120);
        IDictionary<ApplicationAccessType, string> acls =
            new Dictionary<ApplicationAccessType, string>(2);
        acls[ApplicationAccessType.ViewApp] = "*";
        RMApp app = rm.SubmitApp(1024, "appname", "appuser", acls);
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        ApplicationAttemptId applicationAttemptId = attempt.GetAppAttemptId();
        WaitForLaunchedState(attempt);

        // Create a client to the RM.
        Configuration conf = rm.GetConfig();
        YarnRPC rpc = YarnRPC.Create(conf);
        UserGroupInformation currentUser = UserGroupInformation.CreateRemoteUser(
            applicationAttemptId.ToString());
        Credentials credentials = containerManager.GetContainerCredentials();
        IPEndPoint rmBindAddress = rm.GetApplicationMasterService().GetBindAddress();
        Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> amRMToken =
            TestAMAuthorization.MockRMWithAMS.SetupAndReturnAMRMToken(
                rmBindAddress, credentials.GetAllTokens());
        currentUser.AddToken(amRMToken);
        ApplicationMasterProtocol client = currentUser.DoAs(
            new _PrivilegedAction_626(rpc, rmBindAddress, conf));

        RegisterApplicationMasterRequest request =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<RegisterApplicationMasterRequest>();
        client.RegisterApplicationMaster(request);

        // Blacklisting the wildcard resource name is what the server must reject.
        ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.NewInstance(
            Sharpen.Collections.SingletonList(ResourceRequest.Any), null);
        AllocateRequest allocateRequest = AllocateRequest.NewInstance(
            0, 0.0f, null, null, blacklistRequest);

        try
        {
            client.Allocate(allocateRequest);
        }
        catch (InvalidResourceBlacklistRequestException)
        {
            error = true;
        }
    }
    finally
    {
        // Always release the mock RM, including on unexpected failures.
        rm.Stop();
    }

    NUnit.Framework.Assert.IsTrue("Did not catch InvalidResourceBlacklistRequestException", error);
}
/// <summary>
/// Checks that a blacklist request does not try to blacklist the wildcard
/// resource name <c>ResourceRequest.Any</c>.
/// </summary>
/// <param name="blacklistRequest">
/// the request to check; a null request, or one whose additions list is null,
/// passes validation.
/// </param>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.InvalidResourceBlacklistRequestException">
/// if the additions list contains <c>ResourceRequest.Any</c>.
/// </exception>
public static void ValidateBlacklistRequest(ResourceBlacklistRequest blacklistRequest)
{
    if (blacklistRequest == null)
    {
        return;
    }

    IList<string> additions = blacklistRequest.GetBlacklistAdditions();
    if (additions == null || !additions.Contains(ResourceRequest.Any))
    {
        return;
    }

    throw new InvalidResourceBlacklistRequestException(
        "Cannot add " + ResourceRequest.Any + " to the blacklist!");
}
/// <summary>
/// Returns the blacklist request of this record. A locally cached value is
/// returned when present; otherwise the value is read from the proto (or the
/// builder, depending on <c>viaProto</c>), converted, cached and returned.
/// </summary>
/// <returns>
/// the blacklist request, or null when none is cached and the backing
/// proto/builder reports that it has none.
/// </returns>
public override ResourceBlacklistRequest GetResourceBlacklistRequest()
{
    if (this.blacklistRequest == null)
    {
        YarnServiceProtos.AllocateRequestProtoOrBuilder source = viaProto ? proto : builder;
        if (!source.HasBlacklistRequest())
        {
            return null;
        }

        // Convert once and remember the result for later calls.
        this.blacklistRequest = ConvertFromProtoFormat(source.GetBlacklistRequest());
    }

    return this.blacklistRequest;
}
/// <summary>
/// Sends the pending asks, releases and blacklist changes to the scheduler in
/// a single allocate call, records the bookkeeping values carried by the
/// response, and clears the pending collections.
/// </summary>
/// <returns>the response returned by the scheduler.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
protected internal virtual AllocateResponse MakeRemoteRequest()
{
    ApplyRequestLimits();

    // Copies of the pending collections are sent; the originals are cleared below.
    ResourceBlacklistRequest blacklistUpdate = ResourceBlacklistRequest.NewInstance(
        new AList<string>(blacklistAdditions),
        new AList<string>(blacklistRemovals));
    AllocateRequest outgoing = AllocateRequest.NewInstance(
        lastResponseID,
        base.GetApplicationProgress(),
        new AList<ResourceRequest>(ask),
        new AList<ContainerId>(release),
        blacklistUpdate);

    AllocateResponse response = scheduler.Allocate(outgoing);

    lastResponseID = response.GetResponseId();
    availableResources = response.GetAvailableResources();
    lastClusterNmCount = clusterNmCount;
    clusterNmCount = response.GetNumClusterNodes();
    int finishedCount = response.GetCompletedContainersStatuses().Count;

    bool sentAsksOrReleases = ask.Count > 0 || release.Count > 0;
    if (sentAsksOrReleases)
    {
        Log.Info("getResources() for " + applicationId + ":" + " ask=" + ask.Count
            + " release= " + release.Count
            + " newContainers=" + response.GetAllocatedContainers().Count
            + " finishedContainers=" + finishedCount
            + " resourcelimit=" + availableResources
            + " knownNMs=" + clusterNmCount);
    }
    ask.Clear();
    release.Clear();

    if (finishedCount > 0)
    {
        // re-send limited requests when a container completes to trigger asking
        // for more containers
        Sharpen.Collections.AddAll(requestLimitsToUpdate, requestLimits.Keys);
    }

    bool blacklistChanged = blacklistAdditions.Count > 0 || blacklistRemovals.Count > 0;
    if (blacklistChanged)
    {
        Log.Info("Update the blacklist for " + applicationId
            + ": blacklistAdditions=" + blacklistAdditions.Count
            + " blacklistRemovals=" + blacklistRemovals.Count);
    }
    blacklistAdditions.Clear();
    blacklistRemovals.Clear();

    return response;
}
/// <summary>
/// Sends an allocate request with empty ask, release and blacklist lists
/// through <c>amClient</c> and checks that the response equals the fake
/// allocate response created by the test cluster.
/// </summary>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual void TestAllocateOnHA()
{
    IList<ResourceRequest> noAsks = new AList<ResourceRequest>();
    IList<ContainerId> noReleases = new AList<ContainerId>();
    ResourceBlacklistRequest emptyBlacklist = ResourceBlacklistRequest.NewInstance(
        new AList<string>(), new AList<string>());
    AllocateRequest request = AllocateRequest.NewInstance(
        0, 50f, noAsks, noReleases, emptyBlacklist);

    AllocateResponse response = amClient.Allocate(request);

    NUnit.Framework.Assert.AreEqual(response, this.cluster.CreateFakeAllocateResponse());
}
/// <summary>
/// Creates a new <c>AllocateRequest</c> record and populates it from the
/// supplied values.
/// </summary>
/// <param name="responseID">value passed to <c>SetResponseId</c>.</param>
/// <param name="appProgress">value passed to <c>SetProgress</c>.</param>
/// <param name="resourceAsk">value passed to <c>SetAskList</c>.</param>
/// <param name="containersToBeReleased">value passed to <c>SetReleaseList</c>.</param>
/// <param name="resourceBlacklistRequest">value passed to <c>SetResourceBlacklistRequest</c>.</param>
/// <param name="increaseRequests">value passed to <c>SetIncreaseRequests</c>.</param>
/// <returns>the populated record.</returns>
public static AllocateRequest NewInstance(
    int responseID,
    float appProgress,
    IList<ResourceRequest> resourceAsk,
    IList<ContainerId> containersToBeReleased,
    ResourceBlacklistRequest resourceBlacklistRequest,
    IList<ContainerResourceIncreaseRequest> increaseRequests)
{
    AllocateRequest record = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>();

    record.SetResponseId(responseID);
    record.SetProgress(appProgress);
    record.SetAskList(resourceAsk);
    record.SetReleaseList(containersToBeReleased);
    record.SetResourceBlacklistRequest(resourceBlacklistRequest);
    record.SetIncreaseRequests(increaseRequests);

    return record;
}
/// <summary>
/// Convenience overload that creates an <c>AllocateRequest</c> without any
/// container resource increase requests; it forwards to the six-argument
/// overload with a null increase-request list.
/// </summary>
/// <returns>the record produced by the six-argument overload.</returns>
public static AllocateRequest NewInstance(
    int responseID,
    float appProgress,
    IList<ResourceRequest> resourceAsk,
    IList<ContainerId> containersToBeReleased,
    ResourceBlacklistRequest resourceBlacklistRequest)
{
    // Typed null so the call binds to the six-argument overload.
    IList<ContainerResourceIncreaseRequest> noIncreaseRequests = null;
    return NewInstance(
        responseID,
        appProgress,
        resourceAsk,
        containersToBeReleased,
        resourceBlacklistRequest,
        noIncreaseRequests);
}
/// <summary>
/// Sets the <c>ResourceBlacklistRequest</c> carried by this request.
/// </summary>
/// <param name="resourceBlacklistRequest">
/// the blacklist request to store. NOTE(review): the override visible in this
/// file accepts null and treats it as "clear"; confirm that every
/// implementation does the same.
/// </param>
public abstract void SetResourceBlacklistRequest(ResourceBlacklistRequest resourceBlacklistRequest );
/// <summary>
/// Handles one allocate call from an application master: authorizes the
/// caller, records the liveliness ping, validates the request, forwards the
/// asks, releases and blacklist changes to the scheduler, and builds the
/// response.
/// </summary>
/// <remarks>
/// A request whose response id is one behind the last response is treated as
/// a repeated heartbeat and answered with the cached last response. A request
/// that is further behind is rejected. Progress values that are NaN or
/// negative are replaced by 0, and values above 1 are replaced by 1, before
/// the status update is dispatched.
/// </remarks>
/// <param name="request">the allocate request sent by the application master.</param>
/// <returns>
/// the new response, which is also stored on the per-attempt lock object, or
/// the cached last response for a repeated heartbeat.
/// </returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual AllocateResponse Allocate(AllocateRequest request)
{
    AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest();
    ApplicationAttemptId appAttemptId = amrmTokenIdentifier.GetApplicationAttemptId();
    ApplicationId applicationId = appAttemptId.GetApplicationId();
    this.amLivelinessMonitor.ReceivedPing(appAttemptId);

    /* check if its in cache */
    ApplicationMasterService.AllocateResponseLock Lock = responseMap[appAttemptId];
    if (Lock == null)
    {
        string message = "Application attempt " + appAttemptId
            + " doesn't exist in ApplicationMasterService cache.";
        Log.Error(message);
        throw new ApplicationAttemptNotFoundException(message);
    }

    lock (Lock)
    {
        AllocateResponse lastResponse = Lock.GetAllocateResponse();
        if (!HasApplicationMasterRegistered(appAttemptId))
        {
            string message = "AM is not registered for known application attempt: " + appAttemptId
                + " or RM had restarted after AM registered . AM should re-register.";
            Log.Info(message);
            RMAuditLogger.LogFailure(
                this.rmContext.GetRMApps()[appAttemptId.GetApplicationId()].GetUser(),
                RMAuditLogger.AuditConstants.AmAllocate, string.Empty,
                "ApplicationMasterService", message, applicationId, appAttemptId);
            throw new ApplicationMasterNotRegisteredException(message);
        }

        if ((request.GetResponseId() + 1) == lastResponse.GetResponseId())
        {
            /* old heartbeat */
            return lastResponse;
        }
        else if (request.GetResponseId() + 1 < lastResponse.GetResponseId())
        {
            // An up-to-date request carries the id of the last response, so
            // report that id (not id + 1) together with what was received.
            string message = "Invalid responseId in AllocateRequest from application attempt: "
                + appAttemptId + ", expect responseId to be " + lastResponse.GetResponseId()
                + ", but got " + request.GetResponseId();
            throw new InvalidApplicationMasterRequestException(message);
        }

        //filter illegal progress values
        float filteredProgress = request.GetProgress();
        if (float.IsNaN(filteredProgress) || filteredProgress == float.NegativeInfinity
            || filteredProgress < 0)
        {
            request.SetProgress(0);
        }
        else if (filteredProgress > 1 || filteredProgress == float.PositiveInfinity)
        {
            request.SetProgress(1);
        }

        // Send the status update to the appAttempt.
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMAppAttemptStatusupdateEvent(appAttemptId, request.GetProgress()));

        IList<ResourceRequest> ask = request.GetAskList();
        IList<ContainerId> release = request.GetReleaseList();
        ResourceBlacklistRequest blacklistRequest = request.GetResourceBlacklistRequest();
        IList<string> blacklistAdditions = (blacklistRequest != null)
            ? blacklistRequest.GetBlacklistAdditions()
            : Sharpen.Collections.EmptyList;
        IList<string> blacklistRemovals = (blacklistRequest != null)
            ? blacklistRequest.GetBlacklistRemovals()
            : Sharpen.Collections.EmptyList;
        RMApp app = this.rmContext.GetRMApps()[applicationId];

        // set label expression for Resource Requests if resourceName=ANY
        ApplicationSubmissionContext asc = app.GetApplicationSubmissionContext();
        foreach (ResourceRequest req in ask)
        {
            if (null == req.GetNodeLabelExpression()
                && ResourceRequest.Any.Equals(req.GetResourceName()))
            {
                req.SetNodeLabelExpression(asc.GetNodeLabelExpression());
            }
        }

        // sanity check
        try
        {
            RMServerUtils.NormalizeAndValidateRequests(ask,
                rScheduler.GetMaximumResourceCapability(), app.GetQueue(), rScheduler, rmContext);
        }
        catch (InvalidResourceRequestException e)
        {
            Log.Warn("Invalid resource ask by application " + appAttemptId, e);
            throw;
        }

        try
        {
            RMServerUtils.ValidateBlacklistRequest(blacklistRequest);
        }
        catch (InvalidResourceBlacklistRequestException e)
        {
            Log.Warn("Invalid blacklist request by application " + appAttemptId, e);
            throw;
        }

        // In the case of work-preserving AM restart, it's possible for the
        // AM to release containers from the earlier attempt.
        if (!app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts())
        {
            try
            {
                RMServerUtils.ValidateContainerReleaseRequest(release, appAttemptId);
            }
            catch (InvalidContainerReleaseException e)
            {
                Log.Warn("Invalid container release by application " + appAttemptId, e);
                throw;
            }
        }

        // Send new requests to appAttempt.
        Allocation allocation = this.rScheduler.Allocate(appAttemptId, ask, release,
            blacklistAdditions, blacklistRemovals);
        if (!blacklistAdditions.IsEmpty() || !blacklistRemovals.IsEmpty())
        {
            Log.Info("blacklist are updated in Scheduler."
                + "blacklistAdditions: " + blacklistAdditions + ", "
                + "blacklistRemovals: " + blacklistRemovals);
        }

        RMAppAttempt appAttempt = app.GetRMAppAttempt(appAttemptId);
        AllocateResponse allocateResponse = recordFactory.NewRecordInstance<AllocateResponse>();
        if (!allocation.GetContainers().IsEmpty())
        {
            allocateResponse.SetNMTokens(allocation.GetNMTokens());
        }

        // update the response with the deltas of node status changes
        IList<RMNode> updatedNodes = new AList<RMNode>();
        if (app.PullRMNodeUpdates(updatedNodes) > 0)
        {
            IList<NodeReport> updatedNodeReports = new AList<NodeReport>();
            foreach (RMNode rmNode in updatedNodes)
            {
                SchedulerNodeReport schedulerNodeReport = rScheduler.GetNodeReport(rmNode.GetNodeID());
                // Defaults used when the scheduler has no report for the node.
                Resource used = BuilderUtils.NewResource(0, 0);
                int numContainers = 0;
                if (schedulerNodeReport != null)
                {
                    used = schedulerNodeReport.GetUsedResource();
                    numContainers = schedulerNodeReport.GetNumContainers();
                }
                NodeId nodeId = rmNode.GetNodeID();
                NodeReport report = BuilderUtils.NewNodeReport(nodeId, rmNode.GetState(),
                    rmNode.GetHttpAddress(), rmNode.GetRackName(), used,
                    rmNode.GetTotalCapability(), numContainers, rmNode.GetHealthReport(),
                    rmNode.GetLastHealthReportTime(), rmNode.GetNodeLabels());
                updatedNodeReports.AddItem(report);
            }
            allocateResponse.SetUpdatedNodes(updatedNodeReports);
        }

        allocateResponse.SetAllocatedContainers(allocation.GetContainers());
        allocateResponse.SetCompletedContainersStatuses(appAttempt.PullJustFinishedContainers());
        allocateResponse.SetResponseId(lastResponse.GetResponseId() + 1);
        allocateResponse.SetAvailableResources(allocation.GetResourceLimit());
        allocateResponse.SetNumClusterNodes(this.rScheduler.GetNumClusterNodes());

        // add preemption to the allocateResponse message (if any)
        allocateResponse.SetPreemptionMessage(GeneratePreemptionMessage(allocation));

        // update AMRMToken if the token is rolled-up
        MasterKeyData nextMasterKey = this.rmContext.GetAMRMTokenSecretManager().GetNextMasterKeyData();
        if (nextMasterKey != null
            && nextMasterKey.GetMasterKey().GetKeyId() != amrmTokenIdentifier.GetKeyId())
        {
            RMAppAttemptImpl appAttemptImpl = (RMAppAttemptImpl)appAttempt;
            Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> amrmToken =
                appAttempt.GetAMRMToken();
            if (nextMasterKey.GetMasterKey().GetKeyId() != appAttemptImpl.GetAMRMTokenKeyId())
            {
                Log.Info("The AMRMToken has been rolled-over. Send new AMRMToken back"
                    + " to application: " + applicationId);
                amrmToken = rmContext.GetAMRMTokenSecretManager().CreateAndGetAMRMToken(appAttemptId);
                appAttemptImpl.SetAMRMToken(amrmToken);
            }
            allocateResponse.SetAMRMToken(Org.Apache.Hadoop.Yarn.Api.Records.Token.NewInstance(
                amrmToken.GetIdentifier(), amrmToken.GetKind().ToString(),
                amrmToken.GetPassword(), amrmToken.GetService().ToString()));
        }

        /*
         * As we are updating the response inside the lock object so we don't
         * need to worry about unregister call occurring in between (which
         * removes the lock object).
         */
        Lock.SetAllocateResponse(allocateResponse);
        return allocateResponse;
    }
}
/// <summary>
/// Returns the proto held by a blacklist request by casting it to
/// <c>ResourceBlacklistRequestPBImpl</c> and calling <c>GetProto</c>.
/// </summary>
/// <param name="t">the record to convert; it is cast to the PB implementation type.</param>
/// <returns>the proto returned by the PB implementation.</returns>
private YarnProtos.ResourceBlacklistRequestProto ConvertToProtoFormat(ResourceBlacklistRequest t)
{
    ResourceBlacklistRequestPBImpl pbImpl = (ResourceBlacklistRequestPBImpl)t;
    return pbImpl.GetProto();
}
/// <summary>
/// Sends the pending asks, releases and blacklist changes to the resource
/// manager through <c>rmClient</c> and records the values carried by the
/// response.
/// </summary>
/// <remarks>
/// The pending collections are snapshotted and cleared under the lock before
/// the RPC. If the call ends without a response, the <c>finally</c> block puts
/// the snapshots back so the next call can send them again. If the RM reports
/// that the application master is not registered, the method re-queues the
/// pending releases, the blacklisted nodes and every remote request,
/// re-registers, and calls itself again.
/// </remarks>
/// <param name="progressIndicator">progress value to report; must not be negative.</param>
/// <returns>the response obtained from the resource manager.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public override AllocateResponse Allocate(float progressIndicator)
{
    Preconditions.CheckArgument(progressIndicator >= 0, "Progress indicator should not be negative" );
    AllocateResponse allocateResponse = null;
    IList <ResourceRequest> askList = null;
    IList <ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
    // Snapshots of the blacklist changes, kept so they can be restored on failure.
    IList <string> blacklistToAdd = new AList <string>();
    IList <string> blacklistToRemove = new AList <string>();
    try
    {
        lock (this)
        {
            askList = new AList <ResourceRequest>(ask.Count);
            foreach (ResourceRequest r in ask)
            {
                // create a copy of ResourceRequest as we might change it while the
                // RPC layer is using it to send info across
                askList.AddItem(ResourceRequest.NewInstance(r.GetPriority(), r.GetResourceName(), r.GetCapability(), r.GetNumContainers(), r.GetRelaxLocality(), r.GetNodeLabelExpression ()));
            }
            releaseList = new AList <ContainerId>(release);
            // optimistically clear this collection assuming no RPC failure
            ask.Clear();
            release.Clear();
            Sharpen.Collections.AddAll(blacklistToAdd, blacklistAdditions);
            Sharpen.Collections.AddAll(blacklistToRemove, blacklistRemovals);
            ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.NewInstance( blacklistToAdd, blacklistToRemove);
            allocateRequest = AllocateRequest.NewInstance(lastResponseId, progressIndicator, askList, releaseList, blacklistRequest);
            // clear blacklistAdditions and blacklistRemovals before
            // unsynchronized part
            blacklistAdditions.Clear();
            blacklistRemovals.Clear();
        }
        try
        {
            // The RPC itself runs outside the lock.
            allocateResponse = rmClient.Allocate(allocateRequest);
        }
        catch (ApplicationMasterNotRegisteredException)
        {
            Log.Warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing." );
            lock (this)
            {
                // Re-queue everything this client tracks so the retry below sends it again.
                Sharpen.Collections.AddAll(release, this.pendingRelease);
                Sharpen.Collections.AddAll(blacklistAdditions, this.blacklistedNodes);
                foreach (IDictionary <string, SortedDictionary <Resource, AMRMClientImpl.ResourceRequestInfo > > rr in remoteRequestsTable.Values)
                {
                    foreach (IDictionary <Resource, AMRMClientImpl.ResourceRequestInfo> capabalities in rr.Values)
                    {
                        foreach (AMRMClientImpl.ResourceRequestInfo request in capabalities.Values)
                        {
                            AddResourceRequestToAsk(request.remoteRequest);
                        }
                    }
                }
            }
            // re register with RM
            RegisterApplicationMaster();
            // Retry via a recursive call; its result is returned directly.
            allocateResponse = Allocate(progressIndicator);
            return(allocateResponse);
        }
        lock (this)
        {
            // update these on successful RPC
            clusterNodeCount = allocateResponse.GetNumClusterNodes();
            lastResponseId = allocateResponse.GetResponseId();
            clusterAvailableResources = allocateResponse.GetAvailableResources();
            if (!allocateResponse.GetNMTokens().IsEmpty())
            {
                PopulateNMTokens(allocateResponse.GetNMTokens());
            }
            if (allocateResponse.GetAMRMToken() != null)
            {
                UpdateAMRMToken(allocateResponse.GetAMRMToken());
            }
            if (!pendingRelease.IsEmpty() && !allocateResponse.GetCompletedContainersStatuses ().IsEmpty())
            {
                RemovePendingReleaseRequests(allocateResponse.GetCompletedContainersStatuses());
            }
        }
    }
    finally
    {
        // TODO how to differentiate remote yarn exception vs error in rpc
        if (allocateResponse == null)
        {
            // we hit an exception in allocate()
            // preserve ask and release for next call to allocate()
            lock (this)
            {
                Sharpen.Collections.AddAll(release, releaseList);
                // requests could have been added or deleted during call to allocate
                // If requests were added/removed then there is nothing to do since
                // the ResourceRequest object in ask would have the actual new value.
                // If ask does not have this ResourceRequest then it was unchanged and
                // so we can add the value back safely.
                // This assumes that there will no concurrent calls to allocate() and
                // so we dont have to worry about ask being changed in the
                // synchronized block at the beginning of this method.
                foreach (ResourceRequest oldAsk in askList)
                {
                    if (!ask.Contains(oldAsk))
                    {
                        ask.AddItem(oldAsk);
                    }
                }
                // Put the blacklist snapshots back as well.
                Sharpen.Collections.AddAll(blacklistAdditions, blacklistToAdd);
                Sharpen.Collections.AddAll(blacklistRemovals, blacklistToRemove);
            }
        }
    }
    return(allocateResponse);
}