/// <summary>
/// Switches the ResourceManager to standby after reloading the admin ACLs,
/// verifying the caller's access and validating the state-change request.
/// The outcome is recorded in the audit log under "RMHAProtocolService".
/// </summary>
/// <param name="reqInfo">state-change request details passed to <c>CheckHaStateChange</c></param>
/// <exception cref="System.IO.IOException"/>
public virtual void TransitionToStandby(HAServiceProtocol.StateChangeRequestInfo reqInfo)
{
    const string operation = "transitionToStandby";
    const string auditTarget = "RMHAProtocolService";

    lock (this)
    {
        // The admin ACLs may have been updated while another RM was active,
        // so reload them before the HA state transition.
        try
        {
            RefreshAdminAcls(false);
        }
        catch (YarnException aclFailure)
        {
            throw new ServiceFailedException("Can not execute refreshAdminAcls", aclFailure);
        }

        UserGroupInformation caller = CheckAccess(operation);
        CheckHaStateChange(reqInfo);

        try
        {
            rm.TransitionToStandby(true);
            RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, auditTarget);
        }
        catch (Exception cause)
        {
            RMAuditLogger.LogFailure(caller.GetShortUserName(), operation, string.Empty,
                auditTarget, "Exception transitioning to standby");
            throw new ServiceFailedException("Error when transitioning to Standby mode", cause);
        }
    }
}
/// <summary>
/// Checks the audit-log line produced for a successful event against the
/// expected tab-separated key=value layout.
/// </summary>
/// <param name="checkIP">when true, the remote IP entry is expected after the user entry</param>
/// <param name="appId">application id passed to the logger; its entry is expected only when non-null</param>
/// <param name="attemptId">attempt id passed to the logger; its entry is expected only when non-null</param>
/// <param name="containerId">container id passed to the logger; its entry is expected only when non-null</param>
private void TestSuccessLogFormatHelper(bool checkIP, ApplicationId appId, ApplicationAttemptId attemptId, ContainerId containerId)
{
    string actual = RMAuditLogger.CreateSuccessLog(User, Operation, Target, appId, attemptId, containerId);

    string expected = "USER=test\t";
    if (checkIP)
    {
        IPAddress remote = Org.Apache.Hadoop.Ipc.Server.GetRemoteIp();
        expected += RMAuditLogger.Keys.Ip.ToString() + "=" + remote.GetHostAddress() + "\t";
    }

    expected += "OPERATION=oper\tTARGET=tgt\tRESULT=SUCCESS";

    // Optional identifiers are appended in a fixed order: app, attempt, container.
    if (appId != null)
    {
        expected += "\tAPPID=app_1";
    }
    if (attemptId != null)
    {
        expected += "\tAPPATTEMPTID=app_attempt_1";
    }
    if (containerId != null)
    {
        expected += "\tCONTAINERID=container_1";
    }

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
/// <summary>
/// Checks the audit-log line produced for a failed event against the
/// expected tab-separated key=value layout.
/// </summary>
/// <param name="checkIP">when true, the remote IP entry is expected after the user entry</param>
/// <param name="appId">application id passed to the logger; its entry is expected only when non-null</param>
/// <param name="attemptId">attempt id passed to the logger; its entry is expected only when non-null</param>
/// <param name="containerId">container id passed to the logger; its entry is expected only when non-null</param>
private void TestFailureLogFormatHelper(bool checkIP, ApplicationId appId, ApplicationAttemptId attemptId, ContainerId containerId)
{
    string actual = RMAuditLogger.CreateFailureLog(User, Operation, Perm, Target, Desc, appId, attemptId, containerId);

    string expected = "USER=test\t";
    if (checkIP)
    {
        IPAddress remote = Org.Apache.Hadoop.Ipc.Server.GetRemoteIp();
        expected += RMAuditLogger.Keys.Ip.ToString() + "=" + remote.GetHostAddress() + "\t";
    }

    expected += "OPERATION=oper\tTARGET=tgt\tRESULT=FAILURE\t";
    expected += "DESCRIPTION=description of an audit log";
    expected += "\tPERMISSIONS=admin group";

    // Optional identifiers are appended in a fixed order: app, attempt, container.
    if (appId != null)
    {
        expected += "\tAPPID=app_1";
    }
    if (attemptId != null)
    {
        expected += "\tAPPATTEMPTID=app_attempt_1";
    }
    if (containerId != null)
    {
        expected += "\tCONTAINERID=container_1";
    }

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
/// <summary>
/// Reinitializes the scheduler, and the reservation system when one is
/// configured, from the current configuration. Requires the caller to pass
/// the ACL check and the RM to be active.
/// </summary>
/// <param name="request">the refresh request (not inspected by this method)</param>
/// <returns>a newly created, empty response record</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="Org.Apache.Hadoop.Ipc.StandbyException"/>
public virtual RefreshQueuesResponse RefreshQueues(RefreshQueuesRequest request)
{
    string argName = "refreshQueues";
    string msg = "refresh queues.";

    UserGroupInformation caller = CheckAcls(argName);
    CheckRMStatus(caller.GetShortUserName(), argName, msg);

    RefreshQueuesResponse result = recordFactory.NewRecordInstance<RefreshQueuesResponse>();
    try
    {
        rmContext.GetScheduler().Reinitialize(GetConfig(), this.rmContext);

        // The reservation system is optional; refresh it only when present.
        ReservationSystem reservations = rmContext.GetReservationSystem();
        if (reservations != null)
        {
            reservations.Reinitialize(GetConfig(), rmContext);
        }

        RMAuditLogger.LogSuccess(caller.GetShortUserName(), argName, "AdminService");
        return result;
    }
    catch (IOException failure)
    {
        throw LogAndWrapException(failure, caller.GetShortUserName(), argName, msg);
    }
}
/// <summary>
/// Utility method to verify that the current user is an administrator
/// according to the passed
/// <see cref="YarnAuthorizationProvider"/>.
/// Failures are written to both the supplied logger and the audit log.
/// </summary>
/// <param name="authorizer">the authorization provider whose <c>IsAdmin</c> check is applied</param>
/// <param name="method">the method name to be logged</param>
/// <param name="module">like AdminService or NodeLabelManager; used as the audit-log target</param>
/// <param name="Log">the logger to use</param>
/// <returns>
/// <see cref="Org.Apache.Hadoop.Security.UserGroupInformation"/>
/// of the current user
/// </returns>
/// <exception cref="System.IO.IOException">
/// rethrown when the current user cannot be determined
/// </exception>
public static UserGroupInformation VerifyAdminAccess(YarnAuthorizationProvider authorizer, string method, string module, Log Log)
{
    UserGroupInformation user;
    try
    {
        user = UserGroupInformation.GetCurrentUser();
    }
    catch (IOException ioe)
    {
        Log.Warn("Couldn't get current user", ioe);
        // Attribute the failure to the calling module instead of always
        // reporting "AdminService", matching the unauthorized-user path below.
        RMAuditLogger.LogFailure("UNKNOWN", method, string.Empty, module, "Couldn't get current user");
        throw;
    }

    if (!authorizer.IsAdmin(user))
    {
        // Build the message once so the log line and the exception always agree.
        string denied = "User " + user.GetShortUserName() + " doesn't have permission" + " to call '" + method + "'";
        Log.Warn(denied);
        RMAuditLogger.LogFailure(user.GetShortUserName(), method, string.Empty, module,
            RMAuditLogger.AuditConstants.UnauthorizedUser);
        throw new AccessControlException(denied);
    }

    if (Log.IsTraceEnabled())
    {
        Log.Trace(method + " invoked by user " + user.GetShortUserName());
    }
    return user;
}
/// <summary>
/// Logs an I/O failure, records it in the audit log under "AdminService",
/// and converts it into the exception returned by
/// <c>RPCUtil.GetRemoteException</c>. The caller is expected to throw the result.
/// </summary>
/// <param name="ioe">the failure to log and wrap</param>
/// <param name="user">short name of the user who issued the request</param>
/// <param name="argName">name of the admin operation being audited</param>
/// <param name="msg">description appended to "Exception " in the log entries</param>
/// <returns>the wrapped exception</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
private YarnException LogAndWrapException(IOException ioe, string user, string argName, string msg)
{
    string description = "Exception " + msg;

    Log.Info(description, ioe);
    RMAuditLogger.LogFailure(user, argName, string.Empty, "AdminService", description);

    return RPCUtil.GetRemoteException(ioe);
}
/// <summary>
/// Applies the requested resource options to the named nodes. All node ids
/// are validated first; if any is unrecognized the whole request is
/// rejected before any update event is dispatched.
/// </summary>
/// <param name="request">carries the node-id to resource-option map</param>
/// <returns>a new response instance</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual UpdateNodeResourceResponse UpdateNodeResource(UpdateNodeResourceRequest request)
{
    string argName = "updateNodeResource";
    UserGroupInformation caller = CheckAcls(argName);
    CheckRMStatus(caller.GetShortUserName(), argName, "update node resource.");

    IDictionary<NodeId, ResourceOption> requested = request.GetNodeResourceMap();

    // Pass 1: reject the request as a whole when any node id is unknown,
    // rather than partially updating the valid nodes.
    ICollection<NodeId> requestedIds = requested.Keys;
    foreach (NodeId candidate in requestedIds)
    {
        RMNode known = this.rmContext.GetRMNodes()[candidate];
        if (known != null)
        {
            continue;
        }

        string rejection = "Resource update get failed on all nodes due to change resource on an unrecognized node: " + candidate;
        Log.Error(rejection);
        throw RPCUtil.GetRemoteException(rejection);
    }

    // Pass 2: dispatch the updates. A node can still disappear between the
    // two passes (for example through decommissioning); such nodes are
    // logged and skipped instead of failing the request.
    bool anyMissing = false;
    foreach (KeyValuePair<NodeId, ResourceOption> pair in requested)
    {
        NodeId targetId = pair.Key;
        ResourceOption option = pair.Value;

        RMNode target = this.rmContext.GetRMNodes()[targetId];
        if (target == null)
        {
            Log.Warn("Resource update get failed on an unrecognized node: " + targetId);
            anyMissing = true;
            continue;
        }

        // Hand the new resource option to the RMNode through the dispatcher.
        this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMNodeResourceUpdateEvent(targetId, option));
        Log.Info("Update resource on node(" + target.GetNodeID() + ") with resource(" + option.ToString() + ")");
    }

    // Success is audited only when every requested node was updated.
    if (!anyMissing)
    {
        RMAuditLogger.LogSuccess(caller.GetShortUserName(), argName, "AdminService");
    }

    return UpdateNodeResourceResponse.NewInstance();
}
/// <summary>
/// Ensures the ResourceManager is active. When it is not, an audit failure
/// is recorded under "AdminService" and <c>ThrowStandbyException</c> is invoked.
/// </summary>
/// <param name="user">short name of the requesting user, for the audit entry</param>
/// <param name="argName">name of the admin operation being attempted</param>
/// <param name="msg">operation description appended to the audit message</param>
/// <exception cref="Org.Apache.Hadoop.Ipc.StandbyException"/>
private void CheckRMStatus(string user, string argName, string msg)
{
    if (IsRMActive())
    {
        return;
    }

    string reason = "ResourceManager is not active. Can not " + msg;
    RMAuditLogger.LogFailure(user, argName, string.Empty, "AdminService", reason);
    ThrowStandbyException();
}
/// <summary>
/// Refreshes the user-to-groups mapping service using a configuration
/// loaded from the core-site configuration file. Requires the caller to
/// pass the ACL check and the RM to be active.
/// </summary>
/// <param name="request">the refresh request (not inspected by this method)</param>
/// <returns>a newly created, empty response record</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual RefreshUserToGroupsMappingsResponse RefreshUserToGroupsMappings(RefreshUserToGroupsMappingsRequest request)
{
    string argName = "refreshUserToGroupsMappings";

    UserGroupInformation caller = CheckAcls(argName);
    CheckRMStatus(caller.GetShortUserName(), argName, "refresh user-groups.");

    Configuration coreSiteConf = GetConfiguration(new Configuration(false),
        YarnConfiguration.CoreSiteConfigurationFile);
    Groups.GetUserToGroupsMappingService(coreSiteConf).Refresh();

    RMAuditLogger.LogSuccess(caller.GetShortUserName(), argName, "AdminService");
    return recordFactory.NewRecordInstance<RefreshUserToGroupsMappingsResponse>();
}
/// <summary>
/// Checks the success audit-log line produced when every argument is null:
/// user, operation and target are rendered as the text "null" and no id
/// entries are expected.
/// </summary>
/// <param name="checkIP">when true, the remote IP entry is expected after the user entry</param>
private void TestSuccessLogNulls(bool checkIP)
{
    string actual = RMAuditLogger.CreateSuccessLog(null, null, null, null, null, null);

    string expected = "USER=null\t";
    if (checkIP)
    {
        IPAddress remote = Org.Apache.Hadoop.Ipc.Server.GetRemoteIp();
        expected += RMAuditLogger.Keys.Ip.ToString() + "=" + remote.GetHostAddress() + "\t";
    }
    expected += "OPERATION=null\tTARGET=null\tRESULT=SUCCESS";

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
/// <summary>
/// Writes an audit-log entry describing how the given application ended.
/// Finished and killed applications are logged as successes; failed
/// applications, and applications in any other state, are logged as
/// failures together with the application's diagnostics.
/// </summary>
/// <param name="appId">id used to look the application up in the RM context</param>
protected internal virtual void WriteAuditLog(ApplicationId appId)
{
    RMApp app = rmContext.GetRMApps()[appId];

    // Fallback operation name for states that have no dedicated audit constant.
    string operation = "UNKNOWN";
    bool success = false;
    switch (app.GetState())
    {
        case RMAppState.Failed:
        {
            operation = RMAuditLogger.AuditConstants.FinishFailedApp;
            break;
        }

        case RMAppState.Finished:
        {
            operation = RMAuditLogger.AuditConstants.FinishSuccessApp;
            success = true;
            break;
        }

        case RMAppState.Killed:
        {
            // A kill is an intended outcome, so it is audited as a success.
            operation = RMAuditLogger.AuditConstants.FinishKilledApp;
            success = true;
            break;
        }

        default:
        {
            break;
        }
    }

    if (success)
    {
        RMAuditLogger.LogSuccess(app.GetUser(), operation, "RMAppManager", app.GetApplicationId());
    }
    else
    {
        // Diagnostics may be absent; pass null through in that case.
        StringBuilder diag = app.GetDiagnostics();
        string msg = diag == null ? null : diag.ToString();
        RMAuditLogger.LogFailure(app.GetUser(), operation, msg, "RMAppManager",
            "App failed with state: " + app.GetState(), appId);
    }
}
/// <summary>
/// Reloads the admin ACL list from the yarn-site configuration file and
/// installs it on the authorizer.
/// </summary>
/// <param name="checkRMHAState">
/// when true, the RM must be active for the refresh to proceed; callers
/// performing an HA transition pass false to skip that check
/// </param>
/// <returns>a newly created, empty response record</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
private RefreshAdminAclsResponse RefreshAdminAcls(bool checkRMHAState)
{
    string argName = "refreshAdminAcls";
    UserGroupInformation caller = CheckAcls(argName);

    if (checkRMHAState)
    {
        CheckRMStatus(caller.GetShortUserName(), argName, "refresh Admin ACLs.");
    }

    Configuration yarnSiteConf = GetConfiguration(new Configuration(false),
        YarnConfiguration.YarnSiteConfigurationFile);
    authorizer.SetAdmins(GetAdminAclList(yarnSiteConf), UserGroupInformation.GetCurrentUser());

    RMAuditLogger.LogSuccess(caller.GetShortUserName(), argName, "AdminService");
    return recordFactory.NewRecordInstance<RefreshAdminAclsResponse>();
}
/// <summary>
/// Reloads the proxy-user (super-user groups) configuration. Requires the
/// caller to pass the ACL check and the RM to be active.
/// </summary>
/// <param name="request">the refresh request (not inspected by this method)</param>
/// <returns>a newly created, empty response record</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual RefreshSuperUserGroupsConfigurationResponse RefreshSuperUserGroupsConfiguration(RefreshSuperUserGroupsConfigurationRequest request)
{
    string argName = "refreshSuperUserGroupsConfiguration";

    UserGroupInformation caller = CheckAcls(argName);
    CheckRMStatus(caller.GetShortUserName(), argName, "refresh super-user-groups.");

    // Settings are read from core-site.xml (Hadoop common) and from
    // yarn-site.xml (RM-specific).
    Configuration mergedConf = GetConfiguration(
        new Configuration(false),
        YarnConfiguration.CoreSiteConfigurationFile,
        YarnConfiguration.YarnSiteConfigurationFile);

    RMServerUtils.ProcessRMProxyUsersConf(mergedConf);
    ProxyUsers.RefreshSuperUserGroupsConfiguration(mergedConf);

    RMAuditLogger.LogSuccess(caller.GetShortUserName(), argName, "AdminService");
    return recordFactory.NewRecordInstance<RefreshSuperUserGroupsConfigurationResponse>();
}
/// <summary>
/// Handles an ApplicationMaster's unregister request: records a liveliness
/// ping and dispatches an unregistration event for the attempt.
/// </summary>
/// <param name="request">supplies the tracking URL, final status and diagnostics for the event</param>
/// <returns>
/// a response built with <c>true</c> when the application's final state is
/// already stored, otherwise with the application's unmanaged-AM flag
/// </returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual FinishApplicationMasterResponse FinishApplicationMaster(FinishApplicationMasterRequest request)
{
    ApplicationAttemptId applicationAttemptId = AuthorizeRequest().GetApplicationAttemptId();
    ApplicationId appId = applicationAttemptId.GetApplicationId();
    RMApp rmApp = rmContext.GetRMApps()[applicationAttemptId.GetApplicationId()];

    // Check the stored final state first, so that
    // ApplicationDoesNotExistInCacheException is not thrown before and
    // after an RM work-preserving restart.
    if (rmApp.IsAppFinalStateStored())
    {
        Log.Info(rmApp.GetApplicationId() + " unregistered successfully. ");
        return FinishApplicationMasterResponse.NewInstance(true);
    }

    ApplicationMasterService.AllocateResponseLock responseLock = responseMap[applicationAttemptId];
    if (responseLock == null)
    {
        ThrowApplicationDoesNotExistInCacheException(applicationAttemptId);
    }

    // Only one thread of the AM may run finishApp at a time.
    lock (responseLock)
    {
        bool registered = HasApplicationMasterRegistered(applicationAttemptId);
        if (!registered)
        {
            string message = "Application Master is trying to unregister before registering for: " + appId;
            Log.Error(message);
            RMAuditLogger.LogFailure(
                this.rmContext.GetRMApps()[appId].GetUser(),
                RMAuditLogger.AuditConstants.UnregisterAm,
                string.Empty,
                "ApplicationMasterService",
                message,
                appId,
                applicationAttemptId);
            throw new ApplicationMasterNotRegisteredException(message);
        }

        this.amLivelinessMonitor.ReceivedPing(applicationAttemptId);

        RMAppAttemptUnregistrationEvent unregistration = new RMAppAttemptUnregistrationEvent(
            applicationAttemptId,
            request.GetTrackingUrl(),
            request.GetFinalApplicationStatus(),
            request.GetDiagnostics());
        rmContext.GetDispatcher().GetEventHandler().Handle(unregistration);

        // Unmanaged AMs get true so that they do not retry.
        bool unmanaged = rmApp.GetApplicationSubmissionContext().GetUnmanagedAM();
        return FinishApplicationMasterResponse.NewInstance(unmanaged);
    }
}
/// <summary>
/// Switches the ResourceManager to active: reloads the admin ACLs, checks
/// the caller's access, validates the request, performs the transition and
/// then runs all refresh operations. A failure in the refresh step raises a
/// fatal event on the dispatcher.
/// </summary>
/// <param name="reqInfo">state-change request details passed to <c>CheckHaStateChange</c></param>
/// <exception cref="System.IO.IOException"/>
public virtual void TransitionToActive(HAServiceProtocol.StateChangeRequestInfo reqInfo)
{
    const string operation = "transitionToActive";
    const string auditTarget = "RMHAProtocolService";

    lock (this)
    {
        // The admin ACLs may have been updated while another RM was active,
        // so reload them before the HA state transition.
        try
        {
            RefreshAdminAcls(false);
        }
        catch (YarnException aclFailure)
        {
            throw new ServiceFailedException("Can not execute refreshAdminAcls", aclFailure);
        }

        UserGroupInformation caller = CheckAccess(operation);
        CheckHaStateChange(reqInfo);

        try
        {
            rm.TransitionToActive();
        }
        catch (Exception transitionFailure)
        {
            RMAuditLogger.LogFailure(caller.GetShortUserName(), operation, string.Empty,
                auditTarget, "Exception transitioning to active");
            throw new ServiceFailedException("Error when transitioning to Active mode", transitionFailure);
        }

        try
        {
            // Run every refresh* so the newly active RM picks up current configuration.
            RefreshAll();
        }
        catch (Exception refreshFailure)
        {
            Log.Error("RefreshAll failed so firing fatal event", refreshFailure);
            RMFatalEvent fatal = new RMFatalEvent(RMFatalEventType.TransitionToActiveFailed, refreshFailure);
            rmContext.GetDispatcher().GetEventHandler().Handle(fatal);
            throw new ServiceFailedException("Error on refreshAll during transistion to Active", refreshFailure);
        }

        RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, auditTarget);
    }
}
/// <summary>
/// Asks the nodes list manager to refresh its node lists using a
/// configuration loaded from the yarn-site configuration file. Requires the
/// caller to pass the ACL check and the RM to be active.
/// </summary>
/// <param name="request">the refresh request (not inspected by this method)</param>
/// <returns>a newly created, empty response record</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="Org.Apache.Hadoop.Ipc.StandbyException"/>
public virtual RefreshNodesResponse RefreshNodes(RefreshNodesRequest request)
{
    string argName = "refreshNodes";
    string msg = "refresh nodes.";

    UserGroupInformation caller = CheckAcls(argName);
    CheckRMStatus(caller.GetShortUserName(), argName, msg);

    try
    {
        Configuration yarnSiteConf = GetConfiguration(new Configuration(false),
            YarnConfiguration.YarnSiteConfigurationFile);
        rmContext.GetNodesListManager().RefreshNodes(yarnSiteConf);

        RMAuditLogger.LogSuccess(caller.GetShortUserName(), argName, "AdminService");
        return recordFactory.NewRecordInstance<RefreshNodesResponse>();
    }
    catch (IOException failure)
    {
        throw LogAndWrapException(failure, caller.GetShortUserName(), argName, msg);
    }
}
/// <summary>
/// Replaces the labels on nodes using the node-to-labels mapping carried by
/// the request. Requires the caller to pass the ACL check and the RM to be
/// active.
/// </summary>
/// <param name="request">supplies the node-to-labels mapping</param>
/// <returns>a newly created, empty response record</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual ReplaceLabelsOnNodeResponse ReplaceLabelsOnNode(ReplaceLabelsOnNodeRequest request)
{
    string argName = "replaceLabelsOnNode";
    string msg = "set node to labels.";

    UserGroupInformation caller = CheckAcls(argName);
    CheckRMStatus(caller.GetShortUserName(), argName, msg);

    ReplaceLabelsOnNodeResponse result = recordFactory.NewRecordInstance<ReplaceLabelsOnNodeResponse>();
    try
    {
        rmContext.GetNodeLabelManager().ReplaceLabelsOnNode(request.GetNodeToLabels());
        RMAuditLogger.LogSuccess(caller.GetShortUserName(), argName, "AdminService");
        return result;
    }
    catch (IOException failure)
    {
        throw LogAndWrapException(failure, caller.GetShortUserName(), argName, msg);
    }
}
/// <summary>
/// Adds the labels carried by the request to the cluster's node-label
/// manager. Requires the caller to pass the ACL check and the RM to be
/// active.
/// </summary>
/// <param name="request">supplies the node labels to add</param>
/// <returns>a newly created, empty response record</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual AddToClusterNodeLabelsResponse AddToClusterNodeLabels(AddToClusterNodeLabelsRequest request)
{
    string argName = "addToClusterNodeLabels";
    string msg = "add labels.";

    UserGroupInformation caller = CheckAcls(argName);
    CheckRMStatus(caller.GetShortUserName(), argName, msg);

    AddToClusterNodeLabelsResponse result = recordFactory.NewRecordInstance<AddToClusterNodeLabelsResponse>();
    try
    {
        rmContext.GetNodeLabelManager().AddToCluserNodeLabels(request.GetNodeLabels());
        RMAuditLogger.LogSuccess(caller.GetShortUserName(), argName, "AdminService");
        return result;
    }
    catch (IOException failure)
    {
        throw LogAndWrapException(failure, caller.GetShortUserName(), argName, msg);
    }
}
/// <summary>
/// Verifies the key=value formatting of the audit logger step by step:
/// <c>Start</c> writes the first pair, and each <c>Add</c> appends a
/// tab-separated pair, rendering a null value as the text "null".
/// </summary>
public virtual void TestKeyValLogFormat()
{
    StringBuilder actual = new StringBuilder();

    // First pair: written without a leading separator.
    RMAuditLogger.Start(RMAuditLogger.Keys.User, User, actual);
    string expected = "USER=test";
    NUnit.Framework.Assert.AreEqual(expected, actual.ToString());

    // Second pair: appended after a tab.
    RMAuditLogger.Add(RMAuditLogger.Keys.Operation, Operation, actual);
    expected += "\tOPERATION=oper";
    NUnit.Framework.Assert.AreEqual(expected, actual.ToString());

    // A null value is appended as "null".
    RMAuditLogger.Add(RMAuditLogger.Keys.Appid, (string)null, actual);
    expected += "\tAPPID=null";
    NUnit.Framework.Assert.AreEqual(expected, actual.ToString());

    // Finally the target, then check the complete string.
    RMAuditLogger.Add(RMAuditLogger.Keys.Target, Target, actual);
    expected += "\tTARGET=tgt";
    NUnit.Framework.Assert.AreEqual(expected, actual.ToString());
}
/// <summary>
/// Reloads the service-level ACLs from the Hadoop policy configuration file
/// and applies them to this service and to the client, application-master
/// and resource-tracker services. Rejected when service authorization is
/// not enabled in the configuration.
/// </summary>
/// <param name="request">the refresh request (not inspected by this method)</param>
/// <returns>a newly created, empty response record</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual RefreshServiceAclsResponse RefreshServiceAcls(RefreshServiceAclsRequest request)
{
    bool authorizationEnabled = GetConfig().GetBoolean(
        CommonConfigurationKeysPublic.HadoopSecurityAuthorization, false);
    if (!authorizationEnabled)
    {
        IOException notEnabled = new IOException("Service Authorization ("
            + CommonConfigurationKeysPublic.HadoopSecurityAuthorization + ") not enabled.");
        throw RPCUtil.GetRemoteException(notEnabled);
    }

    string argName = "refreshServiceAcls";
    UserGroupInformation caller = CheckAcls(argName);
    CheckRMStatus(caller.GetShortUserName(), argName, "refresh Service ACLs.");

    PolicyProvider policies = RMPolicyProvider.GetInstance();
    Configuration policyConf = GetConfiguration(new Configuration(false),
        YarnConfiguration.HadoopPolicyConfigurationFile);

    // Apply to this service first, then to each RM-hosted RPC service.
    RefreshServiceAcls(policyConf, policies);
    rmContext.GetClientRMService().RefreshServiceAcls(policyConf, policies);
    rmContext.GetApplicationMasterService().RefreshServiceAcls(policyConf, policies);
    rmContext.GetResourceTrackerService().RefreshServiceAcls(policyConf, policies);

    RMAuditLogger.LogSuccess(caller.GetShortUserName(), argName, "AdminService");
    return recordFactory.NewRecordInstance<RefreshServiceAclsResponse>();
}
/// <summary>
/// Handles an ApplicationMaster heartbeat / allocation request. The method
/// authorizes the caller, records a liveliness ping, validates the request
/// (registration, response id, resource asks, blacklist and container
/// releases), forwards the asks to the scheduler, and builds the response
/// from the resulting allocation, node updates, finished containers and,
/// when the master key has changed, an AMRM token.
/// </summary>
/// <param name="request">the allocation request; its progress value is clamped in place</param>
/// <returns>
/// the previous response when the request repeats the last heartbeat,
/// otherwise a new response whose id is the previous response id plus one
/// </returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual AllocateResponse Allocate(AllocateRequest request)
{
    AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest();
    ApplicationAttemptId appAttemptId = amrmTokenIdentifier.GetApplicationAttemptId();
    ApplicationId applicationId = appAttemptId.GetApplicationId();
    this.amLivelinessMonitor.ReceivedPing(appAttemptId);

    /* check if its in cache */
    ApplicationMasterService.AllocateResponseLock Lock = responseMap[appAttemptId];
    if (Lock == null)
    {
        string message = "Application attempt " + appAttemptId + " doesn't exist in ApplicationMasterService cache.";
        Log.Error(message);
        throw new ApplicationAttemptNotFoundException(message);
    }

    lock (Lock)
    {
        AllocateResponse lastResponse = Lock.GetAllocateResponse();
        if (!HasApplicationMasterRegistered(appAttemptId))
        {
            string message = "AM is not registered for known application attempt: " + appAttemptId + " or RM had restarted after AM registered . AM should re-register.";
            Log.Info(message);
            RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appAttemptId.GetApplicationId()].GetUser(),
                RMAuditLogger.AuditConstants.AmAllocate, string.Empty, "ApplicationMasterService",
                message, applicationId, appAttemptId);
            throw new ApplicationMasterNotRegisteredException(message);
        }

        if ((request.GetResponseId() + 1) == lastResponse.GetResponseId())
        {
            /* old heartbeat */
            return lastResponse;
        }
        else
        {
            if (request.GetResponseId() + 1 < lastResponse.GetResponseId())
            {
                // An up-to-date request carries the id of the last response it
                // received, so report that id (not id + 1) as the expected
                // value, together with the id that was actually sent.
                string message = "Invalid responseId in AllocateRequest from application attempt: "
                    + appAttemptId + ", expect responseId to be " + lastResponse.GetResponseId()
                    + ", but got " + request.GetResponseId();
                throw new InvalidApplicationMasterRequestException(message);
            }
        }

        // Filter illegal progress values: NaN and negatives become 0, values above 1 become 1.
        float filteredProgress = request.GetProgress();
        if (float.IsNaN(filteredProgress) || filteredProgress == float.NegativeInfinity || filteredProgress < 0)
        {
            request.SetProgress(0);
        }
        else
        {
            if (filteredProgress > 1 || filteredProgress == float.PositiveInfinity)
            {
                request.SetProgress(1);
            }
        }

        // Send the status update to the appAttempt.
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMAppAttemptStatusupdateEvent(appAttemptId, request.GetProgress()));

        IList<ResourceRequest> ask = request.GetAskList();
        IList<ContainerId> release = request.GetReleaseList();
        ResourceBlacklistRequest blacklistRequest = request.GetResourceBlacklistRequest();
        IList<string> blacklistAdditions = (blacklistRequest != null)
            ? blacklistRequest.GetBlacklistAdditions()
            : Sharpen.Collections.EmptyList;
        IList<string> blacklistRemovals = (blacklistRequest != null)
            ? blacklistRequest.GetBlacklistRemovals()
            : Sharpen.Collections.EmptyList;
        RMApp app = this.rmContext.GetRMApps()[applicationId];

        // Set label expression for Resource Requests if resourceName=ANY.
        ApplicationSubmissionContext asc = app.GetApplicationSubmissionContext();
        foreach (ResourceRequest req in ask)
        {
            if (null == req.GetNodeLabelExpression() && ResourceRequest.Any.Equals(req.GetResourceName()))
            {
                req.SetNodeLabelExpression(asc.GetNodeLabelExpression());
            }
        }

        // Sanity checks: each validation failure is logged and rethrown unchanged.
        try
        {
            RMServerUtils.NormalizeAndValidateRequests(ask, rScheduler.GetMaximumResourceCapability(),
                app.GetQueue(), rScheduler, rmContext);
        }
        catch (InvalidResourceRequestException e)
        {
            Log.Warn("Invalid resource ask by application " + appAttemptId, e);
            throw;
        }

        try
        {
            RMServerUtils.ValidateBlacklistRequest(blacklistRequest);
        }
        catch (InvalidResourceBlacklistRequestException e)
        {
            Log.Warn("Invalid blacklist request by application " + appAttemptId, e);
            throw;
        }

        // In the case of work-preserving AM restart, it's possible for the
        // AM to release containers from the earlier attempt, so release
        // validation is skipped when containers are kept across attempts.
        if (!app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts())
        {
            try
            {
                RMServerUtils.ValidateContainerReleaseRequest(release, appAttemptId);
            }
            catch (InvalidContainerReleaseException e)
            {
                Log.Warn("Invalid container release by application " + appAttemptId, e);
                throw;
            }
        }

        // Send new requests to appAttempt.
        Allocation allocation = this.rScheduler.Allocate(appAttemptId, ask, release,
            blacklistAdditions, blacklistRemovals);
        if (!blacklistAdditions.IsEmpty() || !blacklistRemovals.IsEmpty())
        {
            Log.Info("blacklist are updated in Scheduler." + "blacklistAdditions: " + blacklistAdditions
                + ", " + "blacklistRemovals: " + blacklistRemovals);
        }

        RMAppAttempt appAttempt = app.GetRMAppAttempt(appAttemptId);
        AllocateResponse allocateResponse = recordFactory.NewRecordInstance<AllocateResponse>();
        if (!allocation.GetContainers().IsEmpty())
        {
            allocateResponse.SetNMTokens(allocation.GetNMTokens());
        }

        // Update the response with the deltas of node status changes.
        IList<RMNode> updatedNodes = new AList<RMNode>();
        if (app.PullRMNodeUpdates(updatedNodes) > 0)
        {
            IList<NodeReport> updatedNodeReports = new AList<NodeReport>();
            foreach (RMNode rmNode in updatedNodes)
            {
                SchedulerNodeReport schedulerNodeReport = rScheduler.GetNodeReport(rmNode.GetNodeID());

                // Default to zero usage when the scheduler has no report for the node.
                Resource used = BuilderUtils.NewResource(0, 0);
                int numContainers = 0;
                if (schedulerNodeReport != null)
                {
                    used = schedulerNodeReport.GetUsedResource();
                    numContainers = schedulerNodeReport.GetNumContainers();
                }

                NodeId nodeId = rmNode.GetNodeID();
                NodeReport report = BuilderUtils.NewNodeReport(nodeId, rmNode.GetState(),
                    rmNode.GetHttpAddress(), rmNode.GetRackName(), used, rmNode.GetTotalCapability(),
                    numContainers, rmNode.GetHealthReport(), rmNode.GetLastHealthReportTime(),
                    rmNode.GetNodeLabels());
                updatedNodeReports.AddItem(report);
            }
            allocateResponse.SetUpdatedNodes(updatedNodeReports);
        }

        allocateResponse.SetAllocatedContainers(allocation.GetContainers());
        allocateResponse.SetCompletedContainersStatuses(appAttempt.PullJustFinishedContainers());
        allocateResponse.SetResponseId(lastResponse.GetResponseId() + 1);
        allocateResponse.SetAvailableResources(allocation.GetResourceLimit());
        allocateResponse.SetNumClusterNodes(this.rScheduler.GetNumClusterNodes());

        // Add preemption to the allocateResponse message (if any).
        allocateResponse.SetPreemptionMessage(GeneratePreemptionMessage(allocation));

        // Update AMRMToken if the token is rolled-up.
        MasterKeyData nextMasterKey = this.rmContext.GetAMRMTokenSecretManager().GetNextMasterKeyData();
        if (nextMasterKey != null
            && nextMasterKey.GetMasterKey().GetKeyId() != amrmTokenIdentifier.GetKeyId())
        {
            RMAppAttemptImpl appAttemptImpl = (RMAppAttemptImpl)appAttempt;
            Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> amrmToken = appAttempt.GetAMRMToken();
            if (nextMasterKey.GetMasterKey().GetKeyId() != appAttemptImpl.GetAMRMTokenKeyId())
            {
                Log.Info("The AMRMToken has been rolled-over. Send new AMRMToken back" + " to application: "
                    + applicationId);
                amrmToken = rmContext.GetAMRMTokenSecretManager().CreateAndGetAMRMToken(appAttemptId);
                appAttemptImpl.SetAMRMToken(amrmToken);
            }
            allocateResponse.SetAMRMToken(Org.Apache.Hadoop.Yarn.Api.Records.Token.NewInstance(
                amrmToken.GetIdentifier(), amrmToken.GetKind().ToString(), amrmToken.GetPassword(),
                amrmToken.GetService().ToString()));
        }

        /*
         * As we are updating the response inside the lock object so we don't
         * need to worry about unregister call occurring in between (which
         * removes the lock object).
         */
        Lock.SetAllocateResponse(allocateResponse);
        return allocateResponse;
    }
}
/// <summary>
/// Registers an ApplicationMaster for its application attempt. The method
/// authorizes the caller, rejects duplicate registrations, resets the
/// attempt's response id to 0, dispatches a registration event and builds
/// the response (maximum resource capability, application ACLs, queue and,
/// when security is enabled, the client-to-AM token master key). When
/// containers are kept across attempts, the previous attempts' containers
/// and their NM tokens are included as well.
/// </summary>
/// <param name="request">supplies the AM host, RPC port and tracking URL</param>
/// <returns>the populated registration response</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual RegisterApplicationMasterResponse RegisterApplicationMaster(RegisterApplicationMasterRequest request)
{
    AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest();
    ApplicationAttemptId applicationAttemptId = amrmTokenIdentifier.GetApplicationAttemptId();
    ApplicationId appID = applicationAttemptId.GetApplicationId();

    ApplicationMasterService.AllocateResponseLock Lock = responseMap[applicationAttemptId];
    if (Lock == null)
    {
        RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(),
            RMAuditLogger.AuditConstants.RegisterAm,
            "Application doesn't exist in cache " + applicationAttemptId, "ApplicationMasterService",
            "Error in registering application master", appID, applicationAttemptId);
        ThrowApplicationDoesNotExistInCacheException(applicationAttemptId);
    }

    // Allow only one thread in AM to do registerApp at a time.
    lock (Lock)
    {
        AllocateResponse lastResponse = Lock.GetAllocateResponse();
        if (HasApplicationMasterRegistered(applicationAttemptId))
        {
            string message = "Application Master is already registered : " + appID;
            Log.Warn(message);
            RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(),
                RMAuditLogger.AuditConstants.RegisterAm, string.Empty, "ApplicationMasterService",
                message, appID, applicationAttemptId);
            throw new InvalidApplicationMasterRequestException(message);
        }

        this.amLivelinessMonitor.ReceivedPing(applicationAttemptId);
        RMApp app = this.rmContext.GetRMApps()[appID];

        // Setting the response id to 0 to identify if the
        // application master is register for the respective attemptid.
        lastResponse.SetResponseId(0);
        Lock.SetAllocateResponse(lastResponse);
        Log.Info("AM registration " + applicationAttemptId);
        this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMAppAttemptRegistrationEvent(
            applicationAttemptId, request.GetHost(), request.GetRpcPort(), request.GetTrackingUrl()));
        RMAuditLogger.LogSuccess(app.GetUser(), RMAuditLogger.AuditConstants.RegisterAm,
            "ApplicationMasterService", appID, applicationAttemptId);

        // Pick up min/max resource from scheduler...
        RegisterApplicationMasterResponse response =
            recordFactory.NewRecordInstance<RegisterApplicationMasterResponse>();
        response.SetMaximumResourceCapability(rScheduler.GetMaximumResourceCapability(app.GetQueue()));
        response.SetApplicationACLs(app.GetRMAppAttempt(applicationAttemptId).GetSubmissionContext()
            .GetAMContainerSpec().GetApplicationACLs());
        response.SetQueue(app.GetQueue());
        if (UserGroupInformation.IsSecurityEnabled())
        {
            Log.Info("Setting client token master key");
            response.SetClientToAMTokenMasterKey(ByteBuffer.Wrap(
                rmContext.GetClientToAMTokenSecretManager().GetMasterKey(applicationAttemptId).GetEncoded()));
        }

        // For work-preserving AM restart, retrieve previous attempts' containers
        // and corresponding NM tokens.
        if (app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts())
        {
            IList<Container> transferredContainers =
                ((AbstractYarnScheduler)rScheduler).GetTransferredContainers(applicationAttemptId);
            if (!transferredContainers.IsEmpty())
            {
                response.SetContainersFromPreviousAttempts(transferredContainers);
                IList<NMToken> nmTokens = new AList<NMToken>();
                foreach (Container container in transferredContainers)
                {
                    try
                    {
                        NMToken token = rmContext.GetNMTokenSecretManager().CreateAndGetNMToken(
                            app.GetUser(), applicationAttemptId, container);
                        if (null != token)
                        {
                            nmTokens.AddItem(token);
                        }
                    }
                    catch (ArgumentException e)
                    {
                        // If it's a DNS issue, throw UnknownHostException directly
                        // so that it will be automatically retried by RMProxy in
                        // the RPC layer.
                        if (e.InnerException is UnknownHostException)
                        {
                            throw (UnknownHostException)e.InnerException;
                        }

                        // Any other failure still skips this container's token
                        // (best effort), but is logged instead of being dropped
                        // silently.
                        Log.Warn("Failed to create NM token for a container transferred to "
                            + applicationAttemptId + "; skipping it", e);
                    }
                }
                response.SetNMTokensFromPreviousAttempts(nmTokens);
                Log.Info("Application " + appID + " retrieved " + transferredContainers.Count
                    + " containers from previous" + " attempts and " + nmTokens.Count + " NM tokens.");
            }
        }

        response.SetSchedulerResourceTypes(rScheduler.GetSchedulingResourceTypes());
        return response;
    }
}