예제 #1
0
        /// <summary>
        /// Re-initializes the scheduler with the current configuration and, when the RM
        /// context exposes a reservation system, re-initializes that as well.
        /// </summary>
        /// <remarks>
        /// The caller is first passed through <c>CheckAcls</c> and <c>CheckRMStatus</c>.
        /// A success entry is written to the audit log only after the re-initialization
        /// calls have returned.
        /// </remarks>
        /// <param name="request">The refresh request; this method does not read it.</param>
        /// <returns>A response record newly created through the record factory.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="Org.Apache.Hadoop.Ipc.StandbyException"/>
        public virtual RefreshQueuesResponse RefreshQueues(RefreshQueuesRequest request)
        {
            const string operation = "refreshQueues";
            const string description = "refresh queues.";

            UserGroupInformation caller = CheckAcls(operation);
            CheckRMStatus(caller.GetShortUserName(), operation, description);

            // The response is created up front, before the scheduler is touched.
            RefreshQueuesResponse reply = recordFactory.NewRecordInstance<RefreshQueuesResponse>();
            try
            {
                rmContext.GetScheduler().Reinitialize(GetConfig(), this.rmContext);

                // The reservation system is optional; refresh it only when present.
                ReservationSystem reservations = rmContext.GetReservationSystem();
                if (reservations != null)
                {
                    reservations.Reinitialize(GetConfig(), rmContext);
                }

                RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, "AdminService");
                return reply;
            }
            catch (IOException failure)
            {
                // LogAndWrapException produces the exception that is surfaced to the caller.
                throw LogAndWrapException(failure, caller.GetShortUserName(), operation, description);
            }
        }
예제 #2
0
 /// <summary>
 /// Moves the resource manager to standby after refreshing the admin ACLs and
 /// validating the caller and the requested state change.
 /// </summary>
 /// <remarks>
 /// The whole sequence runs while holding the monitor of this instance. The outcome
 /// of the transition itself is recorded in the audit log (success or failure).
 /// </remarks>
 /// <param name="reqInfo">State change request details, passed to <c>CheckHaStateChange</c>.</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void TransitionToStandby(HAServiceProtocol.StateChangeRequestInfo reqInfo)
 {
     const string operation = "transitionToStandby";
     const string auditTarget = "RMHAProtocolService";

     // The monitor stays on `this`: switching to another lock object would change
     // mutual exclusion with any other member that synchronizes on the instance.
     lock (this)
     {
         // Refresh the admin ACLs before the HA state transition, in case they were
         // updated while a previous RM was active.
         try
         {
             RefreshAdminAcls(false);
         }
         catch (YarnException aclFailure)
         {
             throw new ServiceFailedException("Can not execute refreshAdminAcls", aclFailure);
         }

         UserGroupInformation caller = CheckAccess(operation);
         CheckHaStateChange(reqInfo);

         try
         {
             rm.TransitionToStandby(true);
             RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, auditTarget);
         }
         catch (Exception transitionFailure)
         {
             RMAuditLogger.LogFailure(caller.GetShortUserName(), operation, string.Empty,
                                      auditTarget, "Exception transitioning to standby");
             throw new ServiceFailedException("Error when transitioning to Standby mode",
                                              transitionFailure);
         }
     }
 }
예제 #3
0
        /// <summary>
        /// Applies the requested resource options to the nodes named in the request.
        /// </summary>
        /// <remarks>
        /// Runs in two passes. The first pass looks up every requested node and aborts
        /// the whole request if any of them is unknown, so that no partial update is
        /// made. The second pass dispatches one <c>RMNodeResourceUpdateEvent</c> per
        /// node; a node that has disappeared in the meantime is only logged and skipped.
        /// The success audit entry is written only when no node was skipped.
        /// </remarks>
        /// <param name="request">Carries the node-to-resource-option map to apply.</param>
        /// <returns>A new <c>UpdateNodeResourceResponse</c> instance.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual UpdateNodeResourceResponse UpdateNodeResource(UpdateNodeResourceRequest
                                                                     request)
        {
            const string operation = "updateNodeResource";
            const string unknownNodePrefix =
                "Resource update get failed on all nodes due to change resource on an unrecognized node: ";

            UserGroupInformation caller = CheckAcls(operation);
            CheckRMStatus(caller.GetShortUserName(), operation, "update node resource.");

            IDictionary<NodeId, ResourceOption> requestedUpdates = request.GetNodeResourceMap();

            // Pass 1: validate every node first so that an invalid entry rejects the
            // whole request instead of leaving the valid nodes partially updated.
            foreach (NodeId candidate in requestedUpdates.Keys)
            {
                RMNode registered = this.rmContext.GetRMNodes()[candidate];
                if (registered != null)
                {
                    continue;
                }
                Log.Error(unknownNodePrefix + candidate);
                throw RPCUtil.GetRemoteException(unknownNodePrefix + candidate);
            }

            // Pass 2: perform the update per node. A node can still vanish between the
            // two passes (e.g. it is decommissioned at the same moment); such nodes are
            // logged and skipped without raising an exception.
            bool everyNodeUpdated = true;
            foreach (KeyValuePair<NodeId, ResourceOption> update in requestedUpdates)
            {
                ResourceOption requestedOption = update.Value;
                NodeId targetId = update.Key;
                RMNode target = this.rmContext.GetRMNodes()[targetId];

                if (target == null)
                {
                    Log.Warn("Resource update get failed on an unrecognized node: " + targetId);
                    everyNodeUpdated = false;
                    continue;
                }

                // Hand the new resource option to the RMNode through the dispatcher.
                this.rmContext.GetDispatcher().GetEventHandler().Handle(
                    new RMNodeResourceUpdateEvent(targetId, requestedOption));
                Log.Info("Update resource on node(" + target.GetNodeID() + ") with resource("
                         + requestedOption.ToString() + ")");
            }

            if (everyNodeUpdated)
            {
                RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, "AdminService");
            }

            return UpdateNodeResourceResponse.NewInstance();
        }
예제 #4
0
        /// <summary>
        /// Refreshes the user-to-groups mapping service, using a configuration loaded
        /// from the core-site configuration file.
        /// </summary>
        /// <param name="request">The refresh request; this method does not read it.</param>
        /// <returns>A response record newly created through the record factory.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual RefreshUserToGroupsMappingsResponse RefreshUserToGroupsMappings(RefreshUserToGroupsMappingsRequest
                                                                                       request)
        {
            const string operation = "refreshUserToGroupsMappings";

            UserGroupInformation caller = CheckAcls(operation);
            CheckRMStatus(caller.GetShortUserName(), operation, "refresh user-groups.");

            // Start from an empty Configuration(false) and load only the core-site file.
            Configuration coreSite = GetConfiguration(new Configuration(false),
                                                      YarnConfiguration.CoreSiteConfigurationFile);
            Groups.GetUserToGroupsMappingService(coreSite).Refresh();

            RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, "AdminService");
            return recordFactory.NewRecordInstance<RefreshUserToGroupsMappingsResponse>();
        }
예제 #5
0
        /// <summary>
        /// Reloads the admin ACL list from the yarn-site configuration file and installs
        /// it on the authorizer.
        /// </summary>
        /// <param name="checkRMHAState">
        /// When <c>true</c>, <c>CheckRMStatus</c> is invoked before the ACLs are reloaded;
        /// when <c>false</c>, that check is skipped.
        /// </param>
        /// <returns>A response record newly created through the record factory.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        private RefreshAdminAclsResponse RefreshAdminAcls(bool checkRMHAState)
        {
            const string operation = "refreshAdminAcls";

            UserGroupInformation caller = CheckAcls(operation);
            if (checkRMHAState)
            {
                CheckRMStatus(caller.GetShortUserName(), operation, "refresh Admin ACLs.");
            }

            // Start from an empty Configuration(false) and load only the yarn-site file.
            Configuration yarnSite = GetConfiguration(new Configuration(false),
                                                      YarnConfiguration.YarnSiteConfigurationFile);
            authorizer.SetAdmins(GetAdminAclList(yarnSite), UserGroupInformation.GetCurrentUser());

            RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, "AdminService");
            return recordFactory.NewRecordInstance<RefreshAdminAclsResponse>();
        }
예제 #6
0
        /// <summary>
        /// Writes an audit log entry describing how the given application ended.
        /// </summary>
        /// <remarks>
        /// The <c>Finished</c> and <c>Killed</c> states are logged as successes; the
        /// <c>Failed</c> state and every other state are logged as failures, with the
        /// application's diagnostics (if any) as the failure description.
        /// </remarks>
        /// <param name="appId">Id used to look the application up in the RM context.</param>
        protected internal virtual void WriteAuditLog(ApplicationId appId)
        {
            RMApp application = rmContext.GetRMApps()[appId];

            // Default operation name for states not handled below. The spelling is part
            // of the emitted audit text and is kept as-is.
            string auditOperation = "UNKONWN";
            bool endedSuccessfully = false;

            switch (application.GetState())
            {
                case RMAppState.Failed:
                    auditOperation = RMAuditLogger.AuditConstants.FinishFailedApp;
                    break;

                case RMAppState.Finished:
                    auditOperation = RMAuditLogger.AuditConstants.FinishSuccessApp;
                    endedSuccessfully = true;
                    break;

                case RMAppState.Killed:
                    auditOperation = RMAuditLogger.AuditConstants.FinishKilledApp;
                    endedSuccessfully = true;
                    break;

                default:
                    break;
            }

            if (!endedSuccessfully)
            {
                // Diagnostics may be absent; pass null through in that case.
                StringBuilder diagnostics = application.GetDiagnostics();
                string description = null;
                if (diagnostics != null)
                {
                    description = diagnostics.ToString();
                }
                RMAuditLogger.LogFailure(application.GetUser(), auditOperation, description,
                                         "RMAppManager",
                                         "App failed with state: " + application.GetState(), appId);
                return;
            }

            RMAuditLogger.LogSuccess(application.GetUser(), auditOperation, "RMAppManager",
                                     application.GetApplicationId());
        }
예제 #7
0
        /// <summary>
        /// Reloads the super-user (proxy user) group configuration from the core-site
        /// and yarn-site configuration files.
        /// </summary>
        /// <param name="request">The refresh request; this method does not read it.</param>
        /// <returns>A response record newly created through the record factory.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual RefreshSuperUserGroupsConfigurationResponse RefreshSuperUserGroupsConfiguration
            (RefreshSuperUserGroupsConfigurationRequest request)
        {
            const string operation = "refreshSuperUserGroupsConfiguration";

            UserGroupInformation caller = CheckAcls(operation);
            CheckRMStatus(caller.GetShortUserName(), operation, "refresh super-user-groups.");

            // Both files are loaded: hadoop common settings come from core-site.xml and
            // RM specific settings from yarn-site.xml.
            Configuration merged = GetConfiguration(new Configuration(false),
                                                    YarnConfiguration.CoreSiteConfigurationFile,
                                                    YarnConfiguration.YarnSiteConfigurationFile);
            RMServerUtils.ProcessRMProxyUsersConf(merged);
            ProxyUsers.RefreshSuperUserGroupsConfiguration(merged);

            RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, "AdminService");
            return recordFactory.NewRecordInstance<RefreshSuperUserGroupsConfigurationResponse>();
        }
예제 #8
0
 /// <summary>
 /// Moves the resource manager to active and then runs <c>RefreshAll</c> so the
 /// newly active RM works with the latest configuration.
 /// </summary>
 /// <remarks>
 /// The whole sequence runs while holding the monitor of this instance. A failure
 /// of the transition is audit-logged; a failure of <c>RefreshAll</c> is logged and
 /// reported as an <c>RMFatalEvent</c>. Both are surfaced as
 /// <c>ServiceFailedException</c>. The success audit entry is written last.
 /// </remarks>
 /// <param name="reqInfo">State change request details, passed to <c>CheckHaStateChange</c>.</param>
 /// <exception cref="System.IO.IOException"/>
 public virtual void TransitionToActive(HAServiceProtocol.StateChangeRequestInfo reqInfo)
 {
     const string operation = "transitionToActive";
     const string auditTarget = "RMHAProtocolService";

     // The monitor stays on `this`: switching to another lock object would change
     // mutual exclusion with any other member that synchronizes on the instance.
     lock (this)
     {
         // Refresh the admin ACLs before the HA state transition, in case they were
         // updated while a previous RM was active.
         try
         {
             RefreshAdminAcls(false);
         }
         catch (YarnException aclFailure)
         {
             throw new ServiceFailedException("Can not execute refreshAdminAcls", aclFailure);
         }

         UserGroupInformation caller = CheckAccess(operation);
         CheckHaStateChange(reqInfo);

         try
         {
             rm.TransitionToActive();
         }
         catch (Exception transitionFailure)
         {
             RMAuditLogger.LogFailure(caller.GetShortUserName(), operation, string.Empty,
                                      auditTarget, "Exception transitioning to active");
             throw new ServiceFailedException("Error when transitioning to Active mode",
                                              transitionFailure);
         }

         try
         {
             // Run every refresh* operation so the active RM picks up updated configuration.
             RefreshAll();
         }
         catch (Exception refreshFailure)
         {
             Log.Error("RefreshAll failed so firing fatal event", refreshFailure);
             rmContext.GetDispatcher().GetEventHandler().Handle(
                 new RMFatalEvent(RMFatalEventType.TransitionToActiveFailed, refreshFailure));
             throw new ServiceFailedException("Error on refreshAll during transistion to Active",
                                              refreshFailure);
         }

         RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, auditTarget);
     }
 }
예제 #9
0
        /// <summary>
        /// Asks the nodes list manager to refresh its nodes, using a configuration
        /// loaded from the yarn-site configuration file.
        /// </summary>
        /// <param name="request">The refresh request; this method does not read it.</param>
        /// <returns>A response record newly created through the record factory.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="Org.Apache.Hadoop.Ipc.StandbyException"/>
        public virtual RefreshNodesResponse RefreshNodes(RefreshNodesRequest request)
        {
            const string operation = "refreshNodes";
            const string description = "refresh nodes.";

            UserGroupInformation caller = CheckAcls(operation);
            CheckRMStatus(caller.GetShortUserName(), operation, description);

            try
            {
                // Start from an empty Configuration(false) and load only the yarn-site file.
                Configuration yarnSite = GetConfiguration(new Configuration(false),
                                                          YarnConfiguration.YarnSiteConfigurationFile);
                rmContext.GetNodesListManager().RefreshNodes(yarnSite);

                RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, "AdminService");
                return recordFactory.NewRecordInstance<RefreshNodesResponse>();
            }
            catch (IOException failure)
            {
                // LogAndWrapException produces the exception that is surfaced to the caller.
                throw LogAndWrapException(failure, caller.GetShortUserName(), operation, description);
            }
        }
예제 #10
0
        /// <summary>
        /// Replaces the labels on nodes according to the node-to-labels mapping carried
        /// by the request, via the node label manager.
        /// </summary>
        /// <param name="request">Supplies the node-to-labels mapping.</param>
        /// <returns>A response record newly created through the record factory.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual ReplaceLabelsOnNodeResponse ReplaceLabelsOnNode(ReplaceLabelsOnNodeRequest
                                                                       request)
        {
            const string operation = "replaceLabelsOnNode";
            const string description = "set node to labels.";

            UserGroupInformation caller = CheckAcls(operation);
            CheckRMStatus(caller.GetShortUserName(), operation, description);

            // The response is created up front, before the label manager is touched.
            ReplaceLabelsOnNodeResponse reply =
                recordFactory.NewRecordInstance<ReplaceLabelsOnNodeResponse>();
            try
            {
                rmContext.GetNodeLabelManager().ReplaceLabelsOnNode(request.GetNodeToLabels());
                RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, "AdminService");
                return reply;
            }
            catch (IOException failure)
            {
                // LogAndWrapException produces the exception that is surfaced to the caller.
                throw LogAndWrapException(failure, caller.GetShortUserName(), operation, description);
            }
        }
예제 #11
0
        /// <summary>
        /// Adds the node labels carried by the request to the cluster, via the node
        /// label manager.
        /// </summary>
        /// <param name="request">Supplies the node labels to add.</param>
        /// <returns>A response record newly created through the record factory.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual AddToClusterNodeLabelsResponse AddToClusterNodeLabels(AddToClusterNodeLabelsRequest
                                                                             request)
        {
            const string operation = "addToClusterNodeLabels";
            const string description = "add labels.";

            UserGroupInformation caller = CheckAcls(operation);
            CheckRMStatus(caller.GetShortUserName(), operation, description);

            // The response is created up front, before the label manager is touched.
            AddToClusterNodeLabelsResponse reply =
                recordFactory.NewRecordInstance<AddToClusterNodeLabelsResponse>();
            try
            {
                rmContext.GetNodeLabelManager().AddToCluserNodeLabels(request.GetNodeLabels());
                RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, "AdminService");
                return reply;
            }
            catch (IOException failure)
            {
                // LogAndWrapException produces the exception that is surfaced to the caller.
                throw LogAndWrapException(failure, caller.GetShortUserName(), operation, description);
            }
        }
예제 #12
0
        /// <summary>
        /// Reloads the service-level ACLs from the hadoop policy configuration file and
        /// applies them to this service and to the client, application master and
        /// resource tracker services of the RM context.
        /// </summary>
        /// <remarks>
        /// The request is rejected with a remote exception, before any ACL check, when
        /// service authorization is not enabled in the current configuration.
        /// </remarks>
        /// <param name="request">The refresh request; this method does not read it.</param>
        /// <returns>A response record newly created through the record factory.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual RefreshServiceAclsResponse RefreshServiceAcls(RefreshServiceAclsRequest
                                                                     request)
        {
            // Guard: nothing to refresh unless service authorization is switched on
            // (the setting defaults to false when absent).
            bool authorizationEnabled = GetConfig().GetBoolean(
                CommonConfigurationKeysPublic.HadoopSecurityAuthorization, false);
            if (!authorizationEnabled)
            {
                throw RPCUtil.GetRemoteException(new IOException(
                    "Service Authorization (" + CommonConfigurationKeysPublic.HadoopSecurityAuthorization
                    + ") not enabled."));
            }

            const string operation = "refreshServiceAcls";

            UserGroupInformation caller = CheckAcls(operation);
            CheckRMStatus(caller.GetShortUserName(), operation, "refresh Service ACLs.");

            PolicyProvider provider = RMPolicyProvider.GetInstance();
            // Start from an empty Configuration(false) and load only the policy file.
            Configuration policyConf = GetConfiguration(new Configuration(false),
                                                        YarnConfiguration.HadoopPolicyConfigurationFile);

            // Apply to this service first, then to each RM service in turn.
            RefreshServiceAcls(policyConf, provider);
            rmContext.GetClientRMService().RefreshServiceAcls(policyConf, provider);
            rmContext.GetApplicationMasterService().RefreshServiceAcls(policyConf, provider);
            rmContext.GetResourceTrackerService().RefreshServiceAcls(policyConf, provider);

            RMAuditLogger.LogSuccess(caller.GetShortUserName(), operation, "AdminService");
            return recordFactory.NewRecordInstance<RefreshServiceAclsResponse>();
        }
        /// <summary>
        /// Registers the application master for the application attempt identified by
        /// <c>AuthorizeRequest()</c> and builds the registration response.
        /// </summary>
        /// <remarks>
        /// Registration is serialized per attempt by locking the attempt's entry in
        /// <c>responseMap</c>. A second registration for the same attempt is rejected
        /// with <c>InvalidApplicationMasterRequestException</c>. Both rejections and the
        /// successful registration are written to the audit log.
        /// </remarks>
        /// <param name="request">
        /// Registration request; its host, RPC port and tracking URL are forwarded in
        /// the <c>RMAppAttemptRegistrationEvent</c>.
        /// </param>
        /// <returns>
        /// A response carrying the maximum resource capability for the application's
        /// queue, the application ACLs, the queue, the scheduler resource types and,
        /// where applicable, the client-to-AM master key and the containers / NM tokens
        /// carried over from previous attempts.
        /// </returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual RegisterApplicationMasterResponse RegisterApplicationMaster(RegisterApplicationMasterRequest
                                                                                   request)
        {
            AMRMTokenIdentifier  amrmTokenIdentifier  = AuthorizeRequest();
            ApplicationAttemptId applicationAttemptId = amrmTokenIdentifier.GetApplicationAttemptId
                                                            ();
            ApplicationId appID = applicationAttemptId.GetApplicationId();

            // Per-attempt lock object; it also holds the last allocate response.
            ApplicationMasterService.AllocateResponseLock Lock = responseMap[applicationAttemptId
                                                                 ];
            if (Lock == null)
            {
                // No entry for this attempt: audit the failure and reject the request.
                RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(), RMAuditLogger.AuditConstants
                                         .RegisterAm, "Application doesn't exist in cache " + applicationAttemptId, "ApplicationMasterService"
                                         , "Error in registering application master", appID, applicationAttemptId);
                // NOTE(review): presumably this helper always throws; otherwise execution
                // would continue into lock (Lock) with a null reference -- confirm.
                ThrowApplicationDoesNotExistInCacheException(applicationAttemptId);
            }
            // Allow only one thread in AM to do registerApp at a time.
            lock (Lock)
            {
                AllocateResponse lastResponse = Lock.GetAllocateResponse();
                if (HasApplicationMasterRegistered(applicationAttemptId))
                {
                    // Duplicate registration: log, audit and reject.
                    string message = "Application Master is already registered : " + appID;
                    Log.Warn(message);
                    RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(), RMAuditLogger.AuditConstants
                                             .RegisterAm, string.Empty, "ApplicationMasterService", message, appID, applicationAttemptId
                                             );
                    throw new InvalidApplicationMasterRequestException(message);
                }
                // Record a ping for this attempt with the AM liveliness monitor.
                this.amLivelinessMonitor.ReceivedPing(applicationAttemptId);
                RMApp app = this.rmContext.GetRMApps()[appID];
                // Setting the response id to 0 to identify if the
                // application master is register for the respective attemptid
                lastResponse.SetResponseId(0);
                Lock.SetAllocateResponse(lastResponse);
                Log.Info("AM registration " + applicationAttemptId);
                // Publish the registration (host, RPC port, tracking URL) through the dispatcher.
                this.rmContext.GetDispatcher().GetEventHandler().Handle(new RMAppAttemptRegistrationEvent
                                                                            (applicationAttemptId, request.GetHost(), request.GetRpcPort(), request.GetTrackingUrl
                                                                                ()));
                RMAuditLogger.LogSuccess(app.GetUser(), RMAuditLogger.AuditConstants.RegisterAm,
                                         "ApplicationMasterService", appID, applicationAttemptId);
                // Pick up min/max resource from scheduler...
                RegisterApplicationMasterResponse response = recordFactory.NewRecordInstance <RegisterApplicationMasterResponse
                                                                                              >();
                response.SetMaximumResourceCapability(rScheduler.GetMaximumResourceCapability(app
                                                                                              .GetQueue()));
                response.SetApplicationACLs(app.GetRMAppAttempt(applicationAttemptId).GetSubmissionContext
                                                ().GetAMContainerSpec().GetApplicationACLs());
                response.SetQueue(app.GetQueue());
                // The client-to-AM master key is included only when security is enabled.
                if (UserGroupInformation.IsSecurityEnabled())
                {
                    Log.Info("Setting client token master key");
                    response.SetClientToAMTokenMasterKey(ByteBuffer.Wrap(rmContext.GetClientToAMTokenSecretManager
                                                                             ().GetMasterKey(applicationAttemptId).GetEncoded()));
                }
                // For work-preserving AM restart, retrieve previous attempts' containers
                // and corresponding NM tokens.
                if (app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts
                        ())
                {
                    // NOTE(review): unchecked cast -- assumes rScheduler is an
                    // AbstractYarnScheduler whenever this option is set; confirm.
                    IList <Container> transferredContainers = ((AbstractYarnScheduler)rScheduler).GetTransferredContainers
                                                                  (applicationAttemptId);
                    if (!transferredContainers.IsEmpty())
                    {
                        response.SetContainersFromPreviousAttempts(transferredContainers);
                        IList <NMToken> nmTokens = new AList <NMToken>();
                        foreach (Container container in transferredContainers)
                        {
                            try
                            {
                                // A null token is skipped rather than added to the list.
                                NMToken token = rmContext.GetNMTokenSecretManager().CreateAndGetNMToken(app.GetUser
                                                                                                            (), applicationAttemptId, container);
                                if (null != token)
                                {
                                    nmTokens.AddItem(token);
                                }
                            }
                            catch (ArgumentException e)
                            {
                                // If it's a DNS issue, throw UnknownHostException directly;
                                // that will be automatically retried by RMProxy in the RPC layer.
                                // NOTE(review): any other ArgumentException is silently
                                // swallowed here and the container simply gets no NM token --
                                // confirm this best-effort behavior is intended.
                                if (e.InnerException is UnknownHostException)
                                {
                                    throw (UnknownHostException)e.InnerException;
                                }
                            }
                        }
                        response.SetNMTokensFromPreviousAttempts(nmTokens);
                        Log.Info("Application " + appID + " retrieved " + transferredContainers.Count + " containers from previous"
                                 + " attempts and " + nmTokens.Count + " NM tokens.");
                    }
                }
                response.SetSchedulerResourceTypes(rScheduler.GetSchedulingResourceTypes());
                return(response);
            }
        }