Esempio n. 1
0
        // To avoid using cached client
        /// <summary>
        /// Exercises AMRMToken delivery through allocate calls around a master-key
        /// roll-over: no token before the roll-over, a token carrying the RM's current
        /// master-key id right after it, the already generated token (rather than a
        /// newly created one) when the AM still identifies itself with the old token,
        /// and no token once the AM is up to date or the next key has been activated.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRMMasterKeysUpdate()
        {
            AtomicReference<AMRMTokenSecretManager> spySecretMgrRef =
                new AtomicReference<AMRMTokenSecretManager>();
            // NOTE(review): _MockRM_349 is declared outside this method. Presumably it
            // skips the login step and publishes a Mockito spy of the secret manager
            // through spySecretMgrRef (the value is reset/verified as a mock below) - confirm.
            MockRM rm = new _MockRM_349(this, spySecretMgrRef, conf);

            rm.Start();
            try
            {
                MockNM nm = rm.RegisterNode("127.0.0.1:1234", 8000);
                RMApp app = rm.SubmitApp(200);
                MockAM am = MockRM.LaunchAndRegisterAM(app, rm, nm);
                AMRMTokenSecretManager spySecretMgr = spySecretMgrRef.Get();

                // Do allocate. Should not update AMRMToken
                AllocateResponse response = am.Allocate(
                    Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
                NUnit.Framework.Assert.IsNull(response.GetAMRMToken());

                // Keep the attempt's token from before the roll-over; its identifier is
                // replayed further down to act as an AM that has not picked up the new token.
                Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> oldToken =
                    rm.GetRMContext().GetRMApps()[app.GetApplicationId()]
                      .GetRMAppAttempt(am.GetApplicationAttemptId()).GetAMRMToken();

                // roll over the master key
                // Do allocate again. the AM should get the latest AMRMToken
                rm.GetRMContext().GetAMRMTokenSecretManager().RollMasterKey();
                response = am.Allocate(
                    Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
                NUnit.Framework.Assert.IsNotNull(response.GetAMRMToken());

                // The token handed back must carry the key id the secret manager
                // currently reports as its master key.
                Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> amrmToken =
                    ConverterUtils.ConvertFromYarn(
                        response.GetAMRMToken(),
                        new Text(response.GetAMRMToken().GetService()));
                NUnit.Framework.Assert.AreEqual(
                    amrmToken.DecodeIdentifier().GetKeyId(),
                    rm.GetRMContext().GetAMRMTokenSecretManager().GetMasterKey().GetMasterKey().GetKeyId());

                // Do allocate again with the same old token and verify the RM sends
                // back the last generated token instead of generating it again.
                Org.Mockito.Mockito.Reset(spySecretMgr);
                UserGroupInformation ugi = UserGroupInformation.CreateUserForTesting(
                    am.GetApplicationAttemptId().ToString(), new string[0]);
                ugi.AddTokenIdentifier(oldToken.DecodeIdentifier());
                response = am.DoAllocateAs(
                    ugi, Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
                NUnit.Framework.Assert.IsNotNull(response.GetAMRMToken());
                Org.Mockito.Mockito.Verify(spySecretMgr, Org.Mockito.Mockito.Never())
                    .CreateAndGetAMRMToken(Matchers.IsA<ApplicationAttemptId>());

                // Do allocate again with the updated token and verify we do not
                // receive a new token to use.
                response = am.Allocate(
                    Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
                NUnit.Framework.Assert.IsNull(response.GetAMRMToken());

                // Activate the next master key. The test expects this not to trigger
                // another AMRMToken update for the AM.
                rm.GetRMContext().GetAMRMTokenSecretManager().ActivateNextMasterKey();
                response = am.Allocate(
                    Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<AllocateRequest>());
                NUnit.Framework.Assert.IsNull(response.GetAMRMToken());
            }
            finally
            {
                // Stop the RM even when an assertion or call above throws, so a failing
                // run does not leave the started MockRM behind for later tests.
                rm.Stop();
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Sends a single allocate call (no asks, no releases) to the scheduler while
 /// holding this instance's monitor, then records the response id and any
 /// AMRMToken contained in the response.
 /// </summary>
 /// <remarks>
 /// Failure handling:
 /// <list type="bullet">
 /// <item><description>Unknown application attempt: a JobAmReboot event is raised and a
 /// YarnRuntimeException wrapping the original exception is thrown.</description></item>
 /// <item><description>Application master not registered: the response id is reset to 0
 /// and Register() is called; nothing is thrown.</description></item>
 /// <item><description>Any other exception: rethrown unchanged until retryInterval
 /// milliseconds have elapsed since the last successful call; after that an InternalError
 /// event is raised and a YarnRuntimeException carrying the last failure as its cause
 /// is thrown.</description></item>
 /// </list>
 /// </remarks>
 /// <exception cref="System.Exception"/>
 protected internal override void Heartbeat()
 {
     lock (this)
     {
         AllocateRequest allocateRequest = AllocateRequest.NewInstance(
             this.lastResponseID,
             base.GetApplicationProgress(),
             new AList<ResourceRequest>(),
             new AList<ContainerId>(),
             null);
         AllocateResponse allocateResponse = null;
         try
         {
             allocateResponse = scheduler.Allocate(allocateRequest);
             // A successful call restarts the retry window measured in the generic
             // catch block below.
             retrystartTime = Runtime.CurrentTimeMillis();
         }
         catch (ApplicationAttemptNotFoundException e)
         {
             Log.Info("Event from RM: shutting down Application Master");
             // This can happen if the RM has been restarted. If it is in that state,
             // this application must clean itself up.
             eventHandler.Handle(new JobEvent(this.GetJob().GetID(), JobEventType.JobAmReboot));
             throw new YarnRuntimeException("Resource Manager doesn't recognize AttemptId: " +
                                            this.GetContext().GetApplicationID(), e);
         }
         catch (ApplicationMasterNotRegisteredException)
         {
             Log.Info("ApplicationMaster is out of sync with ResourceManager," + " hence resync and send outstanding requests."
                      );
             this.lastResponseID = 0;
             Register();
         }
         catch (Exception e)
         {
             // This can happen when the connection to the RM has gone down. Keep
             // re-trying until the retryInterval has expired.
             if (Runtime.CurrentTimeMillis() - retrystartTime >= retryInterval)
             {
                 string message = "Could not contact RM after " + retryInterval + " milliseconds.";
                 Log.Error(message);
                 eventHandler.Handle(new JobEvent(this.GetJob().GetID(), JobEventType.InternalError));
                 // Attach the last failure as the cause so the reason the RM could not
                 // be reached is not lost when giving up.
                 throw new YarnRuntimeException(message, e);
             }
             // Throw this up to the caller, which may decide to ignore it and
             // continue to attempt to contact the RM.
             throw;
         }
         // allocateResponse is still null after the re-registration path above, in
         // which case there is nothing to record.
         if (allocateResponse != null)
         {
             this.lastResponseID = allocateResponse.GetResponseId();
             Token token = allocateResponse.GetAMRMToken();
             if (token != null)
             {
                 UpdateAMRMToken(token);
             }
         }
     }
 }
Esempio n. 3
0
        /// <summary>
        /// Sends the queued asks, releases and blacklist changes to the ResourceManager
        /// in a single allocate call and folds the response back into this client's state.
        /// </summary>
        /// <remarks>
        /// The queued collections are snapshotted and cleared under the monitor before
        /// the RPC is issued outside of it. If no response was obtained, the snapshots
        /// are put back so the next call sends them again. If the RM reports that the
        /// application master is not registered, the outstanding state is re-queued,
        /// the AM re-registers and the call is retried once via recursion.
        /// </remarks>
        /// <param name="progressIndicator">Progress value forwarded in the request; must not be negative.</param>
        /// <returns>The response of the allocate call (or of the retried call after re-registration).</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public override AllocateResponse Allocate(float progressIndicator)
        {
            Preconditions.CheckArgument(progressIndicator >= 0, "Progress indicator should not be negative");

            AllocateResponse rmResponse = null;
            AllocateRequest outgoingRequest = null;
            IList<ResourceRequest> sentAsks = null;
            IList<ContainerId> sentReleases = null;
            IList<string> sentBlacklistAdds = new AList<string>();
            IList<string> sentBlacklistRemoves = new AList<string>();

            try
            {
                // Phase 1 (under the monitor): snapshot everything that is queued and
                // build the request from the snapshots.
                lock (this)
                {
                    sentAsks = new AList<ResourceRequest>(ask.Count);
                    foreach (ResourceRequest queued in ask)
                    {
                        // Send a copy: the queued ResourceRequest may be modified while
                        // the RPC layer is still using the one handed to it.
                        ResourceRequest copy = ResourceRequest.NewInstance(
                            queued.GetPriority(),
                            queued.GetResourceName(),
                            queued.GetCapability(),
                            queued.GetNumContainers(),
                            queued.GetRelaxLocality(),
                            queued.GetNodeLabelExpression());
                        sentAsks.AddItem(copy);
                    }
                    sentReleases = new AList<ContainerId>(release);

                    // Optimistically empty the queues, assuming the RPC will succeed.
                    ask.Clear();
                    release.Clear();

                    Sharpen.Collections.AddAll(sentBlacklistAdds, blacklistAdditions);
                    Sharpen.Collections.AddAll(sentBlacklistRemoves, blacklistRemovals);
                    outgoingRequest = AllocateRequest.NewInstance(
                        lastResponseId,
                        progressIndicator,
                        sentAsks,
                        sentReleases,
                        ResourceBlacklistRequest.NewInstance(sentBlacklistAdds, sentBlacklistRemoves));

                    // The blacklist deltas are emptied before leaving the synchronized part.
                    blacklistAdditions.Clear();
                    blacklistRemovals.Clear();
                }

                // Phase 2 (outside the monitor): the actual RPC.
                try
                {
                    rmResponse = rmClient.Allocate(outgoingRequest);
                }
                catch (ApplicationMasterNotRegisteredException)
                {
                    Log.Warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing."
                             );
                    lock (this)
                    {
                        // Re-queue everything that is still outstanding so it is sent
                        // again after re-registration.
                        Sharpen.Collections.AddAll(release, this.pendingRelease);
                        Sharpen.Collections.AddAll(blacklistAdditions, this.blacklistedNodes);
                        foreach (IDictionary<string, SortedDictionary<Resource, AMRMClientImpl.ResourceRequestInfo>> byName
                                 in remoteRequestsTable.Values)
                        {
                            foreach (IDictionary<Resource, AMRMClientImpl.ResourceRequestInfo> byCapability in byName.Values)
                            {
                                foreach (AMRMClientImpl.ResourceRequestInfo info in byCapability.Values)
                                {
                                    AddResourceRequestToAsk(info.remoteRequest);
                                }
                            }
                        }
                    }

                    // Re-register with the RM, then retry. The result is stored in the
                    // local before returning because the finally block below inspects it
                    // to decide whether the snapshots must be restored.
                    RegisterApplicationMaster();
                    rmResponse = Allocate(progressIndicator);
                    return rmResponse;
                }

                // Phase 3 (under the monitor): apply the successful response.
                lock (this)
                {
                    clusterNodeCount = rmResponse.GetNumClusterNodes();
                    lastResponseId = rmResponse.GetResponseId();
                    clusterAvailableResources = rmResponse.GetAvailableResources();

                    if (!rmResponse.GetNMTokens().IsEmpty())
                    {
                        PopulateNMTokens(rmResponse.GetNMTokens());
                    }

                    if (rmResponse.GetAMRMToken() != null)
                    {
                        UpdateAMRMToken(rmResponse.GetAMRMToken());
                    }

                    if (!pendingRelease.IsEmpty())
                    {
                        if (!rmResponse.GetCompletedContainersStatuses().IsEmpty())
                        {
                            RemovePendingReleaseRequests(rmResponse.GetCompletedContainersStatuses());
                        }
                    }
                }
            }
            finally
            {
                // TODO how to differentiate remote yarn exception vs error in rpc
                if (rmResponse == null)
                {
                    // No response means the allocate call failed with an exception:
                    // put the snapshots back so the next call sends them again.
                    lock (this)
                    {
                        Sharpen.Collections.AddAll(release, sentReleases);

                        // Requests may have been added or removed while the call was in
                        // flight. In that case 'ask' already holds the up-to-date
                        // ResourceRequest and nothing needs restoring. Only an entry that
                        // 'ask' does not contain was left unchanged and can be added back
                        // safely. This relies on there being no concurrent calls to this
                        // method, so 'ask' cannot have been changed by the synchronized
                        // block at the top of it.
                        foreach (ResourceRequest previous in sentAsks)
                        {
                            if (ask.Contains(previous))
                            {
                                continue;
                            }
                            ask.AddItem(previous);
                        }

                        Sharpen.Collections.AddAll(blacklistAdditions, sentBlacklistAdds);
                        Sharpen.Collections.AddAll(blacklistRemovals, sentBlacklistRemoves);
                    }
                }
            }

            return rmResponse;
        }