コード例 #1
0
        /// <summary>
        /// Sends the currently pending asks, releases and blacklist changes to the
        /// scheduler in one allocate call, stores the values reported back in the
        /// response, and clears the pending collections.
        /// </summary>
        /// <returns>The response returned by <c>scheduler.Allocate</c>.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        protected internal virtual AllocateResponse MakeRemoteRequest()
        {
            ApplyRequestLimits();

            // The request is built from copies of the pending collections.
            AList <string> nodesToAdd    = new AList <string>(blacklistAdditions);
            AList <string> nodesToRemove = new AList <string>(blacklistRemovals);
            ResourceBlacklistRequest blacklistDelta = ResourceBlacklistRequest.NewInstance(
                nodesToAdd, nodesToRemove);
            AllocateRequest request = AllocateRequest.NewInstance(
                lastResponseID,
                base.GetApplicationProgress(),
                new AList <ResourceRequest>(ask),
                new AList <ContainerId>(release),
                blacklistDelta);
            AllocateResponse response = scheduler.Allocate(request);

            // Record what the scheduler reported; the previous node count is kept
            // in lastClusterNmCount before it is overwritten.
            lastResponseID     = response.GetResponseId();
            availableResources = response.GetAvailableResources();
            lastClusterNmCount = clusterNmCount;
            clusterNmCount     = response.GetNumClusterNodes();
            int finishedCount = response.GetCompletedContainersStatuses().Count;

            // Only log when something was actually asked for or released.
            bool nothingSent = ask.Count == 0 && release.Count == 0;
            if (!nothingSent)
            {
                string summary = "getResources() for " + applicationId + ":"
                                 + " ask=" + ask.Count
                                 + " release= " + release.Count
                                 + " newContainers=" + response.GetAllocatedContainers().Count
                                 + " finishedContainers=" + finishedCount
                                 + " resourcelimit=" + availableResources
                                 + " knownNMs=" + clusterNmCount;
                Log.Info(summary);
            }
            ask.Clear();
            release.Clear();

            if (finishedCount > 0)
            {
                // A container completed: schedule every limited request for
                // re-sending so that more containers get asked for.
                Sharpen.Collections.AddAll(requestLimitsToUpdate, requestLimits.Keys);
            }

            bool blacklistUntouched = blacklistAdditions.Count == 0 && blacklistRemovals.Count == 0;
            if (!blacklistUntouched)
            {
                string blacklistSummary = "Update the blacklist for " + applicationId
                                          + ": blacklistAdditions=" + blacklistAdditions.Count
                                          + " blacklistRemovals=" + blacklistRemovals.Count;
                Log.Info(blacklistSummary);
            }
            blacklistAdditions.Clear();
            blacklistRemovals.Clear();

            return response;
        }
コード例 #2
0
        /// <summary>
        /// Verifies the processing of the NMContainerStatuses that are sent during NM
        /// registration.
        /// </summary>
        /// <remarks>
        /// 1. Start the cluster (RM, NM), submit an app with 1024MB, launch and register the AM.
        /// 2. The AM sends a ResourceRequest for 1 container with 2048MB of memory.
        /// 3. Verify the number of containers allocated by the RM.
        /// 4. Verify the memory used by the cluster: AM memory + requested memory,
        ///    1024 + 2048 = 3072.
        /// 5. Re-register the NM, sending a completed container status.
        /// 6. Verify the memory used; it should be 1024.
        /// 7. Send an AM heartbeat to the RM. The allocate response should contain the
        ///    completed container.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestProcessingNMContainerStatusesOnNMRestart()
        {
            int nodeMemoryMB      = 8192;
            int amMemoryMB        = 1024;
            int containerMemoryMB = 2048;
            int expectedContainers = 1;

            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore stateStore = new MemoryRMStateStore();
            stateStore.Init(conf);

            // Step 1: bring up RM and NM, submit the app, launch and register its AM.
            MockRM rm = new MockRM(conf, stateStore);
            rm.Start();
            MockNM nm = new MockNM("127.0.0.1:1234", nodeMemoryMB, rm.GetResourceTrackerService());
            nm.RegisterNode();
            RMApp  app = rm.SubmitApp(amMemoryMB);
            MockAM am  = MockRM.LaunchAndRegisterAM(app, rm, nm);

            // Step 2: the AM asks for one container of 2048MB.
            IList <Container> granted = am.AllocateAndWaitForContainers(
                expectedContainers, containerMemoryMB, nm);

            // Step 3: exactly the requested number of containers was allocated.
            NUnit.Framework.Assert.AreEqual(expectedContainers, granted.Count);
            Container launched = granted[0];

            nm.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Running);
            nm.NodeHeartbeat(
                am.GetApplicationAttemptId(),
                launched.GetId().GetContainerId(),
                ContainerState.Running);
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);

            // Step 4: cluster usage is AM memory plus container memory (1024 + 2048 = 3072).
            ResourceScheduler scheduler = rm.GetRMContext().GetScheduler();
            int expectedUsedMB = amMemoryMB + containerMemoryMB;
            NUnit.Framework.Assert.AreEqual(
                expectedUsedMB, scheduler.GetRootQueueMetrics().GetAllocatedMB());

            // Step 5: re-register the NM, reporting the container statuses built by
            // CreateNMContainerStatusForApp.
            IList <NMContainerStatus> reportedStatuses = CreateNMContainerStatusForApp(am);
            nm.RegisterNode(reportedStatuses, Arrays.AsList(app.GetApplicationId()));
            WaitForClusterMemory(nm, scheduler, amMemoryMB);

            // Step 6: only the AM memory (1024) is still in use.
            int usedAfterReRegisterMB = scheduler.GetRootQueueMetrics().GetAllocatedMB();
            NUnit.Framework.Assert.AreEqual(amMemoryMB, usedAfterReRegisterMB);

            // Step 7: an empty AM heartbeat must return the completed container.
            IList <ContainerStatus> completed = am.Allocate(
                AllocateRequest.NewInstance(
                    0, 0F, new AList <ResourceRequest>(), new AList <ContainerId>(), null))
                .GetCompletedContainersStatuses();
            NUnit.Framework.Assert.AreEqual(expectedContainers, completed.Count);

            // Application clean-up: afterwards the cluster memory used is 0.
            nm.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
            WaitForClusterMemory(nm, scheduler, 0);
            rm.Stop();
        }
コード例 #3
0
ファイル: AMRMClientImpl.cs プロジェクト: orf53975/hadoop.net
        /// <summary>
        /// Builds an allocate request from the pending asks, releases and blacklist
        /// changes, sends it through <c>rmClient.Allocate</c>, and applies the response
        /// to this client's cached state (node count, response id, available resources,
        /// NM tokens, AMRM token, pending releases).
        /// </summary>
        /// <param name="progressIndicator">
        /// Progress value placed in the allocate request. Must not be negative; this is
        /// checked with <c>Preconditions.CheckArgument</c> before anything else happens.
        /// </param>
        /// <returns>
        /// The response of <c>rmClient.Allocate</c>, or, when the call failed with
        /// <c>ApplicationMasterNotRegisteredException</c>, the response of the retried
        /// call made after re-registering.
        /// </returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public override AllocateResponse Allocate(float progressIndicator)
        {
            Preconditions.CheckArgument(progressIndicator >= 0, "Progress indicator should not be negative"
                                        );
            AllocateResponse        allocateResponse  = null;
            IList <ResourceRequest> askList           = null;
            IList <ContainerId>     releaseList       = null;
            AllocateRequest         allocateRequest   = null;
            IList <string>          blacklistToAdd    = new AList <string>();
            IList <string>          blacklistToRemove = new AList <string>();

            try
            {
                // Phase 1 (under the lock): snapshot the pending state into the request
                // and clear the pending collections.
                lock (this)
                {
                    askList = new AList <ResourceRequest>(ask.Count);
                    foreach (ResourceRequest r in ask)
                    {
                        // create a copy of ResourceRequest as we might change it while the
                        // RPC layer is using it to send info across
                        askList.AddItem(ResourceRequest.NewInstance(r.GetPriority(), r.GetResourceName(),
                                                                    r.GetCapability(), r.GetNumContainers(), r.GetRelaxLocality(), r.GetNodeLabelExpression
                                                                        ()));
                    }
                    releaseList = new AList <ContainerId>(release);
                    // optimistically clear this collection assuming no RPC failure
                    ask.Clear();
                    release.Clear();
                    Sharpen.Collections.AddAll(blacklistToAdd, blacklistAdditions);
                    Sharpen.Collections.AddAll(blacklistToRemove, blacklistRemovals);
                    ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.NewInstance(
                        blacklistToAdd, blacklistToRemove);
                    allocateRequest = AllocateRequest.NewInstance(lastResponseId, progressIndicator,
                                                                  askList, releaseList, blacklistRequest);
                    // clear blacklistAdditions and blacklistRemovals before
                    // unsynchronized part
                    blacklistAdditions.Clear();
                    blacklistRemovals.Clear();
                }
                // Phase 2 (outside the lock): issue the allocate call.
                try
                {
                    allocateResponse = rmClient.Allocate(allocateRequest);
                }
                catch (ApplicationMasterNotRegisteredException)
                {
                    Log.Warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing."
                             );
                    // Queue everything that has to be sent again: the pending releases,
                    // the blacklisted nodes, and every request recorded in
                    // remoteRequestsTable.
                    lock (this)
                    {
                        Sharpen.Collections.AddAll(release, this.pendingRelease);
                        Sharpen.Collections.AddAll(blacklistAdditions, this.blacklistedNodes);
                        foreach (IDictionary <string, SortedDictionary <Resource, AMRMClientImpl.ResourceRequestInfo
                                                                        > > rr in remoteRequestsTable.Values)
                        {
                            foreach (IDictionary <Resource, AMRMClientImpl.ResourceRequestInfo> capabalities in
                                     rr.Values)
                            {
                                foreach (AMRMClientImpl.ResourceRequestInfo request in capabalities.Values)
                                {
                                    AddResourceRequestToAsk(request.remoteRequest);
                                }
                            }
                        }
                    }
                    // re register with RM
                    RegisterApplicationMaster();
                    // Retry through a recursive call; its response is returned directly,
                    // so the phase-3 block below is skipped for this outer invocation.
                    allocateResponse = Allocate(progressIndicator);
                    return(allocateResponse);
                }
                // Phase 3 (under the lock): apply the response to the cached state.
                lock (this)
                {
                    // update these on successful RPC
                    clusterNodeCount          = allocateResponse.GetNumClusterNodes();
                    lastResponseId            = allocateResponse.GetResponseId();
                    clusterAvailableResources = allocateResponse.GetAvailableResources();
                    if (!allocateResponse.GetNMTokens().IsEmpty())
                    {
                        PopulateNMTokens(allocateResponse.GetNMTokens());
                    }
                    if (allocateResponse.GetAMRMToken() != null)
                    {
                        UpdateAMRMToken(allocateResponse.GetAMRMToken());
                    }
                    if (!pendingRelease.IsEmpty() && !allocateResponse.GetCompletedContainersStatuses
                            ().IsEmpty())
                    {
                        RemovePendingReleaseRequests(allocateResponse.GetCompletedContainersStatuses());
                    }
                }
            }
            finally
            {
                // TODO how to differentiate remote yarn exception vs error in rpc
                if (allocateResponse == null)
                {
                    // we hit an exception in allocate()
                    // preserve ask and release for next call to allocate()
                    // NOTE(review): askList and releaseList start out null and are only
                    // assigned inside the first lock block; if that block throws before
                    // releaseList is assigned, null reaches the calls below. Confirm how
                    // Sharpen.Collections.AddAll treats a null source.
                    lock (this)
                    {
                        Sharpen.Collections.AddAll(release, releaseList);
                        // requests could have been added or deleted during call to allocate
                        // If requests were added/removed then there is nothing to do since
                        // the ResourceRequest object in ask would have the actual new value.
                        // If ask does not have this ResourceRequest then it was unchanged and
                        // so we can add the value back safely.
                        // This assumes that there will no concurrent calls to allocate() and
                        // so we dont have to worry about ask being changed in the
                        // synchronized block at the beginning of this method.
                        foreach (ResourceRequest oldAsk in askList)
                        {
                            if (!ask.Contains(oldAsk))
                            {
                                ask.AddItem(oldAsk);
                            }
                        }
                        // Restore the blacklist changes that were moved into the request.
                        Sharpen.Collections.AddAll(blacklistAdditions, blacklistToAdd);
                        Sharpen.Collections.AddAll(blacklistRemovals, blacklistToRemove);
                    }
                }
            }
            return(allocateResponse);
        }
コード例 #4
0
        /// <summary>
        /// Verifies that the AMRMClient re-sends its outstanding container requests,
        /// releases and blacklist changes when the RM is restarted underneath it.
        /// </summary>
        /// <remarks>
        /// The test verifies six major steps:
        /// Step-1 : AMRMClient sends an allocate request for 2 container requests.
        /// Step-2 : 2 containers are allocated by the RM.
        /// Step-3 : The AM sends 1 container request (cRequest3) and 1 release request
        ///          to the RM.
        /// Step-4 : On RM restart the AM (which does not know the RM was restarted)
        ///          sends an additional container request (cRequest4) and blacklisted
        ///          nodes. In turn the RM sends a resync command.
        /// Step-5 : Allocate after the resync command with a new container request
        ///          (cRequest5).
        /// Step-6 : The RM allocates containers for cRequest3, cRequest4 and cRequest5.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRMClientResendsRequestsOnRMRestart()
        {
            UserGroupInformation.SetLoginUser(null);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // Phase-1 Start 1st RM
            TestAMRMClientOnRMRestart.MyResourceManager rm1 =
                new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
            rm1.Start();
            DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();
            // Submit the application
            RMApp app = rm1.SubmitApp(1024);

            dispatcher.Await();
            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

            rm1.SendAMLaunched(appAttemptId);
            dispatcher.Await();
            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token =
                rm1.GetRMContext().GetRMApps()[appAttemptId.GetApplicationId()]
                .GetRMAppAttempt(appAttemptId).GetAMRMToken();
            UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();

            ugi.AddTokenIdentifier(token.DecodeIdentifier());
            // Step-1 : AMRMClient send allocate request for 2 ContainerRequest
            // cRequest1 = h1 and cRequest2 = h1,h2
            // blacklisted nodes = h2
            AMRMClient <AMRMClient.ContainerRequest> amClient =
                new TestAMRMClientOnRMRestart.MyAMRMClientImpl(rm1);

            amClient.Init(conf);
            amClient.Start();
            amClient.RegisterApplicationMaster("Host", 10000, string.Empty);
            AMRMClient.ContainerRequest cRequest1 = CreateReq(1, 1024, new string[] { "h1" });
            amClient.AddContainerRequest(cRequest1);
            AMRMClient.ContainerRequest cRequest2 = CreateReq(1, 1024, new string[] { "h1", "h2" });
            amClient.AddContainerRequest(cRequest2);
            IList <string> blacklistAdditions = new AList <string>();
            IList <string> blacklistRemoval   = new AList <string>();

            blacklistAdditions.AddItem("h2");
            blacklistRemoval.AddItem("h10");
            amClient.UpdateBlacklist(blacklistAdditions, blacklistRemoval);
            // remove from local list
            blacklistAdditions.Remove("h2");
            AllocateResponse allocateResponse = amClient.Allocate(0.1f);

            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            // Why 4 asks and not 3, even though h2 is blacklisted?
            // On blacklisting a host, the application master has to remove the ask
            // from the remote request table itself. This test does not remove it
            // explicitly.
            AssertAsksAndReleases(4, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(1, 1, rm1);
            // Step-2 : NM heart beat is sent.
            // On 2nd AM allocate request, RM allocates 2 containers to AM
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            allocateResponse = amClient.Allocate(0.2f);
            dispatcher.Await();
            // 2 containers are allocated i.e for cRequest1 and cRequest2.
            // (The failure message now states the value that is actually expected.)
            NUnit.Framework.Assert.AreEqual("No of assignments must be 2", 2, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(0, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            IList <Container> allocatedContainers = allocateResponse.GetAllocatedContainers();

            // removed allocated container requests
            amClient.RemoveContainerRequest(cRequest1);
            amClient.RemoveContainerRequest(cRequest2);
            allocateResponse = amClient.Allocate(0.2f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(4, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            // Step-3 : Send 1 containerRequest and 1 releaseRequests to RM
            AMRMClient.ContainerRequest cRequest3 = CreateReq(1, 1024, new string[] { "h1" });
            amClient.AddContainerRequest(cRequest3);
            int pendingRelease         = 0;
            IEnumerator <Container> it = allocatedContainers.GetEnumerator();

            // Release exactly one container (the first one, if any) and drop it from
            // the local list; the remaining one is released after the RM restart.
            if (it.HasNext())
            {
                amClient.ReleaseAssignedContainer(it.Next().GetId());
                pendingRelease++;
                it.Remove();
            }
            allocateResponse = amClient.Allocate(0.3f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(3, pendingRelease, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            int completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;

            pendingRelease -= completedContainer;
            // Phase-2 start 2nd RM is up
            TestAMRMClientOnRMRestart.MyResourceManager rm2 =
                new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
            rm2.Start();
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
            dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();
            // nm1 has not registered with rm2 yet; its heartbeat is expected to be
            // answered with a Resync action.
            NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());
            // new NM to represent NM re-register
            nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            blacklistAdditions.AddItem("h3");
            amClient.UpdateBlacklist(blacklistAdditions, null);
            blacklistAdditions.Remove("h3");
            // Release every container that is still held locally.
            it = allocatedContainers.GetEnumerator();
            while (it.HasNext())
            {
                amClient.ReleaseAssignedContainer(it.Next().GetId());
                pendingRelease++;
                it.Remove();
            }
            AMRMClient.ContainerRequest cRequest4 = CreateReq(1, 1024, new string[] { "h1", "h2" });
            amClient.AddContainerRequest(cRequest4);
            // Step-4 : On RM restart, the AM (which does not know the RM was restarted)
            // sends an additional containerRequest and blacklisted nodes.
            // In turn the RM sends a resync command and AMRMClient re-sends the
            // allocate request.
            allocateResponse = amClient.Allocate(0.3f);
            dispatcher.Await();
            completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
            pendingRelease    -= completedContainer;
            AssertAsksAndReleases(4, pendingRelease, rm2);
            AssertBlacklistAdditionsAndRemovals(2, 0, rm2);
            AMRMClient.ContainerRequest cRequest5 = CreateReq(1, 1024, new string[] { "h1", "h2"
                                                                                      , "h3" });
            amClient.AddContainerRequest(cRequest5);
            // Step-5 : Allocate after resync command
            allocateResponse = amClient.Allocate(0.5f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(5, 0, rm2);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm2);
            int noAssignedContainer = 0;
            int count = 5;

            // Heartbeat and allocate up to 5 times, one second apart, until all 3
            // outstanding requests have been satisfied.
            while (count-- > 0)
            {
                nm1.NodeHeartbeat(true);
                dispatcher.Await();
                allocateResponse = amClient.Allocate(0.5f);
                dispatcher.Await();
                noAssignedContainer += allocateResponse.GetAllocatedContainers().Count;
                if (noAssignedContainer == 3)
                {
                    break;
                }
                Sharpen.Thread.Sleep(1000);
            }
            // Step-6 : RM allocates containers i.e cRequest3,cRequest4 and cRequest5
            NUnit.Framework.Assert.AreEqual("Number of container should be 3", 3, noAssignedContainer
                                            );
            amClient.Stop();
            rm1.Stop();
            rm2.Stop();
        }