예제 #1
0
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        private ICollection <Container> AllocateContainers(AMRMClientImpl <AMRMClient.ContainerRequest
                                                                           > rmClient, int num)
        {
            // setup container request
            Resource capability = Resource.NewInstance(1024, 0);
            Priority priority   = Priority.NewInstance(0);
            string   node       = nodeReports[0].GetNodeId().GetHost();
            string   rack       = nodeReports[0].GetRackName();

            string[] nodes = new string[] { node };
            string[] racks = new string[] { rack };
            for (int i = 0; i < num; ++i)
            {
                rmClient.AddContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks
                                                                             , priority));
            }
            int containersRequestedAny = rmClient.remoteRequestsTable[priority][ResourceRequest
                                                                                .Any][capability].remoteRequest.GetNumContainers();
            // RM should allocate container within 2 calls to allocate()
            int allocatedContainerCount        = 0;
            int iterationsLeft                 = 2;
            ICollection <Container> containers = new TreeSet <Container>();

            while (allocatedContainerCount < containersRequestedAny && iterationsLeft > 0)
            {
                AllocateResponse allocResponse = rmClient.Allocate(0.1f);
                allocatedContainerCount += allocResponse.GetAllocatedContainers().Count;
                foreach (Container container in allocResponse.GetAllocatedContainers())
                {
                    containers.AddItem(container);
                }
                if (!allocResponse.GetNMTokens().IsEmpty())
                {
                    foreach (NMToken token in allocResponse.GetNMTokens())
                    {
                        rmClient.GetNMTokenCache().SetToken(token.GetNodeId().ToString(), token.GetToken(
                                                                ));
                    }
                }
                if (allocatedContainerCount < containersRequestedAny)
                {
                    // sleep to let NM's heartbeat to RM and trigger allocations
                    Sleep(1000);
                }
                --iterationsLeft;
            }
            return(containers);
        }
예제 #2
0
        // Test even if AM container is allocated with containerId not equal to 1, the
        // following allocate requests from AM should be able to retrieve the
        // corresponding NM Token.
        /// <exception cref="System.Exception"/>
        public virtual void TestNMTokenSentForNormalContainer()
        {
            conf.Set(YarnConfiguration.RmScheduler, typeof(CapacityScheduler).GetCanonicalName
                         ());
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM       nm1     = rm.RegisterNode("h1:1234", 5120);
            RMApp        app     = rm.SubmitApp(2000);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            // Call getNewContainerId to increase container Id so that the AM container
            // Id doesn't equal to one.
            CapacityScheduler cs = (CapacityScheduler)rm.GetResourceScheduler();

            cs.GetApplicationAttempt(attempt.GetAppAttemptId()).GetNewContainerId();
            // kick the scheduling
            nm1.NodeHeartbeat(true);
            MockAM am = MockRM.LaunchAM(app, rm, nm1);

            // am container Id not equal to 1.
            NUnit.Framework.Assert.IsTrue(attempt.GetMasterContainer().GetId().GetContainerId
                                              () != 1);
            // NMSecretManager doesn't record the node on which the am is allocated.
            NUnit.Framework.Assert.IsFalse(rm.GetRMContext().GetNMTokenSecretManager().IsApplicationAttemptNMTokenPresent
                                               (attempt.GetAppAttemptId(), nm1.GetNodeId()));
            am.RegisterAppAttempt();
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
            int NumContainers            = 1;
            IList <Container> containers = new AList <Container>();
            // nmTokens keeps track of all the nmTokens issued in the allocate call.
            IList <NMToken> expectedNMTokens = new AList <NMToken>();

            // am1 allocate 1 container on nm1.
            while (true)
            {
                AllocateResponse response = am.Allocate("127.0.0.1", 2000, NumContainers, new AList
                                                        <ContainerId>());
                nm1.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
                Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
                if (containers.Count == NumContainers)
                {
                    break;
                }
                Sharpen.Thread.Sleep(200);
                System.Console.Out.WriteLine("Waiting for container to be allocated.");
            }
            NodeId nodeId = expectedNMTokens[0].GetNodeId();

            // NMToken is sent for the allocated container.
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), nodeId);
        }
예제 #3
0
        /// <exception cref="System.Exception"/>
        public virtual void TestNMTokensRebindOnAMRestart()
        {
            YarnConfiguration conf = new YarnConfiguration();

            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 3);
            MockRM rm1 = new MockRM(conf);

            rm1.Start();
            RMApp app1 = rm1.SubmitApp(200, "myname", "myuser", new Dictionary <ApplicationAccessType
                                                                                , string>(), false, "default", -1, null, "MAPREDUCE", false, true);
            MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            MockNM nm2 = new MockNM("127.1.1.1:4321", 8000, rm1.GetResourceTrackerService());

            nm2.RegisterNode();
            MockAM            am1        = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
            IList <Container> containers = new AList <Container>();
            // nmTokens keeps track of all the nmTokens issued in the allocate call.
            IList <NMToken> expectedNMTokens = new AList <NMToken>();

            // am1 allocate 2 container on nm1.
            // first container
            while (true)
            {
                AllocateResponse response = am1.Allocate("127.0.0.1", 2000, 2, new AList <ContainerId
                                                                                          >());
                nm1.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
                Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
                if (containers.Count == 2)
                {
                    break;
                }
                Sharpen.Thread.Sleep(200);
                System.Console.Out.WriteLine("Waiting for container to be allocated.");
            }
            // launch the container-2
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 2, ContainerState.Running);
            ContainerId containerId2 = ContainerId.NewContainerId(am1.GetApplicationAttemptId
                                                                      (), 2);

            rm1.WaitForState(nm1, containerId2, RMContainerState.Running);
            // launch the container-3
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 3, ContainerState.Running);
            ContainerId containerId3 = ContainerId.NewContainerId(am1.GetApplicationAttemptId
                                                                      (), 3);

            rm1.WaitForState(nm1, containerId3, RMContainerState.Running);
            // fail am1
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am1.WaitForState(RMAppAttemptState.Failed);
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            // restart the am
            MockAM am2 = MockRM.LaunchAM(app1, rm1, nm1);
            RegisterApplicationMasterResponse registerResponse = am2.RegisterAppAttempt();

            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running);
            // check am2 get the nm token from am1.
            NUnit.Framework.Assert.AreEqual(expectedNMTokens, registerResponse.GetNMTokensFromPreviousAttempts
                                                ());
            // am2 allocate 1 container on nm2
            containers = new AList <Container>();
            while (true)
            {
                AllocateResponse allocateResponse = am2.Allocate("127.1.1.1", 4000, 1, new AList <
                                                                     ContainerId>());
                nm2.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, allocateResponse.GetAllocatedContainers());
                Sharpen.Collections.AddAll(expectedNMTokens, allocateResponse.GetNMTokens());
                if (containers.Count == 1)
                {
                    break;
                }
                Sharpen.Thread.Sleep(200);
                System.Console.Out.WriteLine("Waiting for container to be allocated.");
            }
            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 2, ContainerState.Running);
            ContainerId am2ContainerId2 = ContainerId.NewContainerId(am2.GetApplicationAttemptId
                                                                         (), 2);

            rm1.WaitForState(nm1, am2ContainerId2, RMContainerState.Running);
            // fail am2.
            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am2.WaitForState(RMAppAttemptState.Failed);
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            // restart am
            MockAM am3 = MockRM.LaunchAM(app1, rm1, nm1);

            registerResponse = am3.RegisterAppAttempt();
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running);
            // check am3 get the NM token from both am1 and am2;
            IList <NMToken> transferredTokens = registerResponse.GetNMTokensFromPreviousAttempts
                                                    ();

            NUnit.Framework.Assert.AreEqual(2, transferredTokens.Count);
            NUnit.Framework.Assert.IsTrue(transferredTokens.ContainsAll(expectedNMTokens));
            rm1.Stop();
        }
예제 #4
0
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public override AllocateResponse Allocate(float progressIndicator)
        {
            Preconditions.CheckArgument(progressIndicator >= 0, "Progress indicator should not be negative"
                                        );
            AllocateResponse        allocateResponse  = null;
            IList <ResourceRequest> askList           = null;
            IList <ContainerId>     releaseList       = null;
            AllocateRequest         allocateRequest   = null;
            IList <string>          blacklistToAdd    = new AList <string>();
            IList <string>          blacklistToRemove = new AList <string>();

            try
            {
                lock (this)
                {
                    askList = new AList <ResourceRequest>(ask.Count);
                    foreach (ResourceRequest r in ask)
                    {
                        // create a copy of ResourceRequest as we might change it while the
                        // RPC layer is using it to send info across
                        askList.AddItem(ResourceRequest.NewInstance(r.GetPriority(), r.GetResourceName(),
                                                                    r.GetCapability(), r.GetNumContainers(), r.GetRelaxLocality(), r.GetNodeLabelExpression
                                                                        ()));
                    }
                    releaseList = new AList <ContainerId>(release);
                    // optimistically clear this collection assuming no RPC failure
                    ask.Clear();
                    release.Clear();
                    Sharpen.Collections.AddAll(blacklistToAdd, blacklistAdditions);
                    Sharpen.Collections.AddAll(blacklistToRemove, blacklistRemovals);
                    ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.NewInstance(
                        blacklistToAdd, blacklistToRemove);
                    allocateRequest = AllocateRequest.NewInstance(lastResponseId, progressIndicator,
                                                                  askList, releaseList, blacklistRequest);
                    // clear blacklistAdditions and blacklistRemovals before
                    // unsynchronized part
                    blacklistAdditions.Clear();
                    blacklistRemovals.Clear();
                }
                try
                {
                    allocateResponse = rmClient.Allocate(allocateRequest);
                }
                catch (ApplicationMasterNotRegisteredException)
                {
                    Log.Warn("ApplicationMaster is out of sync with ResourceManager," + " hence resyncing."
                             );
                    lock (this)
                    {
                        Sharpen.Collections.AddAll(release, this.pendingRelease);
                        Sharpen.Collections.AddAll(blacklistAdditions, this.blacklistedNodes);
                        foreach (IDictionary <string, SortedDictionary <Resource, AMRMClientImpl.ResourceRequestInfo
                                                                        > > rr in remoteRequestsTable.Values)
                        {
                            foreach (IDictionary <Resource, AMRMClientImpl.ResourceRequestInfo> capabalities in
                                     rr.Values)
                            {
                                foreach (AMRMClientImpl.ResourceRequestInfo request in capabalities.Values)
                                {
                                    AddResourceRequestToAsk(request.remoteRequest);
                                }
                            }
                        }
                    }
                    // re register with RM
                    RegisterApplicationMaster();
                    allocateResponse = Allocate(progressIndicator);
                    return(allocateResponse);
                }
                lock (this)
                {
                    // update these on successful RPC
                    clusterNodeCount          = allocateResponse.GetNumClusterNodes();
                    lastResponseId            = allocateResponse.GetResponseId();
                    clusterAvailableResources = allocateResponse.GetAvailableResources();
                    if (!allocateResponse.GetNMTokens().IsEmpty())
                    {
                        PopulateNMTokens(allocateResponse.GetNMTokens());
                    }
                    if (allocateResponse.GetAMRMToken() != null)
                    {
                        UpdateAMRMToken(allocateResponse.GetAMRMToken());
                    }
                    if (!pendingRelease.IsEmpty() && !allocateResponse.GetCompletedContainersStatuses
                            ().IsEmpty())
                    {
                        RemovePendingReleaseRequests(allocateResponse.GetCompletedContainersStatuses());
                    }
                }
            }
            finally
            {
                // TODO how to differentiate remote yarn exception vs error in rpc
                if (allocateResponse == null)
                {
                    // we hit an exception in allocate()
                    // preserve ask and release for next call to allocate()
                    lock (this)
                    {
                        Sharpen.Collections.AddAll(release, releaseList);
                        // requests could have been added or deleted during call to allocate
                        // If requests were added/removed then there is nothing to do since
                        // the ResourceRequest object in ask would have the actual new value.
                        // If ask does not have this ResourceRequest then it was unchanged and
                        // so we can add the value back safely.
                        // This assumes that there will no concurrent calls to allocate() and
                        // so we dont have to worry about ask being changed in the
                        // synchronized block at the beginning of this method.
                        foreach (ResourceRequest oldAsk in askList)
                        {
                            if (!ask.Contains(oldAsk))
                            {
                                ask.AddItem(oldAsk);
                            }
                        }
                        Sharpen.Collections.AddAll(blacklistAdditions, blacklistToAdd);
                        Sharpen.Collections.AddAll(blacklistRemovals, blacklistToRemove);
                    }
                }
            }
            return(allocateResponse);
        }