Example #1
0
        /// <exception cref="System.Exception"/>
        public virtual void TestInvalidContainerReleaseRequest()
        {
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                // Register node1
                MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
                // Submit an application
                RMApp app1 = rm.SubmitApp(1024);
                // kick the scheduling
                nm1.NodeHeartbeat(true);
                RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
                MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());
                am1.RegisterAppAttempt();
                am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
                AllocateResponse alloc1Response = am1.Schedule();
                // send the request
                // kick the scheduler
                nm1.NodeHeartbeat(true);
                while (alloc1Response.GetAllocatedContainers().Count < 1)
                {
                    Log.Info("Waiting for containers to be created for app 1...");
                    Sharpen.Thread.Sleep(1000);
                    alloc1Response = am1.Schedule();
                }
                NUnit.Framework.Assert.IsTrue(alloc1Response.GetAllocatedContainers().Count > 0);
                RMApp app2 = rm.SubmitApp(1024);
                nm1.NodeHeartbeat(true);
                RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
                MockAM       am2      = rm.SendAMLaunched(attempt2.GetAppAttemptId());
                am2.RegisterAppAttempt();
                // Now trying to release container allocated for app1 -> appAttempt1.
                ContainerId cId = alloc1Response.GetAllocatedContainers()[0].GetId();
                am2.AddContainerToBeReleased(cId);
                try
                {
                    am2.Schedule();
                    NUnit.Framework.Assert.Fail("Exception was expected!!");
                }
                catch (InvalidContainerReleaseException e)
                {
                    StringBuilder sb = new StringBuilder("Cannot release container : ");
                    sb.Append(cId.ToString());
                    sb.Append(" not belonging to this application attempt : ");
                    sb.Append(attempt2.GetAppAttemptId().ToString());
                    NUnit.Framework.Assert.IsTrue(e.Message.Contains(sb.ToString()));
                }
            }
            finally
            {
                if (rm != null)
                {
                    rm.Stop();
                }
            }
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestAppCleanupWhenRMRestartedBeforeAppFinished()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start RM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 1024, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            MockNM nm2 = new MockNM("127.0.0.1:5678", 1024, rm1.GetResourceTrackerService());

            nm2.RegisterNode();
            // create app and launch the AM
            RMApp  app0 = rm1.SubmitApp(200);
            MockAM am0  = LaunchAM(app0, rm1, nm1);
            // alloc another container on nm2
            AllocateResponse allocResponse = am0.Allocate(Arrays.AsList(ResourceRequest.NewInstance
                                                                            (Priority.NewInstance(1), "*", Resource.NewInstance(1024, 0), 1)), null);

            while (null == allocResponse.GetAllocatedContainers() || allocResponse.GetAllocatedContainers
                       ().IsEmpty())
            {
                nm2.NodeHeartbeat(true);
                allocResponse = am0.Allocate(null, null);
                Sharpen.Thread.Sleep(1000);
            }
            // start new RM
            MockRM rm2 = new MockRM(conf, memStore);

            rm2.Start();
            // nm1/nm2 register to rm2, and do a heartbeat
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm1.RegisterNode(Arrays.AsList(NMContainerStatus.NewInstance(ContainerId.NewContainerId
                                                                             (am0.GetApplicationAttemptId(), 1), ContainerState.Complete, Resource.NewInstance
                                                                             (1024, 1), string.Empty, 0, Priority.NewInstance(0), 1234)), Arrays.AsList(app0.
                                                                                                                                                        GetApplicationId()));
            nm2.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm2.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
            // assert app state has been saved.
            rm2.WaitForState(app0.GetApplicationId(), RMAppState.Failed);
            // wait for application cleanup message received on NM1
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            // wait for application cleanup message received on NM2
            WaitForAppCleanupMessageRecved(nm2, app0.GetApplicationId());
            rm1.Stop();
            rm2.Stop();
        }
Example #3
0
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        private ICollection <Container> AllocateContainers(AMRMClientImpl <AMRMClient.ContainerRequest
                                                                           > rmClient, int num)
        {
            // setup container request
            Resource capability = Resource.NewInstance(1024, 0);
            Priority priority   = Priority.NewInstance(0);
            string   node       = nodeReports[0].GetNodeId().GetHost();
            string   rack       = nodeReports[0].GetRackName();

            string[] nodes = new string[] { node };
            string[] racks = new string[] { rack };
            for (int i = 0; i < num; ++i)
            {
                rmClient.AddContainerRequest(new AMRMClient.ContainerRequest(capability, nodes, racks
                                                                             , priority));
            }
            int containersRequestedAny = rmClient.remoteRequestsTable[priority][ResourceRequest
                                                                                .Any][capability].remoteRequest.GetNumContainers();
            // RM should allocate container within 2 calls to allocate()
            int allocatedContainerCount        = 0;
            int iterationsLeft                 = 2;
            ICollection <Container> containers = new TreeSet <Container>();

            while (allocatedContainerCount < containersRequestedAny && iterationsLeft > 0)
            {
                AllocateResponse allocResponse = rmClient.Allocate(0.1f);
                allocatedContainerCount += allocResponse.GetAllocatedContainers().Count;
                foreach (Container container in allocResponse.GetAllocatedContainers())
                {
                    containers.AddItem(container);
                }
                if (!allocResponse.GetNMTokens().IsEmpty())
                {
                    foreach (NMToken token in allocResponse.GetNMTokens())
                    {
                        rmClient.GetNMTokenCache().SetToken(token.GetNodeId().ToString(), token.GetToken(
                                                                ));
                    }
                }
                if (allocatedContainerCount < containersRequestedAny)
                {
                    // sleep to let NM's heartbeat to RM and trigger allocations
                    Sleep(1000);
                }
                --iterationsLeft;
            }
            return(containers);
        }
Example #4
0
        // Test even if AM container is allocated with containerId not equal to 1, the
        // following allocate requests from AM should be able to retrieve the
        // corresponding NM Token.
        /// <exception cref="System.Exception"/>
        public virtual void TestNMTokenSentForNormalContainer()
        {
            conf.Set(YarnConfiguration.RmScheduler, typeof(CapacityScheduler).GetCanonicalName
                         ());
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM       nm1     = rm.RegisterNode("h1:1234", 5120);
            RMApp        app     = rm.SubmitApp(2000);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            // Call getNewContainerId to increase container Id so that the AM container
            // Id doesn't equal to one.
            CapacityScheduler cs = (CapacityScheduler)rm.GetResourceScheduler();

            cs.GetApplicationAttempt(attempt.GetAppAttemptId()).GetNewContainerId();
            // kick the scheduling
            nm1.NodeHeartbeat(true);
            MockAM am = MockRM.LaunchAM(app, rm, nm1);

            // am container Id not equal to 1.
            NUnit.Framework.Assert.IsTrue(attempt.GetMasterContainer().GetId().GetContainerId
                                              () != 1);
            // NMSecretManager doesn't record the node on which the am is allocated.
            NUnit.Framework.Assert.IsFalse(rm.GetRMContext().GetNMTokenSecretManager().IsApplicationAttemptNMTokenPresent
                                               (attempt.GetAppAttemptId(), nm1.GetNodeId()));
            am.RegisterAppAttempt();
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
            int NumContainers            = 1;
            IList <Container> containers = new AList <Container>();
            // nmTokens keeps track of all the nmTokens issued in the allocate call.
            IList <NMToken> expectedNMTokens = new AList <NMToken>();

            // am1 allocate 1 container on nm1.
            while (true)
            {
                AllocateResponse response = am.Allocate("127.0.0.1", 2000, NumContainers, new AList
                                                        <ContainerId>());
                nm1.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
                Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
                if (containers.Count == NumContainers)
                {
                    break;
                }
                Sharpen.Thread.Sleep(200);
                System.Console.Out.WriteLine("Waiting for container to be allocated.");
            }
            NodeId nodeId = expectedNMTokens[0].GetNodeId();

            // NMToken is sent for the allocated container.
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), nodeId);
        }
Example #5
0
        /// <exception cref="System.Exception"/>
        public virtual void TestRMIdentifierOnContainerAllocation()
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            // Register node1
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            // Submit an application
            RMApp app1 = rm.SubmitApp(2048);

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
            AllocateResponse alloc1Response = am1.Schedule();

            // send the request
            // kick the scheduler
            nm1.NodeHeartbeat(true);
            while (alloc1Response.GetAllocatedContainers().Count < 1)
            {
                Log.Info("Waiting for containers to be created for app 1...");
                Sharpen.Thread.Sleep(1000);
                alloc1Response = am1.Schedule();
            }
            // assert RMIdentifer is set properly in allocated containers
            Container allocatedContainer     = alloc1Response.GetAllocatedContainers()[0];
            ContainerTokenIdentifier tokenId = BuilderUtils.NewContainerTokenIdentifier(allocatedContainer
                                                                                        .GetContainerToken());

            NUnit.Framework.Assert.AreEqual(MockRM.GetClusterTimeStamp(), tokenId.GetRMIdentifier
                                                ());
            rm.Stop();
        }
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        protected internal virtual AllocateResponse MakeRemoteRequest()
        {
            ApplyRequestLimits();
            ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.NewInstance(
                new AList <string>(blacklistAdditions), new AList <string>(blacklistRemovals));
            AllocateRequest allocateRequest = AllocateRequest.NewInstance(lastResponseID, base
                                                                          .GetApplicationProgress(), new AList <ResourceRequest>(ask), new AList <ContainerId
                                                                                                                                                  >(release), blacklistRequest);
            AllocateResponse allocateResponse = scheduler.Allocate(allocateRequest);

            lastResponseID     = allocateResponse.GetResponseId();
            availableResources = allocateResponse.GetAvailableResources();
            lastClusterNmCount = clusterNmCount;
            clusterNmCount     = allocateResponse.GetNumClusterNodes();
            int numCompletedContainers = allocateResponse.GetCompletedContainersStatuses().Count;

            if (ask.Count > 0 || release.Count > 0)
            {
                Log.Info("getResources() for " + applicationId + ":" + " ask=" + ask.Count + " release= "
                         + release.Count + " newContainers=" + allocateResponse.GetAllocatedContainers()
                         .Count + " finishedContainers=" + numCompletedContainers + " resourcelimit=" + availableResources
                         + " knownNMs=" + clusterNmCount);
            }
            ask.Clear();
            release.Clear();
            if (numCompletedContainers > 0)
            {
                // re-send limited requests when a container completes to trigger asking
                // for more containers
                Sharpen.Collections.AddAll(requestLimitsToUpdate, requestLimits.Keys);
            }
            if (blacklistAdditions.Count > 0 || blacklistRemovals.Count > 0)
            {
                Log.Info("Update the blacklist for " + applicationId + ": blacklistAdditions=" +
                         blacklistAdditions.Count + " blacklistRemovals=" + blacklistRemovals.Count);
            }
            blacklistAdditions.Clear();
            blacklistRemovals.Clear();
            return(allocateResponse);
        }
Example #7
0
        /// <exception cref="System.Exception"/>
        public virtual void TestNMTokensRebindOnAMRestart()
        {
            YarnConfiguration conf = new YarnConfiguration();

            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 3);
            MockRM rm1 = new MockRM(conf);

            rm1.Start();
            RMApp app1 = rm1.SubmitApp(200, "myname", "myuser", new Dictionary <ApplicationAccessType
                                                                                , string>(), false, "default", -1, null, "MAPREDUCE", false, true);
            MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            MockNM nm2 = new MockNM("127.1.1.1:4321", 8000, rm1.GetResourceTrackerService());

            nm2.RegisterNode();
            MockAM            am1        = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
            IList <Container> containers = new AList <Container>();
            // nmTokens keeps track of all the nmTokens issued in the allocate call.
            IList <NMToken> expectedNMTokens = new AList <NMToken>();

            // am1 allocate 2 container on nm1.
            // first container
            while (true)
            {
                AllocateResponse response = am1.Allocate("127.0.0.1", 2000, 2, new AList <ContainerId
                                                                                          >());
                nm1.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
                Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
                if (containers.Count == 2)
                {
                    break;
                }
                Sharpen.Thread.Sleep(200);
                System.Console.Out.WriteLine("Waiting for container to be allocated.");
            }
            // launch the container-2
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 2, ContainerState.Running);
            ContainerId containerId2 = ContainerId.NewContainerId(am1.GetApplicationAttemptId
                                                                      (), 2);

            rm1.WaitForState(nm1, containerId2, RMContainerState.Running);
            // launch the container-3
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 3, ContainerState.Running);
            ContainerId containerId3 = ContainerId.NewContainerId(am1.GetApplicationAttemptId
                                                                      (), 3);

            rm1.WaitForState(nm1, containerId3, RMContainerState.Running);
            // fail am1
            nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am1.WaitForState(RMAppAttemptState.Failed);
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            // restart the am
            MockAM am2 = MockRM.LaunchAM(app1, rm1, nm1);
            RegisterApplicationMasterResponse registerResponse = am2.RegisterAppAttempt();

            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running);
            // check am2 get the nm token from am1.
            NUnit.Framework.Assert.AreEqual(expectedNMTokens, registerResponse.GetNMTokensFromPreviousAttempts
                                                ());
            // am2 allocate 1 container on nm2
            containers = new AList <Container>();
            while (true)
            {
                AllocateResponse allocateResponse = am2.Allocate("127.1.1.1", 4000, 1, new AList <
                                                                     ContainerId>());
                nm2.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, allocateResponse.GetAllocatedContainers());
                Sharpen.Collections.AddAll(expectedNMTokens, allocateResponse.GetNMTokens());
                if (containers.Count == 1)
                {
                    break;
                }
                Sharpen.Thread.Sleep(200);
                System.Console.Out.WriteLine("Waiting for container to be allocated.");
            }
            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 2, ContainerState.Running);
            ContainerId am2ContainerId2 = ContainerId.NewContainerId(am2.GetApplicationAttemptId
                                                                         (), 2);

            rm1.WaitForState(nm1, am2ContainerId2, RMContainerState.Running);
            // fail am2.
            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am2.WaitForState(RMAppAttemptState.Failed);
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
            // restart am
            MockAM am3 = MockRM.LaunchAM(app1, rm1, nm1);

            registerResponse = am3.RegisterAppAttempt();
            rm1.WaitForState(app1.GetApplicationId(), RMAppState.Running);
            // check am3 get the NM token from both am1 and am2;
            IList <NMToken> transferredTokens = registerResponse.GetNMTokensFromPreviousAttempts
                                                    ();

            NUnit.Framework.Assert.AreEqual(2, transferredTokens.Count);
            NUnit.Framework.Assert.IsTrue(transferredTokens.ContainsAll(expectedNMTokens));
            rm1.Stop();
        }
Example #8
0
        /// <exception cref="System.Exception"/>
        public virtual void TestExcessReservationThanNodeManagerCapacity()
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            // Register node1
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 2 * Gb, 4);
            MockNM nm2 = rm.RegisterNode("127.0.0.1:2234", 3 * Gb, 4);

            nm1.NodeHeartbeat(true);
            nm2.NodeHeartbeat(true);
            // wait..
            int waitCount = 20;
            int size      = rm.GetRMContext().GetRMNodes().Count;

            while ((size = rm.GetRMContext().GetRMNodes().Count) != 2 && waitCount-- > 0)
            {
                Log.Info("Waiting for node managers to register : " + size);
                Sharpen.Thread.Sleep(100);
            }
            NUnit.Framework.Assert.AreEqual(2, rm.GetRMContext().GetRMNodes().Count);
            // Submit an application
            RMApp app1 = rm.SubmitApp(128);

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            Log.Info("sending container requests ");
            am1.AddRequests(new string[] { "*" }, 2 * Gb, 1, 1);
            AllocateResponse alloc1Response = am1.Schedule();

            // send the request
            // kick the scheduler
            nm1.NodeHeartbeat(true);
            int waitCounter = 20;

            Log.Info("heartbeating nm1");
            while (alloc1Response.GetAllocatedContainers().Count < 1 && waitCounter-- > 0)
            {
                Log.Info("Waiting for containers to be created for app 1...");
                Sharpen.Thread.Sleep(500);
                alloc1Response = am1.Schedule();
            }
            Log.Info("received container : " + alloc1Response.GetAllocatedContainers().Count);
            // No container should be allocated.
            // Internally it should not been reserved.
            NUnit.Framework.Assert.IsTrue(alloc1Response.GetAllocatedContainers().Count == 0);
            Log.Info("heartbeating nm2");
            waitCounter = 20;
            nm2.NodeHeartbeat(true);
            while (alloc1Response.GetAllocatedContainers().Count < 1 && waitCounter-- > 0)
            {
                Log.Info("Waiting for containers to be created for app 1...");
                Sharpen.Thread.Sleep(500);
                alloc1Response = am1.Schedule();
            }
            Log.Info("received container : " + alloc1Response.GetAllocatedContainers().Count);
            NUnit.Framework.Assert.IsTrue(alloc1Response.GetAllocatedContainers().Count == 1);
            rm.Stop();
        }
Example #9
0
        public virtual void TestResourceOverCommit()
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1  = rm.RegisterNode("127.0.0.1:1234", 4 * Gb);
            RMApp  app1 = rm.SubmitApp(2048);

            // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId
                                                                                         ());

            // check node report, 2 GB used and 2 GB available
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetAvailableResource().GetMemory
                                                ());
            // add request for containers
            am1.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, 2 * Gb, 1, 1);
            AllocateResponse alloc1Response = am1.Schedule();

            // send the request
            // kick the scheduler, 2 GB given to AM1, resource remaining 0
            nm1.NodeHeartbeat(true);
            while (alloc1Response.GetAllocatedContainers().Count < 1)
            {
                Log.Info("Waiting for containers to be created for app 1...");
                Sharpen.Thread.Sleep(1000);
                alloc1Response = am1.Schedule();
            }
            IList <Container> allocated1 = alloc1Response.GetAllocatedContainers();

            NUnit.Framework.Assert.AreEqual(1, allocated1.Count);
            NUnit.Framework.Assert.AreEqual(2 * Gb, allocated1[0].GetResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated1[0].GetNodeId());
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            // check node report, 4 GB used and 0 GB available
            NUnit.Framework.Assert.AreEqual(0, report_nm1.GetAvailableResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(4 * Gb, report_nm1.GetUsedResource().GetMemory());
            // check container is assigned with 2 GB.
            Container c1 = allocated1[0];

            NUnit.Framework.Assert.AreEqual(2 * Gb, c1.GetResource().GetMemory());
            // update node resource to 2 GB, so resource is over-consumed.
            IDictionary <NodeId, ResourceOption> nodeResourceMap = new Dictionary <NodeId, ResourceOption
                                                                                   >();

            nodeResourceMap[nm1.GetNodeId()] = ResourceOption.NewInstance(Org.Apache.Hadoop.Yarn.Api.Records.Resource
                                                                          .NewInstance(2 * Gb, 1), -1);
            UpdateNodeResourceRequest request = UpdateNodeResourceRequest.NewInstance(nodeResourceMap
                                                                                      );
            AdminService @as = rm.adminService;

            @as.UpdateNodeResource(request);
            // Now, the used resource is still 4 GB, and available resource is minus value.
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            NUnit.Framework.Assert.AreEqual(4 * Gb, report_nm1.GetUsedResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(-2 * Gb, report_nm1.GetAvailableResource().GetMemory
                                                ());
            // Check container can complete successfully in case of resource over-commitment.
            ContainerStatus containerStatus = BuilderUtils.NewContainerStatus(c1.GetId(), ContainerState
                                                                              .Complete, string.Empty, 0);

            nm1.ContainerStatus(containerStatus);
            int waitCount = 0;

            while (attempt1.GetJustFinishedContainers().Count < 1 && waitCount++ != 20)
            {
                Log.Info("Waiting for containers to be finished for app 1... Tried " + waitCount
                         + " times already..");
                Sharpen.Thread.Sleep(100);
            }
            NUnit.Framework.Assert.AreEqual(1, attempt1.GetJustFinishedContainers().Count);
            NUnit.Framework.Assert.AreEqual(1, am1.Schedule().GetCompletedContainersStatuses(
                                                ).Count);
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
            // As container return 2 GB back, the available resource becomes 0 again.
            NUnit.Framework.Assert.AreEqual(0 * Gb, report_nm1.GetAvailableResource().GetMemory
                                                ());
            rm.Stop();
        }
Example #10
0
        public virtual void Test()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1  = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            MockNM nm2  = rm.RegisterNode("127.0.0.2:5678", 4 * Gb);
            RMApp  app1 = rm.SubmitApp(2048);

            // kick the scheduling, 2 GB given to AM1, remaining 4GB on nm1
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId
                                                                                         ());

            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
            RMApp app2 = rm.SubmitApp(2048);

            // kick the scheduling, 2GB given to AM, remaining 2 GB on nm2
            nm2.NodeHeartbeat(true);
            RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
            MockAM       am2      = rm.SendAMLaunched(attempt2.GetAppAttemptId());

            am2.RegisterAppAttempt();
            SchedulerNodeReport report_nm2 = rm.GetResourceScheduler().GetNodeReport(nm2.GetNodeId
                                                                                         ());

            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetUsedResource().GetMemory());
            // add request for containers
            am1.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, Gb, 1, 1);
            AllocateResponse alloc1Response = am1.Schedule();

            // send the request
            // add request for containers
            am2.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, 3 * Gb, 0, 1);
            AllocateResponse alloc2Response = am2.Schedule();

            // send the request
            // kick the scheduler, 1 GB and 3 GB given to AM1 and AM2, remaining 0
            nm1.NodeHeartbeat(true);
            while (alloc1Response.GetAllocatedContainers().Count < 1)
            {
                Log.Info("Waiting for containers to be created for app 1...");
                Sharpen.Thread.Sleep(1000);
                alloc1Response = am1.Schedule();
            }
            while (alloc2Response.GetAllocatedContainers().Count < 1)
            {
                Log.Info("Waiting for containers to be created for app 2...");
                Sharpen.Thread.Sleep(1000);
                alloc2Response = am2.Schedule();
            }
            // kick the scheduler, nothing given remaining 2 GB.
            nm2.NodeHeartbeat(true);
            IList <Container> allocated1 = alloc1Response.GetAllocatedContainers();

            NUnit.Framework.Assert.AreEqual(1, allocated1.Count);
            NUnit.Framework.Assert.AreEqual(1 * Gb, allocated1[0].GetResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated1[0].GetNodeId());
            IList <Container> allocated2 = alloc2Response.GetAllocatedContainers();

            NUnit.Framework.Assert.AreEqual(1, allocated2.Count);
            NUnit.Framework.Assert.AreEqual(3 * Gb, allocated2[0].GetResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated2[0].GetNodeId());
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            report_nm2 = rm.GetResourceScheduler().GetNodeReport(nm2.GetNodeId());
            NUnit.Framework.Assert.AreEqual(0, report_nm1.GetAvailableResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetAvailableResource().GetMemory
                                                ());
            NUnit.Framework.Assert.AreEqual(6 * Gb, report_nm1.GetUsedResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetUsedResource().GetMemory());
            Container c1 = allocated1[0];

            NUnit.Framework.Assert.AreEqual(Gb, c1.GetResource().GetMemory());
            ContainerStatus containerStatus = BuilderUtils.NewContainerStatus(c1.GetId(), ContainerState
                                                                              .Complete, string.Empty, 0);

            nm1.ContainerStatus(containerStatus);
            int waitCount = 0;

            while (attempt1.GetJustFinishedContainers().Count < 1 && waitCount++ != 20)
            {
                Log.Info("Waiting for containers to be finished for app 1... Tried " + waitCount
                         + " times already..");
                Sharpen.Thread.Sleep(1000);
            }
            NUnit.Framework.Assert.AreEqual(1, attempt1.GetJustFinishedContainers().Count);
            NUnit.Framework.Assert.AreEqual(1, am1.Schedule().GetCompletedContainersStatuses(
                                                ).Count);
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            NUnit.Framework.Assert.AreEqual(5 * Gb, report_nm1.GetUsedResource().GetMemory());
            rm.Stop();
        }
        // Test does major 6 steps verification.
        // Step-1 : AMRMClient send allocate request for 2 container requests
        // Step-2 : 2 containers are allocated by RM.
        // Step-3 : AM Send 1 containerRequest(cRequest3) and 1 releaseRequests to
        // RM
        // Step-4 : On RM restart, AM(does not know RM is restarted) sends additional
        // containerRequest(cRequest4) and blacklisted nodes.
        // Intern RM send resync command
        // Step-5 : Allocater after resync command & new containerRequest(cRequest5)
        // Step-6 : RM allocates containers i.e cRequest3,cRequest4 and cRequest5
        /// <exception cref="System.Exception"/>
        public virtual void TestAMRMClientResendsRequestsOnRMRestart()
        {
            UserGroupInformation.SetLoginUser(null);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // Phase-1 Start 1st RM
            TestAMRMClientOnRMRestart.MyResourceManager rm1 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm1.Start();
            DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();
            // Submit the application
            RMApp app = rm1.SubmitApp(1024);

            dispatcher.Await();
            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

            rm1.SendAMLaunched(appAttemptId);
            dispatcher.Await();
            Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token = rm1.GetRMContext
                                                                                     ().GetRMApps()[appAttemptId.GetApplicationId()].GetRMAppAttempt(appAttemptId).GetAMRMToken
                                                                                     ();
            UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();

            ugi.AddTokenIdentifier(token.DecodeIdentifier());
            // Step-1 : AMRMClient send allocate request for 2 ContainerRequest
            // cRequest1 = h1 and cRequest2 = h1,h2
            // blacklisted nodes = h2
            AMRMClient <AMRMClient.ContainerRequest> amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl
                                                                    (rm1);

            amClient.Init(conf);
            amClient.Start();
            amClient.RegisterApplicationMaster("Host", 10000, string.Empty);
            AMRMClient.ContainerRequest cRequest1 = CreateReq(1, 1024, new string[] { "h1" });
            amClient.AddContainerRequest(cRequest1);
            AMRMClient.ContainerRequest cRequest2 = CreateReq(1, 1024, new string[] { "h1", "h2" });
            amClient.AddContainerRequest(cRequest2);
            IList <string> blacklistAdditions = new AList <string>();
            IList <string> blacklistRemoval   = new AList <string>();

            blacklistAdditions.AddItem("h2");
            blacklistRemoval.AddItem("h10");
            amClient.UpdateBlacklist(blacklistAdditions, blacklistRemoval);
            blacklistAdditions.Remove("h2");
            // remove from local list
            AllocateResponse allocateResponse = amClient.Allocate(0.1f);

            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            // Why 4 ask, why not 3 ask even h2 is blacklisted?
            // On blacklisting host,applicationmaster has to remove ask request from
            // remoterequest table.Here,test does not remove explicitely
            AssertAsksAndReleases(4, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(1, 1, rm1);
            // Step-2 : NM heart beat is sent.
            // On 2nd AM allocate request, RM allocates 2 containers to AM
            nm1.NodeHeartbeat(true);
            // Node heartbeat
            dispatcher.Await();
            allocateResponse = amClient.Allocate(0.2f);
            dispatcher.Await();
            // 2 containers are allocated i.e for cRequest1 and cRequest2.
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 2, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(0, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            IList <Container> allocatedContainers = allocateResponse.GetAllocatedContainers();

            // removed allocated container requests
            amClient.RemoveContainerRequest(cRequest1);
            amClient.RemoveContainerRequest(cRequest2);
            allocateResponse = amClient.Allocate(0.2f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(4, 0, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            // Step-3 : Send 1 containerRequest and 1 releaseRequests to RM
            AMRMClient.ContainerRequest cRequest3 = CreateReq(1, 1024, new string[] { "h1" });
            amClient.AddContainerRequest(cRequest3);
            int pendingRelease         = 0;
            IEnumerator <Container> it = allocatedContainers.GetEnumerator();

            while (it.HasNext())
            {
                amClient.ReleaseAssignedContainer(it.Next().GetId());
                pendingRelease++;
                it.Remove();
                break;
            }
            // remove one container
            allocateResponse = amClient.Allocate(0.3f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(3, pendingRelease, rm1);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
            int completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;

            pendingRelease -= completedContainer;
            // Phase-2 start 2nd RM is up
            TestAMRMClientOnRMRestart.MyResourceManager rm2 = new TestAMRMClientOnRMRestart.MyResourceManager
                                                                  (conf, memStore);
            rm2.Start();
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
            dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();
            // NM should be rebooted on heartbeat, even first heartbeat for nm2
            NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());
            // new NM to represent NM re-register
            nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
            nm1.RegisterNode();
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            blacklistAdditions.AddItem("h3");
            amClient.UpdateBlacklist(blacklistAdditions, null);
            blacklistAdditions.Remove("h3");
            it = allocatedContainers.GetEnumerator();
            while (it.HasNext())
            {
                amClient.ReleaseAssignedContainer(it.Next().GetId());
                pendingRelease++;
                it.Remove();
            }
            AMRMClient.ContainerRequest cRequest4 = CreateReq(1, 1024, new string[] { "h1", "h2" });
            amClient.AddContainerRequest(cRequest4);
            // Step-4 : On RM restart, AM(does not know RM is restarted) sends
            // additional
            // containerRequest and blacklisted nodes.
            // Intern RM send resync command,AMRMClient resend allocate request
            allocateResponse = amClient.Allocate(0.3f);
            dispatcher.Await();
            completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
            pendingRelease    -= completedContainer;
            AssertAsksAndReleases(4, pendingRelease, rm2);
            AssertBlacklistAdditionsAndRemovals(2, 0, rm2);
            AMRMClient.ContainerRequest cRequest5 = CreateReq(1, 1024, new string[] { "h1", "h2"
                                                                                      , "h3" });
            amClient.AddContainerRequest(cRequest5);
            // Step-5 : Allocater after resync command
            allocateResponse = amClient.Allocate(0.5f);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0, allocateResponse
                                            .GetAllocatedContainers().Count);
            AssertAsksAndReleases(5, 0, rm2);
            AssertBlacklistAdditionsAndRemovals(0, 0, rm2);
            int noAssignedContainer = 0;
            int count = 5;

            while (count-- > 0)
            {
                nm1.NodeHeartbeat(true);
                dispatcher.Await();
                allocateResponse = amClient.Allocate(0.5f);
                dispatcher.Await();
                noAssignedContainer += allocateResponse.GetAllocatedContainers().Count;
                if (noAssignedContainer == 3)
                {
                    break;
                }
                Sharpen.Thread.Sleep(1000);
            }
            // Step-6 : RM allocates containers i.e cRequest3,cRequest4 and cRequest5
            NUnit.Framework.Assert.AreEqual("Number of container should be 3", 3, noAssignedContainer
                                            );
            amClient.Stop();
            rm1.Stop();
            rm2.Stop();
        }
Example #12
0
        /// <exception cref="System.Exception"/>
        public virtual void TestNMToken()
        {
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                MockNM nm1 = rm.RegisterNode("h1:1234", 10000);
                NMTokenSecretManagerInRM nmTokenSecretManager = rm.GetRMContext().GetNMTokenSecretManager
                                                                    ();
                // submitting new application
                RMApp app = rm.SubmitApp(1000);
                // start scheduling.
                nm1.NodeHeartbeat(true);
                // Starting application attempt and launching
                // It should get registered with NMTokenSecretManager.
                RMAppAttempt attempt = app.GetCurrentAppAttempt();
                MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                // This will register application master.
                am.RegisterAppAttempt();
                AList <Container>          containersReceivedForNM1 = new AList <Container>();
                IList <ContainerId>        releaseContainerList     = new AList <ContainerId>();
                Dictionary <string, Token> nmTokens = new Dictionary <string, Token>();
                // initially requesting 2 containers.
                AllocateResponse response = am.Allocate("h1", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 2, nmTokens,
                                                      nm1);
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                // requesting 2 more containers.
                response = am.Allocate("h1", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 4, nmTokens,
                                                      nm1);
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                // We will be simulating NM restart so restarting newly added h2:1234
                // NM 2 now registers.
                MockNM nm2 = rm.RegisterNode("h2:1234", 10000);
                nm2.NodeHeartbeat(true);
                AList <Container> containersReceivedForNM2 = new AList <Container>();
                response = am.Allocate("h2", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 2, nmTokens,
                                                      nm2);
                NUnit.Framework.Assert.AreEqual(2, nmTokens.Count);
                // Simulating NM-2 restart.
                nm2 = rm.RegisterNode("h2:1234", 10000);
                // Wait for reconnect to make it through the RM and create a new RMNode
                IDictionary <NodeId, RMNode> nodes = rm.GetRMContext().GetRMNodes();
                while (nodes[nm2.GetNodeId()].GetLastNodeHeartBeatResponse().GetResponseId() > 0)
                {
                    Sharpen.Thread.Sleep(WaitSleepMs);
                }
                int interval = 40;
                // Wait for nm Token to be cleared.
                while (nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(attempt.GetAppAttemptId
                                                                                   (), nm2.GetNodeId()) && interval-- > 0)
                {
                    Log.Info("waiting for nmToken to be cleared for : " + nm2.GetNodeId());
                    Sharpen.Thread.Sleep(WaitSleepMs);
                }
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                // removing NMToken for h2:1234
                Sharpen.Collections.Remove(nmTokens, nm2.GetNodeId().ToString());
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                // We should again receive the NMToken.
                response = am.Allocate("h2", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 4, nmTokens,
                                                      nm2);
                NUnit.Framework.Assert.AreEqual(2, nmTokens.Count);
                // Now rolling over NMToken masterKey. it should resend the NMToken in
                // next allocate call.
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                  (attempt.GetAppAttemptId(), nm1.GetNodeId()));
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                  (attempt.GetAppAttemptId(), nm2.GetNodeId()));
                nmTokenSecretManager.RollMasterKey();
                nmTokenSecretManager.ActivateNextMasterKey();
                NUnit.Framework.Assert.IsFalse(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                   (attempt.GetAppAttemptId(), nm1.GetNodeId()));
                NUnit.Framework.Assert.IsFalse(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                   (attempt.GetAppAttemptId(), nm2.GetNodeId()));
                // It should not remove application attempt entry.
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                nmTokens.Clear();
                NUnit.Framework.Assert.AreEqual(0, nmTokens.Count);
                // We should again receive the NMToken.
                response = am.Allocate("h2", 1000, 1, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 5, nmTokens,
                                                      nm2);
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                  (attempt.GetAppAttemptId(), nm2.GetNodeId()));
                // After AM is finished making sure that nmtoken entry for app
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                am.UnregisterAppAttempt();
                // marking all the containers as finished.
                foreach (Container container in containersReceivedForNM1)
                {
                    nm1.NodeHeartbeat(attempt.GetAppAttemptId(), container.GetId().GetContainerId(),
                                      ContainerState.Complete);
                }
                foreach (Container container_1 in containersReceivedForNM2)
                {
                    nm2.NodeHeartbeat(attempt.GetAppAttemptId(), container_1.GetId().GetContainerId()
                                      , ContainerState.Complete);
                }
                nm1.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
                am.WaitForState(RMAppAttemptState.Finished);
                NUnit.Framework.Assert.IsFalse(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                   (attempt.GetAppAttemptId()));
            }
            finally
            {
                rm.Stop();
            }
        }