Exemplo n.º 1
0
        /// <exception cref="System.Exception"/>
        private void TestMinimumAllocation(YarnConfiguration conf, int testAlloc)
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            // Register node1
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            // Submit an application
            RMApp app1 = rm.SubmitApp(testAlloc);

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId
                                                                                         ());
            int checkAlloc = conf.GetInt(YarnConfiguration.RmSchedulerMinimumAllocationMb, YarnConfiguration
                                         .DefaultRmSchedulerMinimumAllocationMb);

            NUnit.Framework.Assert.AreEqual(checkAlloc, report_nm1.GetUsedResource().GetMemory
                                                ());
            rm.Stop();
        }
Exemplo n.º 2
0
        /// <exception cref="System.Exception"/>
        public virtual void TestAppOnMultiNode()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            conf.Set("yarn.scheduler.capacity.node-locality-delay", "-1");
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
            MockNM nm2 = rm.RegisterNode("h2:5678", 10240);
            RMApp  app = rm.SubmitApp(2000);

            //kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());

            am.RegisterAppAttempt();
            //request for containers
            int request = 13;

            am.Allocate("h1", 1000, request, new AList <ContainerId>());
            //kick the scheduler
            IList <Container> conts = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId
                                                                                            >()).GetAllocatedContainers();
            int contReceived = conts.Count;

            while (contReceived < 3)
            {
                //only 3 containers are available on node1
                nm1.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(conts, am.Allocate(new AList <ResourceRequest>(), new AList
                                                              <ContainerId>()).GetAllocatedContainers());
                contReceived = conts.Count;
                Log.Info("Got " + contReceived + " containers. Waiting to get " + 3);
                Sharpen.Thread.Sleep(WaitSleepMs);
            }
            NUnit.Framework.Assert.AreEqual(3, conts.Count);
            //send node2 heartbeat
            conts = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>()).GetAllocatedContainers
                        ();
            contReceived = conts.Count;
            while (contReceived < 10)
            {
                nm2.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(conts, am.Allocate(new AList <ResourceRequest>(), new AList
                                                              <ContainerId>()).GetAllocatedContainers());
                contReceived = conts.Count;
                Log.Info("Got " + contReceived + " containers. Waiting to get " + 10);
                Sharpen.Thread.Sleep(WaitSleepMs);
            }
            NUnit.Framework.Assert.AreEqual(10, conts.Count);
            am.UnregisterAppAttempt();
            nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Finished);
            rm.Stop();
        }
Exemplo n.º 3
0
        /// <exception cref="System.Exception"/>
        public virtual void TestInvalidContainerReleaseRequest()
        {
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                // Register node1
                MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
                // Submit an application
                RMApp app1 = rm.SubmitApp(1024);
                // kick the scheduling
                nm1.NodeHeartbeat(true);
                RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
                MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());
                am1.RegisterAppAttempt();
                am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
                AllocateResponse alloc1Response = am1.Schedule();
                // send the request
                // kick the scheduler
                nm1.NodeHeartbeat(true);
                while (alloc1Response.GetAllocatedContainers().Count < 1)
                {
                    Log.Info("Waiting for containers to be created for app 1...");
                    Sharpen.Thread.Sleep(1000);
                    alloc1Response = am1.Schedule();
                }
                NUnit.Framework.Assert.IsTrue(alloc1Response.GetAllocatedContainers().Count > 0);
                RMApp app2 = rm.SubmitApp(1024);
                nm1.NodeHeartbeat(true);
                RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
                MockAM       am2      = rm.SendAMLaunched(attempt2.GetAppAttemptId());
                am2.RegisterAppAttempt();
                // Now trying to release container allocated for app1 -> appAttempt1.
                ContainerId cId = alloc1Response.GetAllocatedContainers()[0].GetId();
                am2.AddContainerToBeReleased(cId);
                try
                {
                    am2.Schedule();
                    NUnit.Framework.Assert.Fail("Exception was expected!!");
                }
                catch (InvalidContainerReleaseException e)
                {
                    StringBuilder sb = new StringBuilder("Cannot release container : ");
                    sb.Append(cId.ToString());
                    sb.Append(" not belonging to this application attempt : ");
                    sb.Append(attempt2.GetAppAttemptId().ToString());
                    NUnit.Framework.Assert.IsTrue(e.Message.Contains(sb.ToString()));
                }
            }
            finally
            {
                if (rm != null)
                {
                    rm.Stop();
                }
            }
        }
Exemplo n.º 4
0
        /// <exception cref="System.Exception"/>
        public static MockAM LaunchAndRegisterAM(RMApp app, Org.Apache.Hadoop.Yarn.Server.Resourcemanager.MockRM
                                                 rm, MockNM nm)
        {
            MockAM am = LaunchAM(app, rm, nm);

            am.RegisterAppAttempt();
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
            return(am);
        }
        public virtual void TestAMLaunchAndCleanup()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            TestApplicationMasterLauncher.MyContainerManagerImpl containerManager = new TestApplicationMasterLauncher.MyContainerManagerImpl
                                                                                        ();
            MockRMWithCustomAMLauncher rm = new MockRMWithCustomAMLauncher(containerManager);

            rm.Start();
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5120);
            RMApp  app = rm.SubmitApp(2000);

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            int waitCount = 0;

            while (containerManager.launched == false && waitCount++ < 20)
            {
                Log.Info("Waiting for AM Launch to happen..");
                Sharpen.Thread.Sleep(1000);
            }
            NUnit.Framework.Assert.IsTrue(containerManager.launched);
            RMAppAttempt         attempt      = app.GetCurrentAppAttempt();
            ApplicationAttemptId appAttemptId = attempt.GetAppAttemptId();

            NUnit.Framework.Assert.AreEqual(appAttemptId.ToString(), containerManager.attemptIdAtContainerManager
                                            );
            NUnit.Framework.Assert.AreEqual(app.GetSubmitTime(), containerManager.submitTimeAtContainerManager
                                            );
            NUnit.Framework.Assert.AreEqual(app.GetRMAppAttempt(appAttemptId).GetMasterContainer
                                                ().GetId().ToString(), containerManager.containerIdAtContainerManager);
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId().ToString(), containerManager.nmHostAtContainerManager
                                            );
            NUnit.Framework.Assert.AreEqual(YarnConfiguration.DefaultRmAmMaxAttempts, containerManager
                                            .maxAppAttempts);
            MockAM am = new MockAM(rm.GetRMContext(), rm.GetApplicationMasterService(), appAttemptId
                                   );

            am.RegisterAppAttempt();
            am.UnregisterAppAttempt();
            //complete the AM container to finish the app normally
            nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Finished);
            waitCount = 0;
            while (containerManager.cleanedup == false && waitCount++ < 20)
            {
                Log.Info("Waiting for AM Cleanup to happen..");
                Sharpen.Thread.Sleep(1000);
            }
            NUnit.Framework.Assert.IsTrue(containerManager.cleanedup);
            am.WaitForState(RMAppAttemptState.Finished);
            rm.Stop();
        }
Exemplo n.º 6
0
        /// <exception cref="System.Exception"/>
        private MockAM LaunchAM(RMApp app, MockRM rm, MockNM nm)
        {
            RMAppAttempt attempt = app.GetCurrentAppAttempt();

            nm.NodeHeartbeat(true);
            MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());

            am.RegisterAppAttempt();
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
            return(am);
        }
Exemplo n.º 7
0
        // Test even if AM container is allocated with containerId not equal to 1, the
        // following allocate requests from AM should be able to retrieve the
        // corresponding NM Token.
        /// <exception cref="System.Exception"/>
        public virtual void TestNMTokenSentForNormalContainer()
        {
            conf.Set(YarnConfiguration.RmScheduler, typeof(CapacityScheduler).GetCanonicalName
                         ());
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM       nm1     = rm.RegisterNode("h1:1234", 5120);
            RMApp        app     = rm.SubmitApp(2000);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            // Call getNewContainerId to increase container Id so that the AM container
            // Id doesn't equal to one.
            CapacityScheduler cs = (CapacityScheduler)rm.GetResourceScheduler();

            cs.GetApplicationAttempt(attempt.GetAppAttemptId()).GetNewContainerId();
            // kick the scheduling
            nm1.NodeHeartbeat(true);
            MockAM am = MockRM.LaunchAM(app, rm, nm1);

            // am container Id not equal to 1.
            NUnit.Framework.Assert.IsTrue(attempt.GetMasterContainer().GetId().GetContainerId
                                              () != 1);
            // NMSecretManager doesn't record the node on which the am is allocated.
            NUnit.Framework.Assert.IsFalse(rm.GetRMContext().GetNMTokenSecretManager().IsApplicationAttemptNMTokenPresent
                                               (attempt.GetAppAttemptId(), nm1.GetNodeId()));
            am.RegisterAppAttempt();
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
            int NumContainers            = 1;
            IList <Container> containers = new AList <Container>();
            // nmTokens keeps track of all the nmTokens issued in the allocate call.
            IList <NMToken> expectedNMTokens = new AList <NMToken>();

            // am1 allocate 1 container on nm1.
            while (true)
            {
                AllocateResponse response = am.Allocate("127.0.0.1", 2000, NumContainers, new AList
                                                        <ContainerId>());
                nm1.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
                Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
                if (containers.Count == NumContainers)
                {
                    break;
                }
                Sharpen.Thread.Sleep(200);
                System.Console.Out.WriteLine("Waiting for container to be allocated.");
            }
            NodeId nodeId = expectedNMTokens[0].GetNodeId();

            // NMToken is sent for the allocated container.
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), nodeId);
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestUsageWithOneAttemptAndOneContainer()
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm = new MockNM("127.0.0.1:1234", 15120, rm.GetResourceTrackerService());

            nm.RegisterNode();
            RMApp        app0         = rm.SubmitApp(200);
            RMAppMetrics rmAppMetrics = app0.GetRMAppMetrics();

            NUnit.Framework.Assert.IsTrue("Before app submittion, memory seconds should have been 0 but was "
                                          + rmAppMetrics.GetMemorySeconds(), rmAppMetrics.GetMemorySeconds() == 0);
            NUnit.Framework.Assert.IsTrue("Before app submission, vcore seconds should have been 0 but was "
                                          + rmAppMetrics.GetVcoreSeconds(), rmAppMetrics.GetVcoreSeconds() == 0);
            RMAppAttempt attempt0 = app0.GetCurrentAppAttempt();

            nm.NodeHeartbeat(true);
            MockAM am0 = rm.SendAMLaunched(attempt0.GetAppAttemptId());

            am0.RegisterAppAttempt();
            RMContainer rmContainer = rm.GetResourceScheduler().GetRMContainer(attempt0.GetMasterContainer
                                                                                   ().GetId());
            // Allow metrics to accumulate.
            int sleepInterval       = 1000;
            int cumulativeSleepTime = 0;

            while (rmAppMetrics.GetMemorySeconds() <= 0 && cumulativeSleepTime < 5000)
            {
                Sharpen.Thread.Sleep(sleepInterval);
                cumulativeSleepTime += sleepInterval;
            }
            rmAppMetrics = app0.GetRMAppMetrics();
            NUnit.Framework.Assert.IsTrue("While app is running, memory seconds should be >0 but is "
                                          + rmAppMetrics.GetMemorySeconds(), rmAppMetrics.GetMemorySeconds() > 0);
            NUnit.Framework.Assert.IsTrue("While app is running, vcore seconds should be >0 but is "
                                          + rmAppMetrics.GetVcoreSeconds(), rmAppMetrics.GetVcoreSeconds() > 0);
            MockRM.FinishAMAndVerifyAppState(app0, rm, nm, am0);
            AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(rmContainer);

            rmAppMetrics = app0.GetRMAppMetrics();
            NUnit.Framework.Assert.AreEqual("Unexcpected MemorySeconds value", ru.GetMemorySeconds
                                                (), rmAppMetrics.GetMemorySeconds());
            NUnit.Framework.Assert.AreEqual("Unexpected VcoreSeconds value", ru.GetVcoreSeconds
                                                (), rmAppMetrics.GetVcoreSeconds());
            rm.Stop();
        }
Exemplo n.º 9
0
        /// <exception cref="System.Exception"/>
        public virtual void TestResourceTypes()
        {
            Dictionary <YarnConfiguration, EnumSet <YarnServiceProtos.SchedulerResourceTypes> >
            driver = new Dictionary <YarnConfiguration, EnumSet <YarnServiceProtos.SchedulerResourceTypes
                                                                 > >();
            CapacitySchedulerConfiguration csconf = new CapacitySchedulerConfiguration();

            csconf.SetResourceComparator(typeof(DominantResourceCalculator));
            YarnConfiguration testCapacityDRConf = new YarnConfiguration(csconf);

            testCapacityDRConf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler
                                                                              ), typeof(ResourceScheduler));
            YarnConfiguration testCapacityDefConf = new YarnConfiguration();

            testCapacityDefConf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler
                                                                               ), typeof(ResourceScheduler));
            YarnConfiguration testFairDefConf = new YarnConfiguration();

            testFairDefConf.SetClass(YarnConfiguration.RmScheduler, typeof(FairScheduler), typeof(
                                         ResourceScheduler));
            driver[conf] = EnumSet.Of(YarnServiceProtos.SchedulerResourceTypes.Memory);
            driver[testCapacityDRConf] = EnumSet.Of(YarnServiceProtos.SchedulerResourceTypes.
                                                    Cpu, YarnServiceProtos.SchedulerResourceTypes.Memory);
            driver[testCapacityDefConf] = EnumSet.Of(YarnServiceProtos.SchedulerResourceTypes
                                                     .Memory);
            driver[testFairDefConf] = EnumSet.Of(YarnServiceProtos.SchedulerResourceTypes.Memory
                                                 , YarnServiceProtos.SchedulerResourceTypes.Cpu);
            foreach (KeyValuePair <YarnConfiguration, EnumSet <YarnServiceProtos.SchedulerResourceTypes
                                                               > > entry in driver)
            {
                EnumSet <YarnServiceProtos.SchedulerResourceTypes> expectedValue = entry.Value;
                MockRM rm = new MockRM(entry.Key);
                rm.Start();
                MockNM nm1  = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
                RMApp  app1 = rm.SubmitApp(2048);
                nm1.NodeHeartbeat(true);
                RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
                MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());
                RegisterApplicationMasterResponse resp = am1.RegisterAppAttempt();
                EnumSet <YarnServiceProtos.SchedulerResourceTypes> types = resp.GetSchedulerResourceTypes
                                                                               ();
                Log.Info("types = " + types.ToString());
                NUnit.Framework.Assert.AreEqual(expectedValue, types);
                rm.Stop();
            }
        }
Exemplo n.º 10
0
        /// <exception cref="System.Exception"/>
        public virtual void TestFinishApplicationMasterBeforeRegistering()
        {
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                // Register node1
                MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
                // Submit an application
                RMApp  app1 = rm.SubmitApp(2048);
                MockAM am1  = MockRM.LaunchAM(app1, rm, nm1);
                FinishApplicationMasterRequest req = FinishApplicationMasterRequest.NewInstance(FinalApplicationStatus
                                                                                                .Failed, string.Empty, string.Empty);
                try
                {
                    am1.UnregisterAppAttempt(req, false);
                    NUnit.Framework.Assert.Fail("ApplicationMasterNotRegisteredException should be thrown"
                                                );
                }
                catch (ApplicationMasterNotRegisteredException e)
                {
                    NUnit.Framework.Assert.IsNotNull(e);
                    NUnit.Framework.Assert.IsNotNull(e.Message);
                    NUnit.Framework.Assert.IsTrue(e.Message.Contains("Application Master is trying to unregister before registering for:"
                                                                     ));
                }
                catch (Exception)
                {
                    NUnit.Framework.Assert.Fail("ApplicationMasterNotRegisteredException should be thrown"
                                                );
                }
                am1.RegisterAppAttempt();
                am1.UnregisterAppAttempt(req, false);
                am1.WaitForState(RMAppAttemptState.Finishing);
            }
            finally
            {
                if (rm != null)
                {
                    rm.Stop();
                }
            }
        }
Exemplo n.º 11
0
        /// <summary>Validate killing an application when it is at accepted state.</summary>
        /// <exception cref="System.Exception">exception</exception>
        public virtual void TestApplicationKillAtAcceptedState()
        {
            Dispatcher dispatcher = new _AsyncDispatcher_573();
            MockRM     rm         = new _MockRM_596(dispatcher, conf);
            // test metrics
            QueueMetrics metrics       = rm.GetResourceScheduler().GetRootQueueMetrics();
            int          appsKilled    = metrics.GetAppsKilled();
            int          appsSubmitted = metrics.GetAppsSubmitted();

            rm.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.GetResourceTrackerService());

            nm1.RegisterNode();
            // a failed app
            RMApp  application = rm.SubmitApp(200);
            MockAM am          = MockRM.LaunchAM(application, rm, nm1);

            am.WaitForState(RMAppAttemptState.Launched);
            nm1.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Running);
            rm.WaitForState(application.GetApplicationId(), RMAppState.Accepted);
            // Now kill the application before new attempt is launched, the app report
            // returns the invalid AM host and port.
            KillApplicationRequest request = KillApplicationRequest.NewInstance(application.GetApplicationId
                                                                                    ());

            rm.GetClientRMService().ForceKillApplication(request);
            // Specific test for YARN-1689 follows
            // Now let's say a race causes AM to register now. This should not crash RM.
            am.RegisterAppAttempt(false);
            // We explicitly intercepted the kill-event to RMAppAttempt, so app should
            // still be in KILLING state.
            rm.WaitForState(application.GetApplicationId(), RMAppState.Killing);
            // AM should now be in running
            rm.WaitForState(am.GetApplicationAttemptId(), RMAppAttemptState.Running);
            // Simulate that appAttempt is killed.
            rm.GetRMContext().GetDispatcher().GetEventHandler().Handle(new RMAppEvent(application
                                                                                      .GetApplicationId(), RMAppEventType.AttemptKilled));
            rm.WaitForState(application.GetApplicationId(), RMAppState.Killed);
            // test metrics
            metrics = rm.GetResourceScheduler().GetRootQueueMetrics();
            NUnit.Framework.Assert.AreEqual(appsKilled + 1, metrics.GetAppsKilled());
            NUnit.Framework.Assert.AreEqual(appsSubmitted + 1, metrics.GetAppsSubmitted());
        }
Exemplo n.º 12
0
        /// <exception cref="System.Exception"/>
        public virtual void TestAppWithNoContainers()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
            RMApp  app = rm.SubmitApp(2000);

            //kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());

            am.RegisterAppAttempt();
            am.UnregisterAppAttempt();
            nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Finished);
            rm.Stop();
        }
Exemplo n.º 13
0
        /// <exception cref="System.Exception"/>
        public virtual void TestRMIdentifierOnContainerAllocation()
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            // Register node1
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            // Submit an application
            RMApp app1 = rm.SubmitApp(2048);

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
            AllocateResponse alloc1Response = am1.Schedule();

            // send the request
            // kick the scheduler
            nm1.NodeHeartbeat(true);
            while (alloc1Response.GetAllocatedContainers().Count < 1)
            {
                Log.Info("Waiting for containers to be created for app 1...");
                Sharpen.Thread.Sleep(1000);
                alloc1Response = am1.Schedule();
            }
            // assert RMIdentifer is set properly in allocated containers
            Container allocatedContainer     = alloc1Response.GetAllocatedContainers()[0];
            ContainerTokenIdentifier tokenId = BuilderUtils.NewContainerTokenIdentifier(allocatedContainer
                                                                                        .GetContainerToken());

            NUnit.Framework.Assert.AreEqual(MockRM.GetClusterTimeStamp(), tokenId.GetRMIdentifier
                                                ());
            rm.Stop();
        }
Exemplo n.º 14
0
        public virtual void TestAllocateContainerOnNodeWithoutOffSwitchSpecified()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1  = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            RMApp  app1 = rm.SubmitApp(2048);

            // kick the scheduling, 2 GB given to AM1, remaining 4GB on nm1
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            // add request for containers
            IList <ResourceRequest> requests = new AList <ResourceRequest>();

            requests.AddItem(am1.CreateResourceReq("127.0.0.1", 1 * Gb, 1, 1));
            requests.AddItem(am1.CreateResourceReq("/default-rack", 1 * Gb, 1, 1));
            am1.Allocate(requests, null);
            // send the request
            try
            {
                // kick the schedule
                nm1.NodeHeartbeat(true);
            }
            catch (ArgumentNullException)
            {
                NUnit.Framework.Assert.Fail("NPE when allocating container on node but " + "forget to set off-switch request should be handled"
                                            );
            }
            rm.Stop();
        }
Exemplo n.º 15
0
        public virtual void Test()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1  = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            MockNM nm2  = rm.RegisterNode("127.0.0.2:5678", 4 * Gb);
            RMApp  app1 = rm.SubmitApp(2048);

            // kick the scheduling, 2 GB given to AM1, remaining 4GB on nm1
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId
                                                                                         ());

            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
            RMApp app2 = rm.SubmitApp(2048);

            // kick the scheduling, 2GB given to AM, remaining 2 GB on nm2
            nm2.NodeHeartbeat(true);
            RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
            MockAM       am2      = rm.SendAMLaunched(attempt2.GetAppAttemptId());

            am2.RegisterAppAttempt();
            SchedulerNodeReport report_nm2 = rm.GetResourceScheduler().GetNodeReport(nm2.GetNodeId
                                                                                         ());

            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetUsedResource().GetMemory());
            // add request for containers
            am1.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, Gb, 1, 1);
            AllocateResponse alloc1Response = am1.Schedule();

            // send the request
            // add request for containers
            am2.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, 3 * Gb, 0, 1);
            AllocateResponse alloc2Response = am2.Schedule();

            // send the request
            // kick the scheduler, 1 GB and 3 GB given to AM1 and AM2, remaining 0
            nm1.NodeHeartbeat(true);
            while (alloc1Response.GetAllocatedContainers().Count < 1)
            {
                Log.Info("Waiting for containers to be created for app 1...");
                Sharpen.Thread.Sleep(1000);
                alloc1Response = am1.Schedule();
            }
            while (alloc2Response.GetAllocatedContainers().Count < 1)
            {
                Log.Info("Waiting for containers to be created for app 2...");
                Sharpen.Thread.Sleep(1000);
                alloc2Response = am2.Schedule();
            }
            // kick the scheduler, nothing given remaining 2 GB.
            nm2.NodeHeartbeat(true);
            IList <Container> allocated1 = alloc1Response.GetAllocatedContainers();

            NUnit.Framework.Assert.AreEqual(1, allocated1.Count);
            NUnit.Framework.Assert.AreEqual(1 * Gb, allocated1[0].GetResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated1[0].GetNodeId());
            IList <Container> allocated2 = alloc2Response.GetAllocatedContainers();

            NUnit.Framework.Assert.AreEqual(1, allocated2.Count);
            NUnit.Framework.Assert.AreEqual(3 * Gb, allocated2[0].GetResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated2[0].GetNodeId());
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            report_nm2 = rm.GetResourceScheduler().GetNodeReport(nm2.GetNodeId());
            NUnit.Framework.Assert.AreEqual(0, report_nm1.GetAvailableResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetAvailableResource().GetMemory
                                                ());
            NUnit.Framework.Assert.AreEqual(6 * Gb, report_nm1.GetUsedResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetUsedResource().GetMemory());
            Container c1 = allocated1[0];

            NUnit.Framework.Assert.AreEqual(Gb, c1.GetResource().GetMemory());
            ContainerStatus containerStatus = BuilderUtils.NewContainerStatus(c1.GetId(), ContainerState
                                                                              .Complete, string.Empty, 0);

            nm1.ContainerStatus(containerStatus);
            int waitCount = 0;

            while (attempt1.GetJustFinishedContainers().Count < 1 && waitCount++ != 20)
            {
                Log.Info("Waiting for containers to be finished for app 1... Tried " + waitCount
                         + " times already..");
                Sharpen.Thread.Sleep(1000);
            }
            NUnit.Framework.Assert.AreEqual(1, attempt1.GetJustFinishedContainers().Count);
            NUnit.Framework.Assert.AreEqual(1, am1.Schedule().GetCompletedContainersStatuses(
                                                ).Count);
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            NUnit.Framework.Assert.AreEqual(5 * Gb, report_nm1.GetUsedResource().GetMemory());
            rm.Stop();
        }
        /// <exception cref="System.Exception"/>
        private void AmRestartTests(bool keepRunningContainers)
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            RMApp app = rm.SubmitApp(200, "name", "user", new Dictionary <ApplicationAccessType
                                                                          , string>(), false, "default", -1, null, "MAPREDUCE", false, keepRunningContainers
                                     );
            MockNM nm = new MockNM("127.0.0.1:1234", 10240, rm.GetResourceTrackerService());

            nm.RegisterNode();
            MockAM am0           = MockRM.LaunchAndRegisterAM(app, rm, nm);
            int    NumContainers = 1;

            // allocate NUM_CONTAINERS containers
            am0.Allocate("127.0.0.1", 1024, NumContainers, new AList <ContainerId>());
            nm.NodeHeartbeat(true);
            // wait for containers to be allocated.
            IList <Container> containers = am0.Allocate(new AList <ResourceRequest>(), new AList
                                                        <ContainerId>()).GetAllocatedContainers();

            while (containers.Count != NumContainers)
            {
                nm.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, am0.Allocate(new AList <ResourceRequest>(),
                                                                    new AList <ContainerId>()).GetAllocatedContainers());
                Sharpen.Thread.Sleep(200);
            }
            // launch the 2nd container.
            ContainerId containerId2 = ContainerId.NewContainerId(am0.GetApplicationAttemptId
                                                                      (), 2);

            nm.NodeHeartbeat(am0.GetApplicationAttemptId(), containerId2.GetContainerId(), ContainerState
                             .Running);
            rm.WaitForState(nm, containerId2, RMContainerState.Running);
            // Capture the containers here so the metrics can be calculated after the
            // app has completed.
            ICollection <RMContainer> rmContainers = rm.scheduler.GetSchedulerAppInfo(am0.GetApplicationAttemptId
                                                                                          ()).GetLiveContainers();
            // fail the first app attempt by sending CONTAINER_FINISHED event without
            // registering.
            ContainerId amContainerId = app.GetCurrentAppAttempt().GetMasterContainer().GetId
                                            ();

            nm.NodeHeartbeat(am0.GetApplicationAttemptId(), amContainerId.GetContainerId(), ContainerState
                             .Complete);
            am0.WaitForState(RMAppAttemptState.Failed);
            long memorySeconds = 0;
            long vcoreSeconds  = 0;

            // Calculate container usage metrics for first attempt.
            if (keepRunningContainers)
            {
                // Only calculate the usage for the one container that has completed.
                foreach (RMContainer c in rmContainers)
                {
                    if (c.GetContainerId().Equals(amContainerId))
                    {
                        AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c);
                        memorySeconds += ru.GetMemorySeconds();
                        vcoreSeconds  += ru.GetVcoreSeconds();
                    }
                    else
                    {
                        // The remaining container should be RUNNING.
                        NUnit.Framework.Assert.IsTrue("After first attempt failed, remaining container "
                                                      + "should still be running. ", c.GetContainerState().Equals(ContainerState.Running
                                                                                                                  ));
                    }
                }
            }
            else
            {
                // If keepRunningContainers is false, all live containers should now
                // be completed. Calculate the resource usage metrics for all of them.
                foreach (RMContainer c in rmContainers)
                {
                    AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c);
                    memorySeconds += ru.GetMemorySeconds();
                    vcoreSeconds  += ru.GetVcoreSeconds();
                }
            }
            // wait for app to start a new attempt.
            rm.WaitForState(app.GetApplicationId(), RMAppState.Accepted);
            // assert this is a new AM.
            RMAppAttempt attempt2 = app.GetCurrentAppAttempt();

            NUnit.Framework.Assert.IsFalse(attempt2.GetAppAttemptId().Equals(am0.GetApplicationAttemptId
                                                                                 ()));
            // launch the new AM
            nm.NodeHeartbeat(true);
            MockAM am1 = rm.SendAMLaunched(attempt2.GetAppAttemptId());

            am1.RegisterAppAttempt();
            // allocate NUM_CONTAINERS containers
            am1.Allocate("127.0.0.1", 1024, NumContainers, new AList <ContainerId>());
            nm.NodeHeartbeat(true);
            // wait for containers to be allocated.
            containers = am1.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>())
                         .GetAllocatedContainers();
            while (containers.Count != NumContainers)
            {
                nm.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, am1.Allocate(new AList <ResourceRequest>(),
                                                                    new AList <ContainerId>()).GetAllocatedContainers());
                Sharpen.Thread.Sleep(200);
            }
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
            // Capture running containers for later use by metrics calculations.
            rmContainers = rm.scheduler.GetSchedulerAppInfo(attempt2.GetAppAttemptId()).GetLiveContainers
                               ();
            // complete container by sending the container complete event which has
            // earlier attempt's attemptId
            amContainerId = app.GetCurrentAppAttempt().GetMasterContainer().GetId();
            nm.NodeHeartbeat(am0.GetApplicationAttemptId(), amContainerId.GetContainerId(), ContainerState
                             .Complete);
            MockRM.FinishAMAndVerifyAppState(app, rm, nm, am1);
            // Calculate container usage metrics for second attempt.
            foreach (RMContainer c_1 in rmContainers)
            {
                AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c_1);
                memorySeconds += ru.GetMemorySeconds();
                vcoreSeconds  += ru.GetVcoreSeconds();
            }
            RMAppMetrics rmAppMetrics = app.GetRMAppMetrics();

            NUnit.Framework.Assert.AreEqual("Unexcpected MemorySeconds value", memorySeconds,
                                            rmAppMetrics.GetMemorySeconds());
            NUnit.Framework.Assert.AreEqual("Unexpected VcoreSeconds value", vcoreSeconds, rmAppMetrics
                                            .GetVcoreSeconds());
            rm.Stop();
            return;
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestUsageWithMultipleContainersAndRMRestart()
        {
            // Set max attempts to 1 so that when the first attempt fails, the app
            // won't try to start a new one.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            MockRM rm0 = new MockRM(conf, memStore);

            rm0.Start();
            MockNM nm = new MockNM("127.0.0.1:1234", 65536, rm0.GetResourceTrackerService());

            nm.RegisterNode();
            RMApp app0 = rm0.SubmitApp(200);

            rm0.WaitForState(app0.GetApplicationId(), RMAppState.Accepted);
            RMAppAttempt         attempt0   = app0.GetCurrentAppAttempt();
            ApplicationAttemptId attemptId0 = attempt0.GetAppAttemptId();

            rm0.WaitForState(attemptId0, RMAppAttemptState.Scheduled);
            nm.NodeHeartbeat(true);
            rm0.WaitForState(attemptId0, RMAppAttemptState.Allocated);
            MockAM am0 = rm0.SendAMLaunched(attempt0.GetAppAttemptId());

            am0.RegisterAppAttempt();
            int NumContainers = 2;

            am0.Allocate("127.0.0.1", 1000, NumContainers, new AList <ContainerId>());
            nm.NodeHeartbeat(true);
            IList <Container> conts = am0.Allocate(new AList <ResourceRequest>(), new AList <ContainerId
                                                                                             >()).GetAllocatedContainers();

            while (conts.Count != NumContainers)
            {
                nm.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(conts, am0.Allocate(new AList <ResourceRequest>(), new
                                                               AList <ContainerId>()).GetAllocatedContainers());
                Sharpen.Thread.Sleep(500);
            }
            // launch the 2nd and 3rd containers.
            foreach (Container c in conts)
            {
                nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c.GetId().GetContainerId(), ContainerState
                                 .Running);
                rm0.WaitForState(nm, c.GetId(), RMContainerState.Running);
            }
            // Get the RMContainers for all of the live containers, to be used later
            // for metrics calculations and comparisons.
            ICollection <RMContainer> rmContainers = rm0.scheduler.GetSchedulerAppInfo(attempt0
                                                                                       .GetAppAttemptId()).GetLiveContainers();
            // Allow metrics to accumulate.
            int sleepInterval       = 1000;
            int cumulativeSleepTime = 0;

            while (app0.GetRMAppMetrics().GetMemorySeconds() <= 0 && cumulativeSleepTime < 5000
                   )
            {
                Sharpen.Thread.Sleep(sleepInterval);
                cumulativeSleepTime += sleepInterval;
            }
            // Stop all non-AM containers
            foreach (Container c_1 in conts)
            {
                if (c_1.GetId().GetContainerId() == 1)
                {
                    continue;
                }
                nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c_1.GetId().GetContainerId(), ContainerState
                                 .Complete);
                rm0.WaitForState(nm, c_1.GetId(), RMContainerState.Completed);
            }
            // After all other containers have completed, manually complete the master
            // container in order to trigger a save to the state store of the resource
            // usage metrics. This will cause the attempt to fail, and, since the max
            // attempt retries is 1, the app will also fail. This is intentional so
            // that all containers will complete prior to saving.
            ContainerId cId = ContainerId.NewContainerId(attempt0.GetAppAttemptId(), 1);

            nm.NodeHeartbeat(attempt0.GetAppAttemptId(), cId.GetContainerId(), ContainerState
                             .Complete);
            rm0.WaitForState(nm, cId, RMContainerState.Completed);
            // Check that the container metrics match those from the app usage report.
            long memorySeconds = 0;
            long vcoreSeconds  = 0;

            foreach (RMContainer c_2 in rmContainers)
            {
                AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c_2);
                memorySeconds += ru.GetMemorySeconds();
                vcoreSeconds  += ru.GetVcoreSeconds();
            }
            RMAppMetrics metricsBefore = app0.GetRMAppMetrics();

            NUnit.Framework.Assert.AreEqual("Unexcpected MemorySeconds value", memorySeconds,
                                            metricsBefore.GetMemorySeconds());
            NUnit.Framework.Assert.AreEqual("Unexpected VcoreSeconds value", vcoreSeconds, metricsBefore
                                            .GetVcoreSeconds());
            // create new RM to represent RM restart. Load up the state store.
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            RMApp app0After = rm1.GetRMContext().GetRMApps()[app0.GetApplicationId()];
            // Compare container resource usage metrics from before and after restart.
            RMAppMetrics metricsAfter = app0After.GetRMAppMetrics();

            NUnit.Framework.Assert.AreEqual("Vcore seconds were not the same after RM Restart"
                                            , metricsBefore.GetVcoreSeconds(), metricsAfter.GetVcoreSeconds());
            NUnit.Framework.Assert.AreEqual("Memory seconds were not the same after RM Restart"
                                            , metricsBefore.GetMemorySeconds(), metricsAfter.GetMemorySeconds());
            rm0.Stop();
            rm0.Close();
            rm1.Stop();
            rm1.Close();
        }
Exemplo n.º 18
0
        public virtual void TestAppCleanup()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            MockRM rm = new MockRM();

            rm.Start();
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5000);
            RMApp  app = rm.SubmitApp(2000);

            //kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());

            am.RegisterAppAttempt();
            //request for containers
            int request = 2;

            am.Allocate("127.0.0.1", 1000, request, new AList <ContainerId>());
            //kick the scheduler
            nm1.NodeHeartbeat(true);
            IList <Container> conts = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId
                                                                                            >()).GetAllocatedContainers();
            int contReceived = conts.Count;
            int waitCount    = 0;

            while (contReceived < request && waitCount++ < 200)
            {
                Log.Info("Got " + contReceived + " containers. Waiting to get " + request);
                Sharpen.Thread.Sleep(100);
                conts = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>()).GetAllocatedContainers
                            ();
                contReceived += conts.Count;
                nm1.NodeHeartbeat(true);
            }
            NUnit.Framework.Assert.AreEqual(request, contReceived);
            am.UnregisterAppAttempt();
            NodeHeartbeatResponse resp = nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState
                                                           .Complete);

            am.WaitForState(RMAppAttemptState.Finished);
            //currently only containers are cleaned via this
            //AM container is cleaned via container launcher
            resp = nm1.NodeHeartbeat(true);
            IList <ContainerId>   containersToCleanup = resp.GetContainersToCleanup();
            IList <ApplicationId> appsToCleanup       = resp.GetApplicationsToCleanup();
            int numCleanedContainers = containersToCleanup.Count;
            int numCleanedApps       = appsToCleanup.Count;

            waitCount = 0;
            while ((numCleanedContainers < 2 || numCleanedApps < 1) && waitCount++ < 200)
            {
                Log.Info("Waiting to get cleanup events.. cleanedConts: " + numCleanedContainers
                         + " cleanedApps: " + numCleanedApps);
                Sharpen.Thread.Sleep(100);
                resp = nm1.NodeHeartbeat(true);
                IList <ContainerId>   deltaContainersToCleanup = resp.GetContainersToCleanup();
                IList <ApplicationId> deltaAppsToCleanup       = resp.GetApplicationsToCleanup();
                // Add the deltas to the global list
                Sharpen.Collections.AddAll(containersToCleanup, deltaContainersToCleanup);
                Sharpen.Collections.AddAll(appsToCleanup, deltaAppsToCleanup);
                // Update counts now
                numCleanedContainers = containersToCleanup.Count;
                numCleanedApps       = appsToCleanup.Count;
            }
            NUnit.Framework.Assert.AreEqual(1, appsToCleanup.Count);
            NUnit.Framework.Assert.AreEqual(app.GetApplicationId(), appsToCleanup[0]);
            NUnit.Framework.Assert.AreEqual(1, numCleanedApps);
            NUnit.Framework.Assert.AreEqual(2, numCleanedContainers);
            rm.Stop();
        }
Exemplo n.º 19
0
        public virtual void TestContainerCleanup()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            DrainDispatcher dispatcher = new DrainDispatcher();
            MockRM          rm         = new _MockRM_167(this, dispatcher);

            rm.Start();
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5000);
            RMApp  app = rm.SubmitApp(2000);

            //kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());

            am.RegisterAppAttempt();
            //request for containers
            int request = 2;

            am.Allocate("127.0.0.1", 1000, request, new AList <ContainerId>());
            dispatcher.Await();
            //kick the scheduler
            nm1.NodeHeartbeat(true);
            IList <Container> conts = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId
                                                                                            >()).GetAllocatedContainers();
            int contReceived = conts.Count;
            int waitCount    = 0;

            while (contReceived < request && waitCount++ < 200)
            {
                Log.Info("Got " + contReceived + " containers. Waiting to get " + request);
                Sharpen.Thread.Sleep(100);
                conts = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>()).GetAllocatedContainers
                            ();
                dispatcher.Await();
                contReceived += conts.Count;
                nm1.NodeHeartbeat(true);
            }
            NUnit.Framework.Assert.AreEqual(request, contReceived);
            // Release a container.
            AList <ContainerId> release = new AList <ContainerId>();

            release.AddItem(conts[0].GetId());
            am.Allocate(new AList <ResourceRequest>(), release);
            dispatcher.Await();
            // Send one more heartbeat with a fake running container. This is to
            // simulate the situation that can happen if the NM reports that container
            // is running in the same heartbeat when the RM asks it to clean it up.
            IDictionary <ApplicationId, IList <ContainerStatus> > containerStatuses = new Dictionary
                                                                                      <ApplicationId, IList <ContainerStatus> >();
            AList <ContainerStatus> containerStatusList = new AList <ContainerStatus>();

            containerStatusList.AddItem(BuilderUtils.NewContainerStatus(conts[0].GetId(), ContainerState
                                                                        .Running, "nothing", 0));
            containerStatuses[app.GetApplicationId()] = containerStatusList;
            NodeHeartbeatResponse resp = nm1.NodeHeartbeat(containerStatuses, true);

            WaitForContainerCleanup(dispatcher, nm1, resp);
            // Now to test the case when RM already gave cleanup, and NM suddenly
            // realizes that the container is running.
            Log.Info("Testing container launch much after release and " + "NM getting cleanup"
                     );
            containerStatuses.Clear();
            containerStatusList.Clear();
            containerStatusList.AddItem(BuilderUtils.NewContainerStatus(conts[0].GetId(), ContainerState
                                                                        .Running, "nothing", 0));
            containerStatuses[app.GetApplicationId()] = containerStatusList;
            resp = nm1.NodeHeartbeat(containerStatuses, true);
            // The cleanup list won't be instantaneous as it is given out by scheduler
            // and not RMNodeImpl.
            WaitForContainerCleanup(dispatcher, nm1, resp);
            rm.Stop();
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestallocateBeforeAMRegistration()
        {
            Logger rootLogger = LogManager.GetRootLogger();
            bool   thrown     = false;

            rootLogger.SetLevel(Level.Debug);
            MockRM rm = new MockRM();

            rm.Start();
            MockNM nm1 = rm.RegisterNode("h1:1234", 5000);
            RMApp  app = rm.SubmitApp(2000);

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());
            // request for containers
            int request         = 2;
            AllocateResponse ar = null;

            try
            {
                ar = am.Allocate("h1", 1000, request, new AList <ContainerId>());
                NUnit.Framework.Assert.Fail();
            }
            catch (ApplicationMasterNotRegisteredException)
            {
            }
            // kick the scheduler
            nm1.NodeHeartbeat(true);
            AllocateResponse amrs = null;

            try
            {
                amrs = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>());
                NUnit.Framework.Assert.Fail();
            }
            catch (ApplicationMasterNotRegisteredException)
            {
            }
            am.RegisterAppAttempt();
            try
            {
                am.RegisterAppAttempt(false);
                NUnit.Framework.Assert.Fail();
            }
            catch (Exception e)
            {
                NUnit.Framework.Assert.AreEqual("Application Master is already registered : " + attempt
                                                .GetAppAttemptId().GetApplicationId(), e.Message);
            }
            // Simulate an AM that was disconnected and app attempt was removed
            // (responseMap does not contain attemptid)
            am.UnregisterAppAttempt();
            nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Finished);
            try
            {
                amrs = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>());
                NUnit.Framework.Assert.Fail();
            }
            catch (ApplicationAttemptNotFoundException)
            {
            }
        }
Exemplo n.º 21
0
        public virtual void TestResourceOverCommit()
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1  = rm.RegisterNode("127.0.0.1:1234", 4 * Gb);
            RMApp  app1 = rm.SubmitApp(2048);

            // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId
                                                                                         ());

            // check node report, 2 GB used and 2 GB available
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetAvailableResource().GetMemory
                                                ());
            // add request for containers
            am1.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, 2 * Gb, 1, 1);
            AllocateResponse alloc1Response = am1.Schedule();

            // send the request
            // kick the scheduler, 2 GB given to AM1, resource remaining 0
            nm1.NodeHeartbeat(true);
            while (alloc1Response.GetAllocatedContainers().Count < 1)
            {
                Log.Info("Waiting for containers to be created for app 1...");
                Sharpen.Thread.Sleep(1000);
                alloc1Response = am1.Schedule();
            }
            IList <Container> allocated1 = alloc1Response.GetAllocatedContainers();

            NUnit.Framework.Assert.AreEqual(1, allocated1.Count);
            NUnit.Framework.Assert.AreEqual(2 * Gb, allocated1[0].GetResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated1[0].GetNodeId());
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            // check node report, 4 GB used and 0 GB available
            NUnit.Framework.Assert.AreEqual(0, report_nm1.GetAvailableResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(4 * Gb, report_nm1.GetUsedResource().GetMemory());
            // check container is assigned with 2 GB.
            Container c1 = allocated1[0];

            NUnit.Framework.Assert.AreEqual(2 * Gb, c1.GetResource().GetMemory());
            // update node resource to 2 GB, so resource is over-consumed.
            IDictionary <NodeId, ResourceOption> nodeResourceMap = new Dictionary <NodeId, ResourceOption
                                                                                   >();

            nodeResourceMap[nm1.GetNodeId()] = ResourceOption.NewInstance(Org.Apache.Hadoop.Yarn.Api.Records.Resource
                                                                          .NewInstance(2 * Gb, 1), -1);
            UpdateNodeResourceRequest request = UpdateNodeResourceRequest.NewInstance(nodeResourceMap
                                                                                      );
            AdminService @as = rm.adminService;

            @as.UpdateNodeResource(request);
            // Now, the used resource is still 4 GB, and available resource is minus value.
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            NUnit.Framework.Assert.AreEqual(4 * Gb, report_nm1.GetUsedResource().GetMemory());
            NUnit.Framework.Assert.AreEqual(-2 * Gb, report_nm1.GetAvailableResource().GetMemory
                                                ());
            // Check container can complete successfully in case of resource over-commitment.
            ContainerStatus containerStatus = BuilderUtils.NewContainerStatus(c1.GetId(), ContainerState
                                                                              .Complete, string.Empty, 0);

            nm1.ContainerStatus(containerStatus);
            int waitCount = 0;

            while (attempt1.GetJustFinishedContainers().Count < 1 && waitCount++ != 20)
            {
                Log.Info("Waiting for containers to be finished for app 1... Tried " + waitCount
                         + " times already..");
                Sharpen.Thread.Sleep(100);
            }
            NUnit.Framework.Assert.AreEqual(1, attempt1.GetJustFinishedContainers().Count);
            NUnit.Framework.Assert.AreEqual(1, am1.Schedule().GetCompletedContainersStatuses(
                                                ).Count);
            report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
            NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
            // As container return 2 GB back, the available resource becomes 0 again.
            NUnit.Framework.Assert.AreEqual(0 * Gb, report_nm1.GetAvailableResource().GetMemory
                                                ());
            rm.Stop();
        }
Exemplo n.º 22
0
        /// <exception cref="System.Exception"/>
        public virtual void TestProgressFilter()
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            // Register node1
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            // Submit an application
            RMApp app1 = rm.SubmitApp(2048);

            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            AllocateRequestPBImpl   allocateRequest = new AllocateRequestPBImpl();
            IList <ContainerId>     release         = new AList <ContainerId>();
            IList <ResourceRequest> ask             = new AList <ResourceRequest>();

            allocateRequest.SetReleaseList(release);
            allocateRequest.SetAskList(ask);
            allocateRequest.SetProgress(float.PositiveInfinity);
            am1.Allocate(allocateRequest);
            while (attempt1.GetProgress() != 1)
            {
                Log.Info("Waiting for allocate event to be handled ...");
                Sharpen.Thread.Sleep(100);
            }
            allocateRequest.SetProgress(float.NaN);
            am1.Allocate(allocateRequest);
            while (attempt1.GetProgress() != 0)
            {
                Log.Info("Waiting for allocate event to be handled ...");
                Sharpen.Thread.Sleep(100);
            }
            allocateRequest.SetProgress((float)9);
            am1.Allocate(allocateRequest);
            while (attempt1.GetProgress() != 1)
            {
                Log.Info("Waiting for allocate event to be handled ...");
                Sharpen.Thread.Sleep(100);
            }
            allocateRequest.SetProgress(float.NegativeInfinity);
            am1.Allocate(allocateRequest);
            while (attempt1.GetProgress() != 0)
            {
                Log.Info("Waiting for allocate event to be handled ...");
                Sharpen.Thread.Sleep(100);
            }
            allocateRequest.SetProgress((float)0.5);
            am1.Allocate(allocateRequest);
            while (attempt1.GetProgress() != 0.5)
            {
                Log.Info("Waiting for allocate event to be handled ...");
                Sharpen.Thread.Sleep(100);
            }
            allocateRequest.SetProgress((float)-1);
            am1.Allocate(allocateRequest);
            while (attempt1.GetProgress() != 0)
            {
                Log.Info("Waiting for allocate event to be handled ...");
                Sharpen.Thread.Sleep(100);
            }
        }
Exemplo n.º 23
0
        /// <exception cref="System.Exception"/>
        public virtual void TestNMToken()
        {
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                MockNM nm1 = rm.RegisterNode("h1:1234", 10000);
                NMTokenSecretManagerInRM nmTokenSecretManager = rm.GetRMContext().GetNMTokenSecretManager
                                                                    ();
                // submitting new application
                RMApp app = rm.SubmitApp(1000);
                // start scheduling.
                nm1.NodeHeartbeat(true);
                // Starting application attempt and launching
                // It should get registered with NMTokenSecretManager.
                RMAppAttempt attempt = app.GetCurrentAppAttempt();
                MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                // This will register application master.
                am.RegisterAppAttempt();
                AList <Container>          containersReceivedForNM1 = new AList <Container>();
                IList <ContainerId>        releaseContainerList     = new AList <ContainerId>();
                Dictionary <string, Token> nmTokens = new Dictionary <string, Token>();
                // initially requesting 2 containers.
                AllocateResponse response = am.Allocate("h1", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 2, nmTokens,
                                                      nm1);
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                // requesting 2 more containers.
                response = am.Allocate("h1", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 4, nmTokens,
                                                      nm1);
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                // We will be simulating NM restart so restarting newly added h2:1234
                // NM 2 now registers.
                MockNM nm2 = rm.RegisterNode("h2:1234", 10000);
                nm2.NodeHeartbeat(true);
                AList <Container> containersReceivedForNM2 = new AList <Container>();
                response = am.Allocate("h2", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 2, nmTokens,
                                                      nm2);
                NUnit.Framework.Assert.AreEqual(2, nmTokens.Count);
                // Simulating NM-2 restart.
                nm2 = rm.RegisterNode("h2:1234", 10000);
                // Wait for reconnect to make it through the RM and create a new RMNode
                IDictionary <NodeId, RMNode> nodes = rm.GetRMContext().GetRMNodes();
                while (nodes[nm2.GetNodeId()].GetLastNodeHeartBeatResponse().GetResponseId() > 0)
                {
                    Sharpen.Thread.Sleep(WaitSleepMs);
                }
                int interval = 40;
                // Wait for nm Token to be cleared.
                while (nmTokenSecretManager.IsApplicationAttemptNMTokenPresent(attempt.GetAppAttemptId
                                                                                   (), nm2.GetNodeId()) && interval-- > 0)
                {
                    Log.Info("waiting for nmToken to be cleared for : " + nm2.GetNodeId());
                    Sharpen.Thread.Sleep(WaitSleepMs);
                }
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                // removing NMToken for h2:1234
                Sharpen.Collections.Remove(nmTokens, nm2.GetNodeId().ToString());
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                // We should again receive the NMToken.
                response = am.Allocate("h2", 1000, 2, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 4, nmTokens,
                                                      nm2);
                NUnit.Framework.Assert.AreEqual(2, nmTokens.Count);
                // Now rolling over NMToken masterKey. it should resend the NMToken in
                // next allocate call.
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                  (attempt.GetAppAttemptId(), nm1.GetNodeId()));
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                  (attempt.GetAppAttemptId(), nm2.GetNodeId()));
                nmTokenSecretManager.RollMasterKey();
                nmTokenSecretManager.ActivateNextMasterKey();
                NUnit.Framework.Assert.IsFalse(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                   (attempt.GetAppAttemptId(), nm1.GetNodeId()));
                NUnit.Framework.Assert.IsFalse(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                   (attempt.GetAppAttemptId(), nm2.GetNodeId()));
                // It should not remove application attempt entry.
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                nmTokens.Clear();
                NUnit.Framework.Assert.AreEqual(0, nmTokens.Count);
                // We should again receive the NMToken.
                response = am.Allocate("h2", 1000, 1, releaseContainerList);
                NUnit.Framework.Assert.AreEqual(0, response.GetAllocatedContainers().Count);
                AllocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 5, nmTokens,
                                                      nm2);
                NUnit.Framework.Assert.AreEqual(1, nmTokens.Count);
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptNMTokenPresent
                                                  (attempt.GetAppAttemptId(), nm2.GetNodeId()));
                // After AM is finished making sure that nmtoken entry for app
                NUnit.Framework.Assert.IsTrue(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                  (attempt.GetAppAttemptId()));
                am.UnregisterAppAttempt();
                // marking all the containers as finished.
                foreach (Container container in containersReceivedForNM1)
                {
                    nm1.NodeHeartbeat(attempt.GetAppAttemptId(), container.GetId().GetContainerId(),
                                      ContainerState.Complete);
                }
                foreach (Container container_1 in containersReceivedForNM2)
                {
                    nm2.NodeHeartbeat(attempt.GetAppAttemptId(), container_1.GetId().GetContainerId()
                                      , ContainerState.Complete);
                }
                nm1.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
                am.WaitForState(RMAppAttemptState.Finished);
                NUnit.Framework.Assert.IsFalse(nmTokenSecretManager.IsApplicationAttemptRegistered
                                                   (attempt.GetAppAttemptId()));
            }
            finally
            {
                rm.Stop();
            }
        }