Ejemplo n.º 1
0
        public virtual void TestHandleContainerStatusInvalidCompletions()
        {
            rm = new MockRM(new YarnConfiguration());
            rm.Start();
            EventHandler handler = Org.Mockito.Mockito.Spy(rm.GetRMContext().GetDispatcher().
                                                           GetEventHandler());
            // Case 1: Unmanaged AM
            RMApp app = rm.SubmitApp(1024, true);
            // Case 1.1: AppAttemptId is null
            NMContainerStatus report = NMContainerStatus.NewInstance(ContainerId.NewContainerId
                                                                         (ApplicationAttemptId.NewInstance(app.GetApplicationId(), 2), 1), ContainerState
                                                                     .Complete, Resource.NewInstance(1024, 1), "Dummy Completed", 0, Priority.NewInstance
                                                                         (10), 1234);

            rm.GetResourceTrackerService().HandleNMContainerStatus(report, null);
            Org.Mockito.Mockito.Verify(handler, Org.Mockito.Mockito.Never()).Handle((Org.Apache.Hadoop.Yarn.Event.Event
                                                                                     )Matchers.Any());
            // Case 1.2: Master container is null
            RMAppAttemptImpl currentAttempt = (RMAppAttemptImpl)app.GetCurrentAppAttempt();

            currentAttempt.SetMasterContainer(null);
            report = NMContainerStatus.NewInstance(ContainerId.NewContainerId(currentAttempt.
                                                                              GetAppAttemptId(), 0), ContainerState.Complete, Resource.NewInstance(1024, 1), "Dummy Completed"
                                                   , 0, Priority.NewInstance(10), 1234);
            rm.GetResourceTrackerService().HandleNMContainerStatus(report, null);
            Org.Mockito.Mockito.Verify(handler, Org.Mockito.Mockito.Never()).Handle((Org.Apache.Hadoop.Yarn.Event.Event
                                                                                     )Matchers.Any());
            // Case 2: Managed AM
            app = rm.SubmitApp(1024);
            // Case 2.1: AppAttemptId is null
            report = NMContainerStatus.NewInstance(ContainerId.NewContainerId(ApplicationAttemptId
                                                                              .NewInstance(app.GetApplicationId(), 2), 1), ContainerState.Complete, Resource.NewInstance
                                                       (1024, 1), "Dummy Completed", 0, Priority.NewInstance(10), 1234);
            try
            {
                rm.GetResourceTrackerService().HandleNMContainerStatus(report, null);
            }
            catch (Exception)
            {
            }
            // expected - ignore
            Org.Mockito.Mockito.Verify(handler, Org.Mockito.Mockito.Never()).Handle((Org.Apache.Hadoop.Yarn.Event.Event
                                                                                     )Matchers.Any());
            // Case 2.2: Master container is null
            currentAttempt = (RMAppAttemptImpl)app.GetCurrentAppAttempt();
            currentAttempt.SetMasterContainer(null);
            report = NMContainerStatus.NewInstance(ContainerId.NewContainerId(currentAttempt.
                                                                              GetAppAttemptId(), 0), ContainerState.Complete, Resource.NewInstance(1024, 1), "Dummy Completed"
                                                   , 0, Priority.NewInstance(10), 1234);
            try
            {
                rm.GetResourceTrackerService().HandleNMContainerStatus(report, null);
            }
            catch (Exception)
            {
            }
            // expected - ignore
            Org.Mockito.Mockito.Verify(handler, Org.Mockito.Mockito.Never()).Handle((Org.Apache.Hadoop.Yarn.Event.Event
                                                                                     )Matchers.Any());
        }
Ejemplo n.º 2
0
        /// <exception cref="System.Exception"/>
        public virtual void TestAppCleanupWhenRMRestartedBeforeAppFinished()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start RM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 1024, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            MockNM nm2 = new MockNM("127.0.0.1:5678", 1024, rm1.GetResourceTrackerService());

            nm2.RegisterNode();
            // create app and launch the AM
            RMApp  app0 = rm1.SubmitApp(200);
            MockAM am0  = LaunchAM(app0, rm1, nm1);
            // alloc another container on nm2
            AllocateResponse allocResponse = am0.Allocate(Arrays.AsList(ResourceRequest.NewInstance
                                                                            (Priority.NewInstance(1), "*", Resource.NewInstance(1024, 0), 1)), null);

            while (null == allocResponse.GetAllocatedContainers() || allocResponse.GetAllocatedContainers
                       ().IsEmpty())
            {
                nm2.NodeHeartbeat(true);
                allocResponse = am0.Allocate(null, null);
                Sharpen.Thread.Sleep(1000);
            }
            // start new RM
            MockRM rm2 = new MockRM(conf, memStore);

            rm2.Start();
            // nm1/nm2 register to rm2, and do a heartbeat
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm1.RegisterNode(Arrays.AsList(NMContainerStatus.NewInstance(ContainerId.NewContainerId
                                                                             (am0.GetApplicationAttemptId(), 1), ContainerState.Complete, Resource.NewInstance
                                                                             (1024, 1), string.Empty, 0, Priority.NewInstance(0), 1234)), Arrays.AsList(app0.
                                                                                                                                                        GetApplicationId()));
            nm2.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm2.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
            // assert app state has been saved.
            rm2.WaitForState(app0.GetApplicationId(), RMAppState.Failed);
            // wait for application cleanup message received on NM1
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            // wait for application cleanup message received on NM2
            WaitForAppCleanupMessageRecved(nm2, app0.GetApplicationId());
            rm1.Stop();
            rm2.Stop();
        }
Ejemplo n.º 3
0
        /// <exception cref="System.Exception"/>
        public virtual void TestAppCleanupWhenNMReconnects()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start RM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            // create app and launch the AM
            RMApp  app0 = rm1.SubmitApp(200);
            MockAM am0  = LaunchAM(app0, rm1, nm1);

            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
            rm1.WaitForState(app0.GetApplicationId(), RMAppState.Failed);
            // wait for application cleanup message received
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            // reconnect NM with application still active
            nm1.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            rm1.Stop();
        }
Ejemplo n.º 4
0
        public virtual void TestNodeRegistrationVersionLessThanRM()
        {
            WriteToHostsFile("host2");
            Configuration conf = new Configuration();

            conf.Set(YarnConfiguration.RmNodesIncludeFilePath, hostFile.GetAbsolutePath());
            conf.Set(YarnConfiguration.RmNodemanagerMinimumVersion, "EqualToRM");
            rm = new MockRM(conf);
            rm.Start();
            string nmVersion = "1.9.9";
            ResourceTrackerService     resourceTrackerService = rm.GetResourceTrackerService();
            RegisterNodeManagerRequest req = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord <RegisterNodeManagerRequest
                                                                                            >();
            NodeId   nodeId     = NodeId.NewInstance("host2", 1234);
            Resource capability = BuilderUtils.NewResource(1024, 1);

            req.SetResource(capability);
            req.SetNodeId(nodeId);
            req.SetHttpPort(1234);
            req.SetNMVersion(nmVersion);
            // trying to register a invalid node.
            RegisterNodeManagerResponse response = resourceTrackerService.RegisterNodeManager
                                                       (req);

            NUnit.Framework.Assert.AreEqual(NodeAction.Shutdown, response.GetNodeAction());
            NUnit.Framework.Assert.IsTrue("Diagnostic message did not contain: 'Disallowed NodeManager "
                                          + "Version " + nmVersion + ", is less than the minimum version'", response.GetDiagnosticsMessage
                                              ().Contains("Disallowed NodeManager Version " + nmVersion + ", is less than the minimum version "
                                                          ));
        }
Ejemplo n.º 5
0
        /// <exception cref="System.Exception"/>
        public virtual void TestInvalidatedAMHostPortOnAMRestart()
        {
            MockRM rm1 = new MockRM(conf);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            // a failed app
            RMApp  app2 = rm1.SubmitApp(200);
            MockAM am2  = MockRM.LaunchAndRegisterAM(app2, rm1, nm1);

            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am2.WaitForState(RMAppAttemptState.Failed);
            rm1.WaitForState(app2.GetApplicationId(), RMAppState.Accepted);
            // before new attempt is launched, the app report returns the invalid AM
            // host and port.
            GetApplicationReportRequest request1 = GetApplicationReportRequest.NewInstance(app2
                                                                                           .GetApplicationId());
            ApplicationReport report1 = rm1.GetClientRMService().GetApplicationReport(request1
                                                                                      ).GetApplicationReport();

            NUnit.Framework.Assert.AreEqual("N/A", report1.GetHost());
            NUnit.Framework.Assert.AreEqual(-1, report1.GetRpcPort());
        }
Ejemplo n.º 6
0
        // This is to test AM Host and rpc port are invalidated after the am attempt
        // is killed or failed, so that client doesn't get the wrong information.
        /// <exception cref="System.Exception"/>
        public virtual void TestInvalidateAMHostPortWhenAMFailedOrKilled()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MockRM rm1 = new MockRM(conf);

            rm1.Start();
            // a succeeded app
            RMApp  app1 = rm1.SubmitApp(200);
            MockNM nm1  = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

            MockRM.FinishAMAndVerifyAppState(app1, rm1, nm1, am1);
            // a failed app
            RMApp  app2 = rm1.SubmitApp(200);
            MockAM am2  = MockRM.LaunchAndRegisterAM(app2, rm1, nm1);

            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am2.WaitForState(RMAppAttemptState.Failed);
            rm1.WaitForState(app2.GetApplicationId(), RMAppState.Failed);
            // a killed app
            RMApp  app3 = rm1.SubmitApp(200);
            MockAM am3  = MockRM.LaunchAndRegisterAM(app3, rm1, nm1);

            rm1.KillApp(app3.GetApplicationId());
            rm1.WaitForState(app3.GetApplicationId(), RMAppState.Killed);
            rm1.WaitForState(am3.GetApplicationAttemptId(), RMAppAttemptState.Killed);
            GetApplicationsRequest request1 = GetApplicationsRequest.NewInstance(EnumSet.Of(YarnApplicationState
                                                                                            .Finished, YarnApplicationState.Killed, YarnApplicationState.Failed));
            GetApplicationsResponse response1 = rm1.GetClientRMService().GetApplications(request1
                                                                                         );
            IList <ApplicationReport> appList1 = response1.GetApplicationList();

            NUnit.Framework.Assert.AreEqual(3, appList1.Count);
            foreach (ApplicationReport report in appList1)
            {
                // killed/failed apps host and rpc port are invalidated.
                if (report.GetApplicationId().Equals(app2.GetApplicationId()) || report.GetApplicationId
                        ().Equals(app3.GetApplicationId()))
                {
                    NUnit.Framework.Assert.AreEqual("N/A", report.GetHost());
                    NUnit.Framework.Assert.AreEqual(-1, report.GetRpcPort());
                }
                // succeeded app's host and rpc port is not invalidated
                if (report.GetApplicationId().Equals(app1.GetApplicationId()))
                {
                    NUnit.Framework.Assert.IsFalse(report.GetHost().Equals("N/A"));
                    NUnit.Framework.Assert.IsTrue(report.GetRpcPort() != -1);
                }
            }
        }
Ejemplo n.º 7
0
        public virtual void TestSetRMIdentifierInRegistration()
        {
            Configuration conf = new Configuration();

            rm = new MockRM(conf);
            rm.Start();
            MockNM nm = new MockNM("host1:1234", 5120, rm.GetResourceTrackerService());
            RegisterNodeManagerResponse response = nm.RegisterNode();

            // Verify the RMIdentifier is correctly set in RegisterNodeManagerResponse
            NUnit.Framework.Assert.AreEqual(ResourceManager.GetClusterTimeStamp(), response.GetRMIdentifier
                                                ());
        }
Ejemplo n.º 8
0
        /// <exception cref="System.Exception"/>
        public virtual void TestActivatingApplicationAfterAddingNM()
        {
            MockRM rm1 = new MockRM(conf);

            // start like normal because state is empty
            rm1.Start();
            // app that gets launched
            RMApp app1 = rm1.SubmitApp(200);
            // app that does not get launched
            RMApp app2 = rm1.SubmitApp(200);
            // app1 and app2 should be scheduled, but because no resource is available,
            // they are not activated.
            RMAppAttempt         attempt1   = app1.GetCurrentAppAttempt();
            ApplicationAttemptId attemptId1 = attempt1.GetAppAttemptId();

            rm1.WaitForState(attemptId1, RMAppAttemptState.Scheduled);
            RMAppAttempt         attempt2   = app2.GetCurrentAppAttempt();
            ApplicationAttemptId attemptId2 = attempt2.GetAppAttemptId();

            rm1.WaitForState(attemptId2, RMAppAttemptState.Scheduled);
            MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());
            MockNM nm2 = new MockNM("h2:5678", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            nm2.RegisterNode();
            //kick the scheduling
            nm1.NodeHeartbeat(true);
            // app1 should be allocated now
            rm1.WaitForState(attemptId1, RMAppAttemptState.Allocated);
            rm1.WaitForState(attemptId2, RMAppAttemptState.Scheduled);
            nm2.NodeHeartbeat(true);
            // app2 should be allocated now
            rm1.WaitForState(attemptId1, RMAppAttemptState.Allocated);
            rm1.WaitForState(attemptId2, RMAppAttemptState.Allocated);
            rm1.Stop();
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestUsageWithOneAttemptAndOneContainer()
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm = new MockNM("127.0.0.1:1234", 15120, rm.GetResourceTrackerService());

            nm.RegisterNode();
            RMApp        app0         = rm.SubmitApp(200);
            RMAppMetrics rmAppMetrics = app0.GetRMAppMetrics();

            NUnit.Framework.Assert.IsTrue("Before app submittion, memory seconds should have been 0 but was "
                                          + rmAppMetrics.GetMemorySeconds(), rmAppMetrics.GetMemorySeconds() == 0);
            NUnit.Framework.Assert.IsTrue("Before app submission, vcore seconds should have been 0 but was "
                                          + rmAppMetrics.GetVcoreSeconds(), rmAppMetrics.GetVcoreSeconds() == 0);
            RMAppAttempt attempt0 = app0.GetCurrentAppAttempt();

            nm.NodeHeartbeat(true);
            MockAM am0 = rm.SendAMLaunched(attempt0.GetAppAttemptId());

            am0.RegisterAppAttempt();
            RMContainer rmContainer = rm.GetResourceScheduler().GetRMContainer(attempt0.GetMasterContainer
                                                                                   ().GetId());
            // Allow metrics to accumulate.
            int sleepInterval       = 1000;
            int cumulativeSleepTime = 0;

            while (rmAppMetrics.GetMemorySeconds() <= 0 && cumulativeSleepTime < 5000)
            {
                Sharpen.Thread.Sleep(sleepInterval);
                cumulativeSleepTime += sleepInterval;
            }
            rmAppMetrics = app0.GetRMAppMetrics();
            NUnit.Framework.Assert.IsTrue("While app is running, memory seconds should be >0 but is "
                                          + rmAppMetrics.GetMemorySeconds(), rmAppMetrics.GetMemorySeconds() > 0);
            NUnit.Framework.Assert.IsTrue("While app is running, vcore seconds should be >0 but is "
                                          + rmAppMetrics.GetVcoreSeconds(), rmAppMetrics.GetVcoreSeconds() > 0);
            MockRM.FinishAMAndVerifyAppState(app0, rm, nm, am0);
            AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(rmContainer);

            rmAppMetrics = app0.GetRMAppMetrics();
            NUnit.Framework.Assert.AreEqual("Unexcpected MemorySeconds value", ru.GetMemorySeconds
                                                (), rmAppMetrics.GetMemorySeconds());
            NUnit.Framework.Assert.AreEqual("Unexpected VcoreSeconds value", ru.GetVcoreSeconds
                                                (), rmAppMetrics.GetVcoreSeconds());
            rm.Stop();
        }
Ejemplo n.º 10
0
        public virtual void TestNodeRegistrationWithMinimumAllocations()
        {
            Configuration conf = new Configuration();

            conf.Set(YarnConfiguration.RmSchedulerMinimumAllocationMb, "2048");
            conf.Set(YarnConfiguration.RmSchedulerMinimumAllocationVcores, "4");
            rm = new MockRM(conf);
            rm.Start();
            ResourceTrackerService     resourceTrackerService = rm.GetResourceTrackerService();
            RegisterNodeManagerRequest req = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord <RegisterNodeManagerRequest
                                                                                            >();
            NodeId nodeId = BuilderUtils.NewNodeId("host", 1234);

            req.SetNodeId(nodeId);
            Resource capability = BuilderUtils.NewResource(1024, 1);

            req.SetResource(capability);
            RegisterNodeManagerResponse response1 = resourceTrackerService.RegisterNodeManager
                                                        (req);

            NUnit.Framework.Assert.AreEqual(NodeAction.Shutdown, response1.GetNodeAction());
            capability.SetMemory(2048);
            capability.SetVirtualCores(1);
            req.SetResource(capability);
            RegisterNodeManagerResponse response2 = resourceTrackerService.RegisterNodeManager
                                                        (req);

            NUnit.Framework.Assert.AreEqual(NodeAction.Shutdown, response2.GetNodeAction());
            capability.SetMemory(1024);
            capability.SetVirtualCores(4);
            req.SetResource(capability);
            RegisterNodeManagerResponse response3 = resourceTrackerService.RegisterNodeManager
                                                        (req);

            NUnit.Framework.Assert.AreEqual(NodeAction.Shutdown, response3.GetNodeAction());
            capability.SetMemory(2048);
            capability.SetVirtualCores(4);
            req.SetResource(capability);
            RegisterNodeManagerResponse response4 = resourceTrackerService.RegisterNodeManager
                                                        (req);

            NUnit.Framework.Assert.AreEqual(NodeAction.Normal, response4.GetNodeAction());
        }
Ejemplo n.º 11
0
        public virtual void TestNodeRegistrationFailure()
        {
            WriteToHostsFile("host1");
            Configuration conf = new Configuration();

            conf.Set(YarnConfiguration.RmNodesIncludeFilePath, hostFile.GetAbsolutePath());
            rm = new MockRM(conf);
            rm.Start();
            ResourceTrackerService     resourceTrackerService = rm.GetResourceTrackerService();
            RegisterNodeManagerRequest req = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord <RegisterNodeManagerRequest
                                                                                            >();
            NodeId nodeId = NodeId.NewInstance("host2", 1234);

            req.SetNodeId(nodeId);
            req.SetHttpPort(1234);
            // trying to register a invalid node.
            RegisterNodeManagerResponse response = resourceTrackerService.RegisterNodeManager
                                                       (req);

            NUnit.Framework.Assert.AreEqual(NodeAction.Shutdown, response.GetNodeAction());
            NUnit.Framework.Assert.AreEqual("Disallowed NodeManager from  host2, Sending SHUTDOWN signal to the NodeManager."
                                            , response.GetDiagnosticsMessage());
        }
Ejemplo n.º 12
0
        public virtual void TestNodeRegistrationSuccess()
        {
            WriteToHostsFile("host2");
            Configuration conf = new Configuration();

            conf.Set(YarnConfiguration.RmNodesIncludeFilePath, hostFile.GetAbsolutePath());
            rm = new MockRM(conf);
            rm.Start();
            ResourceTrackerService     resourceTrackerService = rm.GetResourceTrackerService();
            RegisterNodeManagerRequest req = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord <RegisterNodeManagerRequest
                                                                                            >();
            NodeId   nodeId     = NodeId.NewInstance("host2", 1234);
            Resource capability = BuilderUtils.NewResource(1024, 1);

            req.SetResource(capability);
            req.SetNodeId(nodeId);
            req.SetHttpPort(1234);
            req.SetNMVersion(YarnVersionInfo.GetVersion());
            // trying to register a invalid node.
            RegisterNodeManagerResponse response = resourceTrackerService.RegisterNodeManager
                                                       (req);

            NUnit.Framework.Assert.AreEqual(NodeAction.Normal, response.GetNodeAction());
        }
        /// <exception cref="System.Exception"/>
        private void AmRestartTests(bool keepRunningContainers)
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            RMApp app = rm.SubmitApp(200, "name", "user", new Dictionary <ApplicationAccessType
                                                                          , string>(), false, "default", -1, null, "MAPREDUCE", false, keepRunningContainers
                                     );
            MockNM nm = new MockNM("127.0.0.1:1234", 10240, rm.GetResourceTrackerService());

            nm.RegisterNode();
            MockAM am0           = MockRM.LaunchAndRegisterAM(app, rm, nm);
            int    NumContainers = 1;

            // allocate NUM_CONTAINERS containers
            am0.Allocate("127.0.0.1", 1024, NumContainers, new AList <ContainerId>());
            nm.NodeHeartbeat(true);
            // wait for containers to be allocated.
            IList <Container> containers = am0.Allocate(new AList <ResourceRequest>(), new AList
                                                        <ContainerId>()).GetAllocatedContainers();

            while (containers.Count != NumContainers)
            {
                nm.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, am0.Allocate(new AList <ResourceRequest>(),
                                                                    new AList <ContainerId>()).GetAllocatedContainers());
                Sharpen.Thread.Sleep(200);
            }
            // launch the 2nd container.
            ContainerId containerId2 = ContainerId.NewContainerId(am0.GetApplicationAttemptId
                                                                      (), 2);

            nm.NodeHeartbeat(am0.GetApplicationAttemptId(), containerId2.GetContainerId(), ContainerState
                             .Running);
            rm.WaitForState(nm, containerId2, RMContainerState.Running);
            // Capture the containers here so the metrics can be calculated after the
            // app has completed.
            ICollection <RMContainer> rmContainers = rm.scheduler.GetSchedulerAppInfo(am0.GetApplicationAttemptId
                                                                                          ()).GetLiveContainers();
            // fail the first app attempt by sending CONTAINER_FINISHED event without
            // registering.
            ContainerId amContainerId = app.GetCurrentAppAttempt().GetMasterContainer().GetId
                                            ();

            nm.NodeHeartbeat(am0.GetApplicationAttemptId(), amContainerId.GetContainerId(), ContainerState
                             .Complete);
            am0.WaitForState(RMAppAttemptState.Failed);
            long memorySeconds = 0;
            long vcoreSeconds  = 0;

            // Calculate container usage metrics for first attempt.
            if (keepRunningContainers)
            {
                // Only calculate the usage for the one container that has completed.
                foreach (RMContainer c in rmContainers)
                {
                    if (c.GetContainerId().Equals(amContainerId))
                    {
                        AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c);
                        memorySeconds += ru.GetMemorySeconds();
                        vcoreSeconds  += ru.GetVcoreSeconds();
                    }
                    else
                    {
                        // The remaining container should be RUNNING.
                        NUnit.Framework.Assert.IsTrue("After first attempt failed, remaining container "
                                                      + "should still be running. ", c.GetContainerState().Equals(ContainerState.Running
                                                                                                                  ));
                    }
                }
            }
            else
            {
                // If keepRunningContainers is false, all live containers should now
                // be completed. Calculate the resource usage metrics for all of them.
                foreach (RMContainer c in rmContainers)
                {
                    AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c);
                    memorySeconds += ru.GetMemorySeconds();
                    vcoreSeconds  += ru.GetVcoreSeconds();
                }
            }
            // wait for app to start a new attempt.
            rm.WaitForState(app.GetApplicationId(), RMAppState.Accepted);
            // assert this is a new AM.
            RMAppAttempt attempt2 = app.GetCurrentAppAttempt();

            NUnit.Framework.Assert.IsFalse(attempt2.GetAppAttemptId().Equals(am0.GetApplicationAttemptId
                                                                                 ()));
            // launch the new AM
            nm.NodeHeartbeat(true);
            MockAM am1 = rm.SendAMLaunched(attempt2.GetAppAttemptId());

            am1.RegisterAppAttempt();
            // allocate NUM_CONTAINERS containers
            am1.Allocate("127.0.0.1", 1024, NumContainers, new AList <ContainerId>());
            nm.NodeHeartbeat(true);
            // wait for containers to be allocated.
            containers = am1.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>())
                         .GetAllocatedContainers();
            while (containers.Count != NumContainers)
            {
                nm.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, am1.Allocate(new AList <ResourceRequest>(),
                                                                    new AList <ContainerId>()).GetAllocatedContainers());
                Sharpen.Thread.Sleep(200);
            }
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
            // Capture running containers for later use by metrics calculations.
            rmContainers = rm.scheduler.GetSchedulerAppInfo(attempt2.GetAppAttemptId()).GetLiveContainers
                               ();
            // complete container by sending the container complete event which has
            // earlier attempt's attemptId
            amContainerId = app.GetCurrentAppAttempt().GetMasterContainer().GetId();
            nm.NodeHeartbeat(am0.GetApplicationAttemptId(), amContainerId.GetContainerId(), ContainerState
                             .Complete);
            MockRM.FinishAMAndVerifyAppState(app, rm, nm, am1);
            // Calculate container usage metrics for second attempt.
            foreach (RMContainer c_1 in rmContainers)
            {
                AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c_1);
                memorySeconds += ru.GetMemorySeconds();
                vcoreSeconds  += ru.GetVcoreSeconds();
            }
            RMAppMetrics rmAppMetrics = app.GetRMAppMetrics();

            NUnit.Framework.Assert.AreEqual("Unexcpected MemorySeconds value", memorySeconds,
                                            rmAppMetrics.GetMemorySeconds());
            NUnit.Framework.Assert.AreEqual("Unexpected VcoreSeconds value", vcoreSeconds, rmAppMetrics
                                            .GetVcoreSeconds());
            rm.Stop();
            return;
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestUsageWithMultipleContainersAndRMRestart()
        {
            // Set max attempts to 1 so that when the first attempt fails, the app
            // won't try to start a new one.
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
            conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            MockRM rm0 = new MockRM(conf, memStore);

            rm0.Start();
            MockNM nm = new MockNM("127.0.0.1:1234", 65536, rm0.GetResourceTrackerService());

            nm.RegisterNode();
            RMApp app0 = rm0.SubmitApp(200);

            rm0.WaitForState(app0.GetApplicationId(), RMAppState.Accepted);
            RMAppAttempt         attempt0   = app0.GetCurrentAppAttempt();
            ApplicationAttemptId attemptId0 = attempt0.GetAppAttemptId();

            rm0.WaitForState(attemptId0, RMAppAttemptState.Scheduled);
            nm.NodeHeartbeat(true);
            rm0.WaitForState(attemptId0, RMAppAttemptState.Allocated);
            MockAM am0 = rm0.SendAMLaunched(attempt0.GetAppAttemptId());

            am0.RegisterAppAttempt();
            int NumContainers = 2;

            am0.Allocate("127.0.0.1", 1000, NumContainers, new AList <ContainerId>());
            nm.NodeHeartbeat(true);
            IList <Container> conts = am0.Allocate(new AList <ResourceRequest>(), new AList <ContainerId
                                                                                             >()).GetAllocatedContainers();

            while (conts.Count != NumContainers)
            {
                nm.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(conts, am0.Allocate(new AList <ResourceRequest>(), new
                                                               AList <ContainerId>()).GetAllocatedContainers());
                Sharpen.Thread.Sleep(500);
            }
            // launch the 2nd and 3rd containers.
            foreach (Container c in conts)
            {
                nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c.GetId().GetContainerId(), ContainerState
                                 .Running);
                rm0.WaitForState(nm, c.GetId(), RMContainerState.Running);
            }
            // Get the RMContainers for all of the live containers, to be used later
            // for metrics calculations and comparisons.
            ICollection <RMContainer> rmContainers = rm0.scheduler.GetSchedulerAppInfo(attempt0
                                                                                       .GetAppAttemptId()).GetLiveContainers();
            // Allow metrics to accumulate.
            int sleepInterval       = 1000;
            int cumulativeSleepTime = 0;

            while (app0.GetRMAppMetrics().GetMemorySeconds() <= 0 && cumulativeSleepTime < 5000
                   )
            {
                Sharpen.Thread.Sleep(sleepInterval);
                cumulativeSleepTime += sleepInterval;
            }
            // Stop all non-AM containers
            foreach (Container c_1 in conts)
            {
                if (c_1.GetId().GetContainerId() == 1)
                {
                    continue;
                }
                nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c_1.GetId().GetContainerId(), ContainerState
                                 .Complete);
                rm0.WaitForState(nm, c_1.GetId(), RMContainerState.Completed);
            }
            // After all other containers have completed, manually complete the master
            // container in order to trigger a save to the state store of the resource
            // usage metrics. This will cause the attempt to fail, and, since the max
            // attempt retries is 1, the app will also fail. This is intentional so
            // that all containers will complete prior to saving.
            ContainerId cId = ContainerId.NewContainerId(attempt0.GetAppAttemptId(), 1);

            nm.NodeHeartbeat(attempt0.GetAppAttemptId(), cId.GetContainerId(), ContainerState
                             .Complete);
            rm0.WaitForState(nm, cId, RMContainerState.Completed);
            // Check that the container metrics match those from the app usage report.
            long memorySeconds = 0;
            long vcoreSeconds  = 0;

            foreach (RMContainer c_2 in rmContainers)
            {
                AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c_2);
                memorySeconds += ru.GetMemorySeconds();
                vcoreSeconds  += ru.GetVcoreSeconds();
            }
            RMAppMetrics metricsBefore = app0.GetRMAppMetrics();

            NUnit.Framework.Assert.AreEqual("Unexcpected MemorySeconds value", memorySeconds,
                                            metricsBefore.GetMemorySeconds());
            NUnit.Framework.Assert.AreEqual("Unexpected VcoreSeconds value", vcoreSeconds, metricsBefore
                                            .GetVcoreSeconds());
            // create new RM to represent RM restart. Load up the state store.
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            RMApp app0After = rm1.GetRMContext().GetRMApps()[app0.GetApplicationId()];
            // Compare container resource usage metrics from before and after restart.
            RMAppMetrics metricsAfter = app0After.GetRMAppMetrics();

            NUnit.Framework.Assert.AreEqual("Vcore seconds were not the same after RM Restart"
                                            , metricsBefore.GetVcoreSeconds(), metricsAfter.GetVcoreSeconds());
            NUnit.Framework.Assert.AreEqual("Memory seconds were not the same after RM Restart"
                                            , metricsBefore.GetMemorySeconds(), metricsAfter.GetMemorySeconds());
            rm0.Stop();
            rm0.Close();
            rm1.Stop();
            rm1.Close();
        }
Ejemplo n.º 15
0
        // The test verifies processing of NMContainerStatuses which are sent during
        // NM registration.
        // 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
        // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
        // 3. Verify for number of container allocated by RM
        // 4. Verify Memory Usage by cluster, it should be 3072. AM memory + requested
        // memory. 1024 + 2048=3072
        // 5. Re-register NM by sending completed container status
        // 6. Verify for Memory Used, it should be 1024
        // 7. Send AM heatbeat to RM. Allocated response should contain completed
        // container.
        /// <exception cref="System.Exception"/>
        public virtual void TestProcessingNMContainerStatusesOnNMRestart()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            int    nmMemory        = 8192;
            int    amMemory        = 1024;
            int    containerMemory = 2048;
            MockNM nm1             = new MockNM("127.0.0.1:1234", nmMemory, rm1.GetResourceTrackerService
                                                    ());

            nm1.RegisterNode();
            RMApp  app0 = rm1.SubmitApp(amMemory);
            MockAM am0  = MockRM.LaunchAndRegisterAM(app0, rm1, nm1);
            // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
            int noOfContainers = 1;
            IList <Container> allocateContainers = am0.AllocateAndWaitForContainers(noOfContainers
                                                                                    , containerMemory, nm1);

            // 3. Verify for number of container allocated by RM
            NUnit.Framework.Assert.AreEqual(noOfContainers, allocateContainers.Count);
            Container container = allocateContainers[0];

            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Running);
            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), container.GetId().GetContainerId
                                  (), ContainerState.Running);
            rm1.WaitForState(app0.GetApplicationId(), RMAppState.Running);
            // 4. Verify Memory Usage by cluster, it should be 3072. AM memory +
            // requested memory. 1024 + 2048=3072
            ResourceScheduler rs = rm1.GetRMContext().GetScheduler();
            int allocatedMB      = rs.GetRootQueueMetrics().GetAllocatedMB();

            NUnit.Framework.Assert.AreEqual(amMemory + containerMemory, allocatedMB);
            // 5. Re-register NM by sending completed container status
            IList <NMContainerStatus> nMContainerStatusForApp = CreateNMContainerStatusForApp(
                am0);

            nm1.RegisterNode(nMContainerStatusForApp, Arrays.AsList(app0.GetApplicationId()));
            WaitForClusterMemory(nm1, rs, amMemory);
            // 6. Verify for Memory Used, it should be 1024
            NUnit.Framework.Assert.AreEqual(amMemory, rs.GetRootQueueMetrics().GetAllocatedMB
                                                ());
            // 7. Send AM heatbeat to RM. Allocated response should contain completed
            // container
            AllocateRequest req = AllocateRequest.NewInstance(0, 0F, new AList <ResourceRequest
                                                                                >(), new AList <ContainerId>(), null);
            AllocateResponse        allocate = am0.Allocate(req);
            IList <ContainerStatus> completedContainersStatuses = allocate.GetCompletedContainersStatuses
                                                                      ();

            NUnit.Framework.Assert.AreEqual(noOfContainers, completedContainersStatuses.Count
                                            );
            // Application clean up should happen Cluster memory used is 0
            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
            WaitForClusterMemory(nm1, rs, 0);
            rm1.Stop();
        }
Ejemplo n.º 16
0
        public virtual void TestReconnectNode()
        {
            DrainDispatcher dispatcher = new DrainDispatcher();

            rm = new _MockRM_567(this, dispatcher);
            rm.Start();
            MockNM nm1 = rm.RegisterNode("host1:1234", 5120);
            MockNM nm2 = rm.RegisterNode("host2:5678", 5120);

            nm1.NodeHeartbeat(true);
            nm2.NodeHeartbeat(false);
            dispatcher.Await();
            CheckUnealthyNMCount(rm, nm2, true, 1);
            int          expectedNMs = ClusterMetrics.GetMetrics().GetNumActiveNMs();
            QueueMetrics metrics     = rm.GetResourceScheduler().GetRootQueueMetrics();

            // TODO Metrics incorrect in case of the FifoScheduler
            NUnit.Framework.Assert.AreEqual(5120, metrics.GetAvailableMB());
            // reconnect of healthy node
            nm1 = rm.RegisterNode("host1:1234", 5120);
            NodeHeartbeatResponse response = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(response.GetNodeAction()));
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual(expectedNMs, ClusterMetrics.GetMetrics().GetNumActiveNMs
                                                ());
            CheckUnealthyNMCount(rm, nm2, true, 1);
            // reconnect of unhealthy node
            nm2      = rm.RegisterNode("host2:5678", 5120);
            response = nm2.NodeHeartbeat(false);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(response.GetNodeAction()));
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual(expectedNMs, ClusterMetrics.GetMetrics().GetNumActiveNMs
                                                ());
            CheckUnealthyNMCount(rm, nm2, true, 1);
            // unhealthy node changed back to healthy
            nm2 = rm.RegisterNode("host2:5678", 5120);
            dispatcher.Await();
            response = nm2.NodeHeartbeat(true);
            response = nm2.NodeHeartbeat(true);
            dispatcher.Await();
            NUnit.Framework.Assert.AreEqual(5120 + 5120, metrics.GetAvailableMB());
            // reconnect of node with changed capability
            nm1 = rm.RegisterNode("host2:5678", 10240);
            dispatcher.Await();
            response = nm1.NodeHeartbeat(true);
            dispatcher.Await();
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(response.GetNodeAction()));
            NUnit.Framework.Assert.AreEqual(5120 + 10240, metrics.GetAvailableMB());
            // reconnect of node with changed capability and running applications
            IList <ApplicationId> runningApps = new AList <ApplicationId>();

            runningApps.AddItem(ApplicationId.NewInstance(1, 0));
            nm1 = rm.RegisterNode("host2:5678", 15360, 2, runningApps);
            dispatcher.Await();
            response = nm1.NodeHeartbeat(true);
            dispatcher.Await();
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(response.GetNodeAction()));
            NUnit.Framework.Assert.AreEqual(5120 + 15360, metrics.GetAvailableMB());
            // reconnect healthy node changing http port
            nm1 = new MockNM("host1:1234", 5120, rm.GetResourceTrackerService());
            nm1.SetHttpPort(3);
            nm1.RegisterNode();
            dispatcher.Await();
            response = nm1.NodeHeartbeat(true);
            response = nm1.NodeHeartbeat(true);
            dispatcher.Await();
            RMNode rmNode = rm.GetRMContext().GetRMNodes()[nm1.GetNodeId()];

            NUnit.Framework.Assert.AreEqual(3, rmNode.GetHttpPort());
            NUnit.Framework.Assert.AreEqual(5120, rmNode.GetTotalCapability().GetMemory());
            NUnit.Framework.Assert.AreEqual(5120 + 15360, metrics.GetAvailableMB());
        }