示例#1
0
        public virtual void TestAddNewExcludePathToConfiguration()
        {
            Configuration conf = new Configuration();

            rm = new MockRM(conf);
            rm.Start();
            MockNM         nm1     = rm.RegisterNode("host1:1234", 5120);
            MockNM         nm2     = rm.RegisterNode("host2:5678", 10240);
            ClusterMetrics metrics = ClusterMetrics.GetMetrics();

            System.Diagnostics.Debug.Assert((metrics != null));
            int initialMetricCount = metrics.GetNumDecommisionedNMs();
            NodeHeartbeatResponse nodeHeartbeat = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.AreEqual(NodeAction.Normal, nodeHeartbeat.GetNodeAction());
            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.AreEqual(NodeAction.Normal, nodeHeartbeat.GetNodeAction());
            WriteToHostsFile("host2");
            conf.Set(YarnConfiguration.RmNodesExcludeFilePath, hostFile.GetAbsolutePath());
            rm.GetNodesListManager().RefreshNodes(conf);
            CheckDecommissionedNMCount(rm, ++initialMetricCount);
            nodeHeartbeat = nm1.NodeHeartbeat(true);
            NUnit.Framework.Assert.AreEqual("Node should not have been decomissioned.", NodeAction
                                            .Normal, nodeHeartbeat.GetNodeAction());
            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.AreEqual("Node should have been decomissioned but is in state"
                                            + nodeHeartbeat.GetNodeAction(), NodeAction.Shutdown, nodeHeartbeat.GetNodeAction
                                                ());
        }
示例#2
0
        /// <exception cref="System.Exception"/>
        public virtual void TestInvalidContainerReleaseRequest()
        {
            MockRM rm = new MockRM(conf);

            try
            {
                rm.Start();
                // Register node1
                MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
                // Submit an application
                RMApp app1 = rm.SubmitApp(1024);
                // kick the scheduling
                nm1.NodeHeartbeat(true);
                RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
                MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());
                am1.RegisterAppAttempt();
                am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
                AllocateResponse alloc1Response = am1.Schedule();
                // send the request
                // kick the scheduler
                nm1.NodeHeartbeat(true);
                while (alloc1Response.GetAllocatedContainers().Count < 1)
                {
                    Log.Info("Waiting for containers to be created for app 1...");
                    Sharpen.Thread.Sleep(1000);
                    alloc1Response = am1.Schedule();
                }
                NUnit.Framework.Assert.IsTrue(alloc1Response.GetAllocatedContainers().Count > 0);
                RMApp app2 = rm.SubmitApp(1024);
                nm1.NodeHeartbeat(true);
                RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
                MockAM       am2      = rm.SendAMLaunched(attempt2.GetAppAttemptId());
                am2.RegisterAppAttempt();
                // Now trying to release container allocated for app1 -> appAttempt1.
                ContainerId cId = alloc1Response.GetAllocatedContainers()[0].GetId();
                am2.AddContainerToBeReleased(cId);
                try
                {
                    am2.Schedule();
                    NUnit.Framework.Assert.Fail("Exception was expected!!");
                }
                catch (InvalidContainerReleaseException e)
                {
                    StringBuilder sb = new StringBuilder("Cannot release container : ");
                    sb.Append(cId.ToString());
                    sb.Append(" not belonging to this application attempt : ");
                    sb.Append(attempt2.GetAppAttemptId().ToString());
                    NUnit.Framework.Assert.IsTrue(e.Message.Contains(sb.ToString()));
                }
            }
            finally
            {
                if (rm != null)
                {
                    rm.Stop();
                }
            }
        }
示例#3
0
        /// <exception cref="System.Exception"/>
        public virtual void TestAppOnMultiNode()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            conf.Set("yarn.scheduler.capacity.node-locality-delay", "-1");
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
            MockNM nm2 = rm.RegisterNode("h2:5678", 10240);
            RMApp  app = rm.SubmitApp(2000);

            //kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());

            am.RegisterAppAttempt();
            //request for containers
            int request = 13;

            am.Allocate("h1", 1000, request, new AList <ContainerId>());
            //kick the scheduler
            IList <Container> conts = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId
                                                                                            >()).GetAllocatedContainers();
            int contReceived = conts.Count;

            while (contReceived < 3)
            {
                //only 3 containers are available on node1
                nm1.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(conts, am.Allocate(new AList <ResourceRequest>(), new AList
                                                              <ContainerId>()).GetAllocatedContainers());
                contReceived = conts.Count;
                Log.Info("Got " + contReceived + " containers. Waiting to get " + 3);
                Sharpen.Thread.Sleep(WaitSleepMs);
            }
            NUnit.Framework.Assert.AreEqual(3, conts.Count);
            //send node2 heartbeat
            conts = am.Allocate(new AList <ResourceRequest>(), new AList <ContainerId>()).GetAllocatedContainers
                        ();
            contReceived = conts.Count;
            while (contReceived < 10)
            {
                nm2.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(conts, am.Allocate(new AList <ResourceRequest>(), new AList
                                                              <ContainerId>()).GetAllocatedContainers());
                contReceived = conts.Count;
                Log.Info("Got " + contReceived + " containers. Waiting to get " + 10);
                Sharpen.Thread.Sleep(WaitSleepMs);
            }
            NUnit.Framework.Assert.AreEqual(10, conts.Count);
            am.UnregisterAppAttempt();
            nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Finished);
            rm.Stop();
        }
        public virtual void TestAMLaunchAndCleanup()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            TestApplicationMasterLauncher.MyContainerManagerImpl containerManager = new TestApplicationMasterLauncher.MyContainerManagerImpl
                                                                                        ();
            MockRMWithCustomAMLauncher rm = new MockRMWithCustomAMLauncher(containerManager);

            rm.Start();
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5120);
            RMApp  app = rm.SubmitApp(2000);

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            int waitCount = 0;

            while (containerManager.launched == false && waitCount++ < 20)
            {
                Log.Info("Waiting for AM Launch to happen..");
                Sharpen.Thread.Sleep(1000);
            }
            NUnit.Framework.Assert.IsTrue(containerManager.launched);
            RMAppAttempt         attempt      = app.GetCurrentAppAttempt();
            ApplicationAttemptId appAttemptId = attempt.GetAppAttemptId();

            NUnit.Framework.Assert.AreEqual(appAttemptId.ToString(), containerManager.attemptIdAtContainerManager
                                            );
            NUnit.Framework.Assert.AreEqual(app.GetSubmitTime(), containerManager.submitTimeAtContainerManager
                                            );
            NUnit.Framework.Assert.AreEqual(app.GetRMAppAttempt(appAttemptId).GetMasterContainer
                                                ().GetId().ToString(), containerManager.containerIdAtContainerManager);
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId().ToString(), containerManager.nmHostAtContainerManager
                                            );
            NUnit.Framework.Assert.AreEqual(YarnConfiguration.DefaultRmAmMaxAttempts, containerManager
                                            .maxAppAttempts);
            MockAM am = new MockAM(rm.GetRMContext(), rm.GetApplicationMasterService(), appAttemptId
                                   );

            am.RegisterAppAttempt();
            am.UnregisterAppAttempt();
            //complete the AM container to finish the app normally
            nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Finished);
            waitCount = 0;
            while (containerManager.cleanedup == false && waitCount++ < 20)
            {
                Log.Info("Waiting for AM Cleanup to happen..");
                Sharpen.Thread.Sleep(1000);
            }
            NUnit.Framework.Assert.IsTrue(containerManager.cleanedup);
            am.WaitForState(RMAppAttemptState.Finished);
            rm.Stop();
        }
示例#5
0
        public virtual void TestDecommissionWithExcludeHosts()
        {
            Configuration conf = new Configuration();

            conf.Set(YarnConfiguration.RmNodesExcludeFilePath, hostFile.GetAbsolutePath());
            WriteToHostsFile(string.Empty);
            DrainDispatcher dispatcher = new DrainDispatcher();

            rm = new _MockRM_162(dispatcher, conf);
            rm.Start();
            MockNM nm1 = rm.RegisterNode("host1:1234", 5120);
            MockNM nm2 = rm.RegisterNode("host2:5678", 10240);
            MockNM nm3 = rm.RegisterNode("localhost:4433", 1024);

            dispatcher.Await();
            int metricCount = ClusterMetrics.GetMetrics().GetNumDecommisionedNMs();
            NodeHeartbeatResponse nodeHeartbeat = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction
                                                                       ()));
            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction
                                                                       ()));
            dispatcher.Await();
            // To test that IPs also work
            string ip = NetUtils.NormalizeHostName("localhost");

            WriteToHostsFile("host2", ip);
            rm.GetNodesListManager().RefreshNodes(conf);
            CheckDecommissionedNMCount(rm, metricCount + 2);
            nodeHeartbeat = nm1.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction
                                                                       ()));
            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue("The decommisioned metrics are not updated", NodeAction
                                          .Shutdown.Equals(nodeHeartbeat.GetNodeAction()));
            nodeHeartbeat = nm3.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue("The decommisioned metrics are not updated", NodeAction
                                          .Shutdown.Equals(nodeHeartbeat.GetNodeAction()));
            dispatcher.Await();
            WriteToHostsFile(string.Empty);
            rm.GetNodesListManager().RefreshNodes(conf);
            nm3 = rm.RegisterNode("localhost:4433", 1024);
            dispatcher.Await();
            nodeHeartbeat = nm3.NodeHeartbeat(true);
            dispatcher.Await();
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction
                                                                       ()));
            // decommissined node is 1 since 1 node is rejoined after updating exclude
            // file
            CheckDecommissionedNMCount(rm, metricCount + 1);
        }
示例#6
0
        // Test even if AM container is allocated with containerId not equal to 1, the
        // following allocate requests from AM should be able to retrieve the
        // corresponding NM Token.
        /// <exception cref="System.Exception"/>
        public virtual void TestNMTokenSentForNormalContainer()
        {
            conf.Set(YarnConfiguration.RmScheduler, typeof(CapacityScheduler).GetCanonicalName
                         ());
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM       nm1     = rm.RegisterNode("h1:1234", 5120);
            RMApp        app     = rm.SubmitApp(2000);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            // Call getNewContainerId to increase container Id so that the AM container
            // Id doesn't equal to one.
            CapacityScheduler cs = (CapacityScheduler)rm.GetResourceScheduler();

            cs.GetApplicationAttempt(attempt.GetAppAttemptId()).GetNewContainerId();
            // kick the scheduling
            nm1.NodeHeartbeat(true);
            MockAM am = MockRM.LaunchAM(app, rm, nm1);

            // am container Id not equal to 1.
            NUnit.Framework.Assert.IsTrue(attempt.GetMasterContainer().GetId().GetContainerId
                                              () != 1);
            // NMSecretManager doesn't record the node on which the am is allocated.
            NUnit.Framework.Assert.IsFalse(rm.GetRMContext().GetNMTokenSecretManager().IsApplicationAttemptNMTokenPresent
                                               (attempt.GetAppAttemptId(), nm1.GetNodeId()));
            am.RegisterAppAttempt();
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
            int NumContainers            = 1;
            IList <Container> containers = new AList <Container>();
            // nmTokens keeps track of all the nmTokens issued in the allocate call.
            IList <NMToken> expectedNMTokens = new AList <NMToken>();

            // am1 allocate 1 container on nm1.
            while (true)
            {
                AllocateResponse response = am.Allocate("127.0.0.1", 2000, NumContainers, new AList
                                                        <ContainerId>());
                nm1.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
                Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
                if (containers.Count == NumContainers)
                {
                    break;
                }
                Sharpen.Thread.Sleep(200);
                System.Console.Out.WriteLine("Waiting for container to be allocated.");
            }
            NodeId nodeId = expectedNMTokens[0].GetNodeId();

            // NMToken is sent for the allocated container.
            NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), nodeId);
        }
示例#7
0
        /// <exception cref="System.Exception"/>
        protected internal virtual void AllocateContainersAndValidateNMTokens(MockAM am,
                                                                              AList <Container> containersReceived, int totalContainerRequested, Dictionary <string
                                                                                                                                                             , Token> nmTokens, MockNM nm)
        {
            AList <ContainerId>     releaseContainerList = new AList <ContainerId>();
            AllocateResponse        response;
            AList <ResourceRequest> resourceRequest = new AList <ResourceRequest>();

            while (containersReceived.Count < totalContainerRequested)
            {
                nm.NodeHeartbeat(true);
                Log.Info("requesting containers..");
                response = am.Allocate(resourceRequest, releaseContainerList);
                Sharpen.Collections.AddAll(containersReceived, response.GetAllocatedContainers());
                if (!response.GetNMTokens().IsEmpty())
                {
                    foreach (NMToken nmToken in response.GetNMTokens())
                    {
                        string nodeId = nmToken.GetNodeId().ToString();
                        if (nmTokens.Contains(nodeId))
                        {
                            NUnit.Framework.Assert.Fail("Duplicate NMToken received for : " + nodeId);
                        }
                        nmTokens[nodeId] = nmToken.GetToken();
                    }
                }
                Log.Info("Got " + containersReceived.Count + " containers. Waiting to get " + totalContainerRequested
                         );
                Sharpen.Thread.Sleep(WaitSleepMs);
            }
        }
        public virtual void TestRetriesOnFailures()
        {
            ContainerManagementProtocol mockProxy = Org.Mockito.Mockito.Mock <ContainerManagementProtocol
                                                                              >();
            StartContainersResponse mockResponse = Org.Mockito.Mockito.Mock <StartContainersResponse
                                                                             >();

            Org.Mockito.Mockito.When(mockProxy.StartContainers(Matchers.Any <StartContainersRequest
                                                                             >())).ThenThrow(new NMNotYetReadyException("foo")).ThenReturn(mockResponse);
            Configuration conf = new Configuration();

            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            conf.SetInt(YarnConfiguration.ClientNmConnectRetryIntervalMs, 1);
            DrainDispatcher dispatcher = new DrainDispatcher();
            MockRM          rm         = new _MockRMWithCustomAMLauncher_206(dispatcher, mockProxy, conf, null
                                                                             );

            rm.Start();
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5120);
            RMApp  app = rm.SubmitApp(2000);
            ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            rm.WaitForState(appAttemptId, RMAppAttemptState.Launched, 500);
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestAppCleanupWhenNMReconnects()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start RM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            // create app and launch the AM
            RMApp  app0 = rm1.SubmitApp(200);
            MockAM am0  = LaunchAM(app0, rm1, nm1);

            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
            rm1.WaitForState(app0.GetApplicationId(), RMAppState.Failed);
            // wait for application cleanup message received
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            // reconnect NM with application still active
            nm1.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            rm1.Stop();
        }
示例#10
0
        /// <exception cref="System.Exception"/>
        public virtual void TestInvalidatedAMHostPortOnAMRestart()
        {
            MockRM rm1 = new MockRM(conf);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            // a failed app
            RMApp  app2 = rm1.SubmitApp(200);
            MockAM am2  = MockRM.LaunchAndRegisterAM(app2, rm1, nm1);

            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am2.WaitForState(RMAppAttemptState.Failed);
            rm1.WaitForState(app2.GetApplicationId(), RMAppState.Accepted);
            // before new attempt is launched, the app report returns the invalid AM
            // host and port.
            GetApplicationReportRequest request1 = GetApplicationReportRequest.NewInstance(app2
                                                                                           .GetApplicationId());
            ApplicationReport report1 = rm1.GetClientRMService().GetApplicationReport(request1
                                                                                      ).GetApplicationReport();

            NUnit.Framework.Assert.AreEqual("N/A", report1.GetHost());
            NUnit.Framework.Assert.AreEqual(-1, report1.GetRpcPort());
        }
        /// <exception cref="System.Exception"/>
        protected internal virtual void WaitForContainerCleanup(DrainDispatcher dispatcher
                                                                , MockNM nm, NodeHeartbeatResponse resp)
        {
            int waitCount    = 0;
            int cleanedConts = 0;
            IList <ContainerId> contsToClean;

            do
            {
                dispatcher.Await();
                contsToClean  = resp.GetContainersToCleanup();
                cleanedConts += contsToClean.Count;
                if (cleanedConts >= 1)
                {
                    break;
                }
                Sharpen.Thread.Sleep(100);
                resp = nm.NodeHeartbeat(true);
            }while (waitCount++ < 200);
            if (contsToClean.IsEmpty())
            {
                Log.Error("Failed to get any containers to cleanup");
            }
            else
            {
                Log.Info("Got cleanup for " + contsToClean[0]);
            }
            NUnit.Framework.Assert.AreEqual(1, cleanedConts);
        }
示例#12
0
        /// <exception cref="System.Exception"/>
        private void TestMinimumAllocation(YarnConfiguration conf, int testAlloc)
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            // Register node1
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            // Submit an application
            RMApp app1 = rm.SubmitApp(testAlloc);

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId
                                                                                         ());
            int checkAlloc = conf.GetInt(YarnConfiguration.RmSchedulerMinimumAllocationMb, YarnConfiguration
                                         .DefaultRmSchedulerMinimumAllocationMb);

            NUnit.Framework.Assert.AreEqual(checkAlloc, report_nm1.GetUsedResource().GetMemory
                                                ());
            rm.Stop();
        }
示例#13
0
        public virtual void TestDecommissionWithIncludeHosts()
        {
            WriteToHostsFile("localhost", "host1", "host2");
            Configuration conf = new Configuration();

            conf.Set(YarnConfiguration.RmNodesIncludeFilePath, hostFile.GetAbsolutePath());
            rm = new MockRM(conf);
            rm.Start();
            MockNM         nm1     = rm.RegisterNode("host1:1234", 5120);
            MockNM         nm2     = rm.RegisterNode("host2:5678", 10240);
            MockNM         nm3     = rm.RegisterNode("localhost:4433", 1024);
            ClusterMetrics metrics = ClusterMetrics.GetMetrics();

            System.Diagnostics.Debug.Assert((metrics != null));
            int metricCount = metrics.GetNumDecommisionedNMs();
            NodeHeartbeatResponse nodeHeartbeat = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction
                                                                       ()));
            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction
                                                                       ()));
            nodeHeartbeat = nm3.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction
                                                                       ()));
            // To test that IPs also work
            string ip = NetUtils.NormalizeHostName("localhost");

            WriteToHostsFile("host1", ip);
            rm.GetNodesListManager().RefreshNodes(conf);
            CheckDecommissionedNMCount(rm, ++metricCount);
            nodeHeartbeat = nm1.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction
                                                                       ()));
            NUnit.Framework.Assert.AreEqual(1, ClusterMetrics.GetMetrics().GetNumDecommisionedNMs
                                                ());
            nodeHeartbeat = nm2.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue("Node is not decommisioned.", NodeAction.Shutdown.Equals
                                              (nodeHeartbeat.GetNodeAction()));
            nodeHeartbeat = nm3.NodeHeartbeat(true);
            NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction
                                                                       ()));
            NUnit.Framework.Assert.AreEqual(metricCount, ClusterMetrics.GetMetrics().GetNumDecommisionedNMs
                                                ());
        }
示例#14
0
        /// <exception cref="System.Exception"/>
        public virtual IList <Container> AllocateAndWaitForContainers(int nContainer, int
                                                                      memory, MockNM nm)
        {
            // AM request for containers
            Allocate("ANY", memory, nContainer, null);
            // kick the scheduler
            nm.NodeHeartbeat(true);
            IList <Container> conts = Allocate(new AList <ResourceRequest>(), null).GetAllocatedContainers
                                          ();

            while (conts.Count < nContainer)
            {
                nm.NodeHeartbeat(true);
                Sharpen.Collections.AddAll(conts, Allocate(new AList <ResourceRequest>(), new AList
                                                           <ContainerId>()).GetAllocatedContainers());
                Sharpen.Thread.Sleep(500);
            }
            return(conts);
        }
示例#15
0
        public virtual void TestUnhealthyNodeStatus()
        {
            Configuration conf = new Configuration();

            conf.Set(YarnConfiguration.RmNodesExcludeFilePath, hostFile.GetAbsolutePath());
            rm = new MockRM(conf);
            rm.Start();
            MockNM nm1 = rm.RegisterNode("host1:1234", 5120);

            NUnit.Framework.Assert.AreEqual(0, ClusterMetrics.GetMetrics().GetUnhealthyNMs());
            // node healthy
            nm1.NodeHeartbeat(true);
            // node unhealthy
            nm1.NodeHeartbeat(false);
            CheckUnealthyNMCount(rm, nm1, true, 1);
            // node healthy again
            nm1.NodeHeartbeat(true);
            CheckUnealthyNMCount(rm, nm1, false, 0);
        }
        /// <exception cref="System.Exception"/>
        private MockAM LaunchAM(RMApp app, MockRM rm, MockNM nm)
        {
            RMAppAttempt attempt = app.GetCurrentAppAttempt();

            nm.NodeHeartbeat(true);
            MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());

            am.RegisterAppAttempt();
            rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
            return(am);
        }
示例#17
0
        /// <exception cref="System.Exception"/>
        public virtual void WaitForContainerAllocated(MockNM nm, ContainerId containerId)
        {
            int timeoutSecs = 0;

            while (GetResourceScheduler().GetRMContainer(containerId) == null && timeoutSecs++
                   < 40)
            {
                System.Console.Out.WriteLine("Waiting for" + containerId + " to be allocated.");
                nm.NodeHeartbeat(true);
                Sharpen.Thread.Sleep(200);
            }
        }
示例#18
0
        public virtual void TestAuthorizedAccess()
        {
            TestAMAuthorization.MyContainerManager containerManager = new TestAMAuthorization.MyContainerManager
                                                                          ();
            rm = new TestAMAuthorization.MockRMWithAMS(conf, containerManager);
            rm.Start();
            MockNM nm1 = rm.RegisterNode("localhost:1234", 5120);
            IDictionary <ApplicationAccessType, string> acls = new Dictionary <ApplicationAccessType
                                                                               , string>(2);

            acls[ApplicationAccessType.ViewApp] = "*";
            RMApp app = rm.SubmitApp(1024, "appname", "appuser", acls);

            nm1.NodeHeartbeat(true);
            int waitCount = 0;

            while (containerManager.containerTokens == null && waitCount++ < 20)
            {
                Log.Info("Waiting for AM Launch to happen..");
                Sharpen.Thread.Sleep(1000);
            }
            NUnit.Framework.Assert.IsNotNull(containerManager.containerTokens);
            RMAppAttempt         attempt = app.GetCurrentAppAttempt();
            ApplicationAttemptId applicationAttemptId = attempt.GetAppAttemptId();

            WaitForLaunchedState(attempt);
            // Create a client to the RM.
            Configuration        conf        = rm.GetConfig();
            YarnRPC              rpc         = YarnRPC.Create(conf);
            UserGroupInformation currentUser = UserGroupInformation.CreateRemoteUser(applicationAttemptId
                                                                                     .ToString());
            Credentials credentials   = containerManager.GetContainerCredentials();
            IPEndPoint  rmBindAddress = rm.GetApplicationMasterService().GetBindAddress();

            Org.Apache.Hadoop.Security.Token.Token <TokenIdentifier> amRMToken = TestAMAuthorization.MockRMWithAMS
                                                                                 .SetupAndReturnAMRMToken(rmBindAddress, credentials.GetAllTokens());
            currentUser.AddToken(amRMToken);
            ApplicationMasterProtocol client = currentUser.DoAs(new _PrivilegedAction_206(this
                                                                                          , rpc, conf));
            RegisterApplicationMasterRequest request = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord
                                                       <RegisterApplicationMasterRequest>();
            RegisterApplicationMasterResponse response = client.RegisterApplicationMaster(request
                                                                                          );

            NUnit.Framework.Assert.IsNotNull(response.GetClientToAMTokenMasterKey());
            if (UserGroupInformation.IsSecurityEnabled())
            {
                NUnit.Framework.Assert.IsTrue(((byte[])response.GetClientToAMTokenMasterKey().Array
                                                   ()).Length > 0);
            }
            NUnit.Framework.Assert.AreEqual("Register response has bad ACLs", "*", response.GetApplicationACLs
                                                ()[ApplicationAccessType.ViewApp]);
        }
示例#19
0
        // Disable webapp
        /// <exception cref="System.Exception"/>
        public static void FinishAMAndVerifyAppState(RMApp rmApp, Org.Apache.Hadoop.Yarn.Server.Resourcemanager.MockRM
                                                     rm, MockNM nm, MockAM am)
        {
            FinishApplicationMasterRequest req = FinishApplicationMasterRequest.NewInstance(FinalApplicationStatus
                                                                                            .Succeeded, string.Empty, string.Empty);

            am.UnregisterAppAttempt(req, true);
            am.WaitForState(RMAppAttemptState.Finishing);
            nm.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Finished);
            rm.WaitForState(rmApp.GetApplicationId(), RMAppState.Finished);
        }
示例#20
0
        // This is to test AM Host and rpc port are invalidated after the am attempt
        // is killed or failed, so that client doesn't get the wrong information.
        /// <exception cref="System.Exception"/>
        public virtual void TestInvalidateAMHostPortWhenAMFailedOrKilled()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MockRM rm1 = new MockRM(conf);

            rm1.Start();
            // a succeeded app
            RMApp  app1 = rm1.SubmitApp(200);
            MockNM nm1  = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

            MockRM.FinishAMAndVerifyAppState(app1, rm1, nm1, am1);
            // a failed app
            RMApp  app2 = rm1.SubmitApp(200);
            MockAM am2  = MockRM.LaunchAndRegisterAM(app2, rm1, nm1);

            nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
            am2.WaitForState(RMAppAttemptState.Failed);
            rm1.WaitForState(app2.GetApplicationId(), RMAppState.Failed);
            // a killed app
            RMApp  app3 = rm1.SubmitApp(200);
            MockAM am3  = MockRM.LaunchAndRegisterAM(app3, rm1, nm1);

            rm1.KillApp(app3.GetApplicationId());
            rm1.WaitForState(app3.GetApplicationId(), RMAppState.Killed);
            rm1.WaitForState(am3.GetApplicationAttemptId(), RMAppAttemptState.Killed);
            GetApplicationsRequest request1 = GetApplicationsRequest.NewInstance(EnumSet.Of(YarnApplicationState
                                                                                            .Finished, YarnApplicationState.Killed, YarnApplicationState.Failed));
            GetApplicationsResponse response1 = rm1.GetClientRMService().GetApplications(request1
                                                                                         );
            IList <ApplicationReport> appList1 = response1.GetApplicationList();

            NUnit.Framework.Assert.AreEqual(3, appList1.Count);
            foreach (ApplicationReport report in appList1)
            {
                // killed/failed apps host and rpc port are invalidated.
                if (report.GetApplicationId().Equals(app2.GetApplicationId()) || report.GetApplicationId
                        ().Equals(app3.GetApplicationId()))
                {
                    NUnit.Framework.Assert.AreEqual("N/A", report.GetHost());
                    NUnit.Framework.Assert.AreEqual(-1, report.GetRpcPort());
                }
                // succeeded app's host and rpc port is not invalidated
                if (report.GetApplicationId().Equals(app1.GetApplicationId()))
                {
                    NUnit.Framework.Assert.IsFalse(report.GetHost().Equals("N/A"));
                    NUnit.Framework.Assert.IsTrue(report.GetRpcPort() != -1);
                }
            }
        }
示例#21
0
        /// <exception cref="System.Exception"/>
        public virtual void TestAppWithNoContainers()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
            RMApp  app = rm.SubmitApp(2000);

            //kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();
            MockAM       am      = rm.SendAMLaunched(attempt.GetAppAttemptId());

            am.RegisterAppAttempt();
            am.UnregisterAppAttempt();
            nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
            am.WaitForState(RMAppAttemptState.Finished);
            rm.Stop();
        }
示例#22
0
        /// <exception cref="System.Exception"/>
        public static MockAM LaunchAM(RMApp app, Org.Apache.Hadoop.Yarn.Server.Resourcemanager.MockRM
                                      rm, MockNM nm)
        {
            rm.WaitForState(app.GetApplicationId(), RMAppState.Accepted);
            RMAppAttempt attempt = app.GetCurrentAppAttempt();

            System.Console.Out.WriteLine("Launch AM " + attempt.GetAppAttemptId());
            nm.NodeHeartbeat(true);
            MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());

            rm.WaitForState(attempt.GetAppAttemptId(), RMAppAttemptState.Launched);
            return(am);
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestAppCleanupWhenRMRestartedBeforeAppFinished()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start RM
            MockRM rm1 = new MockRM(conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 1024, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            MockNM nm2 = new MockNM("127.0.0.1:5678", 1024, rm1.GetResourceTrackerService());

            nm2.RegisterNode();
            // create app and launch the AM
            RMApp  app0 = rm1.SubmitApp(200);
            MockAM am0  = LaunchAM(app0, rm1, nm1);
            // alloc another container on nm2
            AllocateResponse allocResponse = am0.Allocate(Arrays.AsList(ResourceRequest.NewInstance
                                                                            (Priority.NewInstance(1), "*", Resource.NewInstance(1024, 0), 1)), null);

            while (null == allocResponse.GetAllocatedContainers() || allocResponse.GetAllocatedContainers
                       ().IsEmpty())
            {
                nm2.NodeHeartbeat(true);
                allocResponse = am0.Allocate(null, null);
                Sharpen.Thread.Sleep(1000);
            }
            // start new RM
            MockRM rm2 = new MockRM(conf, memStore);

            rm2.Start();
            // nm1/nm2 register to rm2, and do a heartbeat
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm1.RegisterNode(Arrays.AsList(NMContainerStatus.NewInstance(ContainerId.NewContainerId
                                                                             (am0.GetApplicationAttemptId(), 1), ContainerState.Complete, Resource.NewInstance
                                                                             (1024, 1), string.Empty, 0, Priority.NewInstance(0), 1234)), Arrays.AsList(app0.
                                                                                                                                                        GetApplicationId()));
            nm2.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm2.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
            // assert app state has been saved.
            rm2.WaitForState(app0.GetApplicationId(), RMAppState.Failed);
            // wait for application cleanup message received on NM1
            WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
            // wait for application cleanup message received on NM2
            WaitForAppCleanupMessageRecved(nm2, app0.GetApplicationId());
            rm1.Stop();
            rm2.Stop();
        }
示例#24
0
        /// <exception cref="System.Exception"/>
        public virtual void TestRMIdentifierOnContainerAllocation()
        {
            MockRM rm = new MockRM(conf);

            rm.Start();
            // Register node1
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            // Submit an application
            RMApp app1 = rm.SubmitApp(2048);

            // kick the scheduling
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
            AllocateResponse alloc1Response = am1.Schedule();

            // send the request
            // kick the scheduler
            nm1.NodeHeartbeat(true);
            while (alloc1Response.GetAllocatedContainers().Count < 1)
            {
                Log.Info("Waiting for containers to be created for app 1...");
                Sharpen.Thread.Sleep(1000);
                alloc1Response = am1.Schedule();
            }
            // assert RMIdentifer is set properly in allocated containers
            Container allocatedContainer     = alloc1Response.GetAllocatedContainers()[0];
            ContainerTokenIdentifier tokenId = BuilderUtils.NewContainerTokenIdentifier(allocatedContainer
                                                                                        .GetContainerToken());

            NUnit.Framework.Assert.AreEqual(MockRM.GetClusterTimeStamp(), tokenId.GetRMIdentifier
                                                ());
            rm.Stop();
        }
        /// <exception cref="System.Exception"/>
        public virtual void TestContainerCleanupWhenRMRestartedAppNotRegistered()
        {
            conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
            MemoryRMStateStore memStore = new MemoryRMStateStore();

            memStore.Init(conf);
            // start RM
            DrainDispatcher dispatcher = new DrainDispatcher();
            MockRM          rm1        = new _MockRM_413(dispatcher, conf, memStore);

            rm1.Start();
            MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());

            nm1.RegisterNode();
            // create app and launch the AM
            RMApp  app0 = rm1.SubmitApp(200);
            MockAM am0  = LaunchAM(app0, rm1, nm1);

            nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Running);
            rm1.WaitForState(app0.GetApplicationId(), RMAppState.Running);
            // start new RM
            DrainDispatcher dispatcher2 = new DrainDispatcher();
            MockRM          rm2         = new _MockRM_432(dispatcher2, conf, memStore);

            rm2.Start();
            // nm1 register to rm2, and do a heartbeat
            nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
            nm1.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
            rm2.WaitForState(app0.GetApplicationId(), RMAppState.Accepted);
            // Add unknown container for application unknown to scheduler
            NodeHeartbeatResponse response = nm1.NodeHeartbeat(am0.GetApplicationAttemptId(),
                                                               2, ContainerState.Running);

            WaitForContainerCleanup(dispatcher2, nm1, response);
            rm1.Stop();
            rm2.Stop();
        }
 /// <exception cref="System.Exception"/>
 private void WaitForAppCleanupMessageRecved(MockNM nm, ApplicationId appId)
 {
     while (true)
     {
         NodeHeartbeatResponse response = nm.NodeHeartbeat(true);
         if (response.GetApplicationsToCleanup() != null && response.GetApplicationsToCleanup
                 ().Count == 1 && appId.Equals(response.GetApplicationsToCleanup()[0]))
         {
             return;
         }
         Log.Info("Haven't got application=" + appId.ToString() + " in cleanup list from node heartbeat response, "
                  + "sleep for a while before next heartbeat");
         Sharpen.Thread.Sleep(1000);
     }
 }
示例#27
0
        /// <exception cref="System.Exception"/>
        public virtual bool WaitForState(MockNM nm, ContainerId containerId, RMContainerState
                                         containerState, int timeoutMillisecs)
        {
            RMContainer container   = GetResourceScheduler().GetRMContainer(containerId);
            int         timeoutSecs = 0;

            while (container == null && timeoutSecs++ < timeoutMillisecs / 100)
            {
                nm.NodeHeartbeat(true);
                container = GetResourceScheduler().GetRMContainer(containerId);
                System.Console.Out.WriteLine("Waiting for container " + containerId + " to be allocated."
                                             );
                Sharpen.Thread.Sleep(100);
                if (timeoutMillisecs <= timeoutSecs * 100)
                {
                    return(false);
                }
            }
            NUnit.Framework.Assert.IsNotNull("Container shouldn't be null", container);
            while (!containerState.Equals(container.GetState()) && timeoutSecs++ < timeoutMillisecs
                   / 100)
            {
                System.Console.Out.WriteLine("Container : " + containerId + " State is : " + container
                                             .GetState() + " Waiting for state : " + containerState);
                nm.NodeHeartbeat(true);
                Sharpen.Thread.Sleep(100);
                if (timeoutMillisecs <= timeoutSecs * 100)
                {
                    return(false);
                }
            }
            System.Console.Out.WriteLine("Container State is : " + container.GetState());
            NUnit.Framework.Assert.AreEqual("Container state is not correct (timedout)", containerState
                                            , container.GetState());
            return(true);
        }
示例#28
0
        public virtual void TestAllocateContainerOnNodeWithoutOffSwitchSpecified()
        {
            Logger rootLogger = LogManager.GetRootLogger();

            rootLogger.SetLevel(Level.Debug);
            MockRM rm = new MockRM(conf);

            rm.Start();
            MockNM nm1  = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            RMApp  app1 = rm.SubmitApp(2048);

            // kick the scheduling, 2 GB given to AM1, remaining 4GB on nm1
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM       am1      = rm.SendAMLaunched(attempt1.GetAppAttemptId());

            am1.RegisterAppAttempt();
            // add request for containers
            IList <ResourceRequest> requests = new AList <ResourceRequest>();

            requests.AddItem(am1.CreateResourceReq("127.0.0.1", 1 * Gb, 1, 1));
            requests.AddItem(am1.CreateResourceReq("/default-rack", 1 * Gb, 1, 1));
            am1.Allocate(requests, null);
            // send the request
            try
            {
                // kick the schedule
                nm1.NodeHeartbeat(true);
            }
            catch (ArgumentNullException)
            {
                NUnit.Framework.Assert.Fail("NPE when allocating container on node but " + "forget to set off-switch request should be handled"
                                            );
            }
            rm.Stop();
        }
        /// <exception cref="System.Exception"/>
        private void WaitForClusterMemory(MockNM nm1, ResourceScheduler rs, int clusterMemory
                                          )
        {
            int counter = 0;

            while (rs.GetRootQueueMetrics().GetAllocatedMB() != clusterMemory)
            {
                nm1.NodeHeartbeat(true);
                Sharpen.Thread.Sleep(100);
                if (counter++ == 50)
                {
                    NUnit.Framework.Assert.Fail("Wait for cluster memory is timed out.Expected=" + clusterMemory
                                                + " Actual=" + rs.GetRootQueueMetrics().GetAllocatedMB());
                }
            }
        }
示例#30
0
        /// <summary>
        /// Test RM read NM next heartBeat Interval correctly from Configuration file,
        /// and NM get next heartBeat Interval from RM correctly
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestGetNextHeartBeatInterval()
        {
            Configuration conf = new Configuration();

            conf.Set(YarnConfiguration.RmNmHeartbeatIntervalMs, "4000");
            rm = new MockRM(conf);
            rm.Start();
            MockNM nm1 = rm.RegisterNode("host1:1234", 5120);
            MockNM nm2 = rm.RegisterNode("host2:5678", 10240);
            NodeHeartbeatResponse nodeHeartbeat = nm1.NodeHeartbeat(true);

            NUnit.Framework.Assert.AreEqual(4000, nodeHeartbeat.GetNextHeartBeatInterval());
            NodeHeartbeatResponse nodeHeartbeat2 = nm2.NodeHeartbeat(true);

            NUnit.Framework.Assert.AreEqual(4000, nodeHeartbeat2.GetNextHeartBeatInterval());
        }