/// <summary>
/// Starts the RM with no exclude file configured, then writes "host2" to the
/// hosts file, sets the exclude-file path on the configuration and refreshes
/// the node list. Expects the decommissioned-node count to grow by one, host1
/// to keep receiving Normal and host2 to receive Shutdown.
/// </summary>
public virtual void TestAddNewExcludePathToConfiguration()
{
    Configuration conf = new Configuration();
    rm = new MockRM(conf);
    rm.Start();
    MockNM keptNode = rm.RegisterNode("host1:1234", 5120);
    MockNM excludedNode = rm.RegisterNode("host2:5678", 10240);

    ClusterMetrics clusterMetrics = ClusterMetrics.GetMetrics();
    System.Diagnostics.Debug.Assert(clusterMetrics != null);
    int decommissionedBefore = clusterMetrics.GetNumDecommisionedNMs();

    // Before the exclude path is configured both nodes get a Normal action.
    NodeHeartbeatResponse reply = keptNode.NodeHeartbeat(true);
    NUnit.Framework.Assert.AreEqual(NodeAction.Normal, reply.GetNodeAction());
    reply = excludedNode.NodeHeartbeat(true);
    NUnit.Framework.Assert.AreEqual(NodeAction.Normal, reply.GetNodeAction());

    // Exclude host2 and make the RM re-read its node lists.
    WriteToHostsFile("host2");
    conf.Set(YarnConfiguration.RmNodesExcludeFilePath, hostFile.GetAbsolutePath());
    rm.GetNodesListManager().RefreshNodes(conf);
    CheckDecommissionedNMCount(rm, decommissionedBefore + 1);

    reply = keptNode.NodeHeartbeat(true);
    NUnit.Framework.Assert.AreEqual("Node should not have been decomissioned.", NodeAction.Normal, reply.GetNodeAction());

    reply = excludedNode.NodeHeartbeat(true);
    NUnit.Framework.Assert.AreEqual("Node should have been decomissioned but is in state" + reply.GetNodeAction(), NodeAction.Shutdown, reply.GetNodeAction());
}
/// <summary>
/// Verifies that an application attempt cannot release a container that was
/// allocated to another attempt: scheduling with such a release must throw
/// InvalidContainerReleaseException whose message names the container and
/// the attempt that tried to release it.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestInvalidContainerReleaseRequest()
{
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();
        MockNM node = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);

        // First application: launch its AM and wait for one allocated container.
        RMApp firstApp = rm.SubmitApp(1024);
        node.NodeHeartbeat(true);
        RMAppAttempt firstAttempt = firstApp.GetCurrentAppAttempt();
        MockAM firstAm = rm.SendAMLaunched(firstAttempt.GetAppAttemptId());
        firstAm.RegisterAppAttempt();
        firstAm.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
        AllocateResponse allocation = firstAm.Schedule();
        node.NodeHeartbeat(true);
        while (allocation.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
            allocation = firstAm.Schedule();
        }
        NUnit.Framework.Assert.IsTrue(allocation.GetAllocatedContainers().Count > 0);

        // Second application: only its AM is launched and registered.
        RMApp secondApp = rm.SubmitApp(1024);
        node.NodeHeartbeat(true);
        RMAppAttempt secondAttempt = secondApp.GetCurrentAppAttempt();
        MockAM secondAm = rm.SendAMLaunched(secondAttempt.GetAppAttemptId());
        secondAm.RegisterAppAttempt();

        // The second attempt asks to release the first attempt's container.
        ContainerId foreignContainer = allocation.GetAllocatedContainers()[0].GetId();
        secondAm.AddContainerToBeReleased(foreignContainer);
        try
        {
            secondAm.Schedule();
            NUnit.Framework.Assert.Fail("Exception was expected!!");
        }
        catch (InvalidContainerReleaseException e)
        {
            string expectedFragment = "Cannot release container : "
                + foreignContainer.ToString()
                + " not belonging to this application attempt : "
                + secondAttempt.GetAppAttemptId().ToString();
            NUnit.Framework.Assert.IsTrue(e.Message.Contains(expectedFragment));
        }
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Runs one application against two nodes (h1 and h2) with the capacity
/// scheduler's node-locality delay set to -1. Requests 13 containers on "h1",
/// waits for 3 containers while heartbeating node 1, then for 10 containers
/// while heartbeating node 2, and finally unregisters the AM and waits for
/// the attempt to reach Finished.
/// </summary>
/// <remarks>
/// The RM is stopped in a finally block so that a failed assertion or wait
/// does not leave a running RM behind for later tests.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAppOnMultiNode()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    conf.Set("yarn.scheduler.capacity.node-locality-delay", "-1");
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
        MockNM nm2 = rm.RegisterNode("h2:5678", 10240);
        RMApp app = rm.SubmitApp(2000);
        // kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
        am.RegisterAppAttempt();

        // request for containers
        int request = 13;
        am.Allocate("h1", 1000, request, new AList<ContainerId>());

        // kick the scheduler
        IList<Container> conts = am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers();
        while (conts.Count < 3)
        {
            // only 3 containers are available on node1
            nm1.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(conts, am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers());
            Log.Info("Got " + conts.Count + " containers. Waiting to get " + 3);
            Sharpen.Thread.Sleep(WaitSleepMs);
        }
        NUnit.Framework.Assert.AreEqual(3, conts.Count);

        // send node2 heartbeat; the list is restarted from a fresh allocate response
        conts = am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers();
        while (conts.Count < 10)
        {
            nm2.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(conts, am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers());
            Log.Info("Got " + conts.Count + " containers. Waiting to get " + 10);
            Sharpen.Thread.Sleep(WaitSleepMs);
        }
        NUnit.Framework.Assert.AreEqual(10, conts.Count);

        am.UnregisterAppAttempt();
        nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Submits an application to an RM that uses a custom AM launcher backed by
/// <c>MyContainerManagerImpl</c>, waits (up to 20 one-second polls) for the AM
/// launch to be observed, checks the values the container manager recorded
/// at launch, then finishes the AM and waits (again up to 20 polls) for the
/// cleanup to be observed.
/// </summary>
/// <remarks>
/// The RM is stopped in a finally block so that a failed assertion does not
/// leave a running RM behind for later tests.
/// </remarks>
public virtual void TestAMLaunchAndCleanup()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    TestApplicationMasterLauncher.MyContainerManagerImpl containerManager = new TestApplicationMasterLauncher.MyContainerManagerImpl();
    MockRMWithCustomAMLauncher rm = new MockRMWithCustomAMLauncher(containerManager);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5120);
        RMApp app = rm.SubmitApp(2000);
        // kick the scheduling
        nm1.NodeHeartbeat(true);

        int waitCount = 0;
        while (!containerManager.launched && waitCount++ < 20)
        {
            Log.Info("Waiting for AM Launch to happen..");
            Sharpen.Thread.Sleep(1000);
        }
        NUnit.Framework.Assert.IsTrue(containerManager.launched);

        // Values captured by the container manager must match the RM's view.
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        ApplicationAttemptId appAttemptId = attempt.GetAppAttemptId();
        NUnit.Framework.Assert.AreEqual(appAttemptId.ToString(), containerManager.attemptIdAtContainerManager);
        NUnit.Framework.Assert.AreEqual(app.GetSubmitTime(), containerManager.submitTimeAtContainerManager);
        NUnit.Framework.Assert.AreEqual(app.GetRMAppAttempt(appAttemptId).GetMasterContainer().GetId().ToString(), containerManager.containerIdAtContainerManager);
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId().ToString(), containerManager.nmHostAtContainerManager);
        NUnit.Framework.Assert.AreEqual(YarnConfiguration.DefaultRmAmMaxAttempts, containerManager.maxAppAttempts);

        MockAM am = new MockAM(rm.GetRMContext(), rm.GetApplicationMasterService(), appAttemptId);
        am.RegisterAppAttempt();
        am.UnregisterAppAttempt();
        // complete the AM container to finish the app normally
        nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);

        waitCount = 0;
        while (!containerManager.cleanedup && waitCount++ < 20)
        {
            Log.Info("Waiting for AM Cleanup to happen..");
            Sharpen.Thread.Sleep(1000);
        }
        NUnit.Framework.Assert.IsTrue(containerManager.cleanedup);
        am.WaitForState(RMAppAttemptState.Finished);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Decommissions nodes through the exclude file. With an empty exclude file
/// all heartbeats are Normal; after listing "host2" and the normalized
/// address of "localhost" the decommissioned count is expected to rise by
/// two and both nodes receive Shutdown. After emptying the file again and
/// re-registering the localhost node, that node is Normal again and the
/// count is expected to be one above the starting value.
/// </summary>
public virtual void TestDecommissionWithExcludeHosts()
{
    Configuration conf = new Configuration();
    conf.Set(YarnConfiguration.RmNodesExcludeFilePath, hostFile.GetAbsolutePath());
    WriteToHostsFile(string.Empty);

    DrainDispatcher dispatcher = new DrainDispatcher();
    rm = new _MockRM_162(dispatcher, conf);
    rm.Start();
    MockNM host1Node = rm.RegisterNode("host1:1234", 5120);
    MockNM host2Node = rm.RegisterNode("host2:5678", 10240);
    MockNM localNode = rm.RegisterNode("localhost:4433", 1024);
    dispatcher.Await();

    int baseline = ClusterMetrics.GetMetrics().GetNumDecommisionedNMs();

    NodeHeartbeatResponse reply = host1Node.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(reply.GetNodeAction()));
    reply = host2Node.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(reply.GetNodeAction()));
    dispatcher.Await();

    // To test that IPs also work
    string localAddress = NetUtils.NormalizeHostName("localhost");
    WriteToHostsFile("host2", localAddress);
    rm.GetNodesListManager().RefreshNodes(conf);
    CheckDecommissionedNMCount(rm, baseline + 2);

    reply = host1Node.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(reply.GetNodeAction()));
    reply = host2Node.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue("The decommisioned metrics are not updated", NodeAction.Shutdown.Equals(reply.GetNodeAction()));
    reply = localNode.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue("The decommisioned metrics are not updated", NodeAction.Shutdown.Equals(reply.GetNodeAction()));
    dispatcher.Await();

    // Clear the exclude list and let the localhost node register again.
    WriteToHostsFile(string.Empty);
    rm.GetNodesListManager().RefreshNodes(conf);
    localNode = rm.RegisterNode("localhost:4433", 1024);
    dispatcher.Await();
    reply = localNode.NodeHeartbeat(true);
    dispatcher.Await();
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(reply.GetNodeAction()));

    // decommissioned node is 1 since 1 node is rejoined after updating exclude file
    CheckDecommissionedNMCount(rm, baseline + 1);
}
/// <summary>
/// Test even if AM container is allocated with containerId not equal to 1, the
/// following allocate requests from AM should be able to retrieve the
/// corresponding NM Token.
/// </summary>
/// <remarks>
/// The RM created here is now stopped in a finally block; previously it was
/// started and never stopped, on either the success or the failure path.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestNMTokenSentForNormalContainer()
{
    conf.Set(YarnConfiguration.RmScheduler, typeof(CapacityScheduler).GetCanonicalName());
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
        RMApp app = rm.SubmitApp(2000);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();

        // Call getNewContainerId to increase container Id so that the AM container
        // Id doesn't equal to one.
        CapacityScheduler cs = (CapacityScheduler)rm.GetResourceScheduler();
        cs.GetApplicationAttempt(attempt.GetAppAttemptId()).GetNewContainerId();

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        MockAM am = MockRM.LaunchAM(app, rm, nm1);

        // am container Id not equal to 1.
        NUnit.Framework.Assert.IsTrue(attempt.GetMasterContainer().GetId().GetContainerId() != 1);

        // NMSecretManager doesn't record the node on which the am is allocated.
        NUnit.Framework.Assert.IsFalse(rm.GetRMContext().GetNMTokenSecretManager().IsApplicationAttemptNMTokenPresent(attempt.GetAppAttemptId(), nm1.GetNodeId()));

        am.RegisterAppAttempt();
        rm.WaitForState(app.GetApplicationId(), RMAppState.Running);

        int numContainers = 1;
        IList<Container> containers = new AList<Container>();
        // expectedNMTokens keeps track of all the nmTokens issued in the allocate calls.
        IList<NMToken> expectedNMTokens = new AList<NMToken>();

        // Keep allocating and heartbeating until exactly one container is allocated on nm1.
        while (true)
        {
            AllocateResponse response = am.Allocate("127.0.0.1", 2000, numContainers, new AList<ContainerId>());
            nm1.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(containers, response.GetAllocatedContainers());
            Sharpen.Collections.AddAll(expectedNMTokens, response.GetNMTokens());
            if (containers.Count == numContainers)
            {
                break;
            }
            Sharpen.Thread.Sleep(200);
            System.Console.Out.WriteLine("Waiting for container to be allocated.");
        }

        NodeId nodeId = expectedNMTokens[0].GetNodeId();
        // NMToken is sent for the allocated container.
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), nodeId);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Heartbeats <paramref name="nm"/> and calls allocate on <paramref name="am"/>
/// (with empty ask and release lists) until <paramref name="containersReceived"/>
/// holds at least <paramref name="totalContainerRequested"/> containers,
/// recording every NM token returned along the way.
/// </summary>
/// <param name="am">Application master used to issue the allocate calls.</param>
/// <param name="containersReceived">List that newly allocated containers are appended to.</param>
/// <param name="totalContainerRequested">Number of containers to wait for.</param>
/// <param name="nmTokens">
/// Map from node id string to token; the test fails if a token arrives for a
/// node id that is already present in this map.
/// </param>
/// <param name="nm">Node manager that is heartbeated before each allocate call.</param>
/// <exception cref="System.Exception"/>
protected internal virtual void AllocateContainersAndValidateNMTokens(MockAM am, AList<Container> containersReceived, int totalContainerRequested, Dictionary<string, Token> nmTokens, MockNM nm)
{
    AList<ContainerId> releaseContainerList = new AList<ContainerId>();
    AList<ResourceRequest> resourceRequest = new AList<ResourceRequest>();
    while (containersReceived.Count < totalContainerRequested)
    {
        nm.NodeHeartbeat(true);
        Log.Info("requesting containers..");
        AllocateResponse response = am.Allocate(resourceRequest, releaseContainerList);
        Sharpen.Collections.AddAll(containersReceived, response.GetAllocatedContainers());

        // Iterating an empty token list is a no-op, so no emptiness guard is needed.
        foreach (NMToken nmToken in response.GetNMTokens())
        {
            string nodeId = nmToken.GetNodeId().ToString();
            // Dictionary exposes key lookup as ContainsKey; there is no Contains(key).
            if (nmTokens.ContainsKey(nodeId))
            {
                NUnit.Framework.Assert.Fail("Duplicate NMToken received for : " + nodeId);
            }
            nmTokens[nodeId] = nmToken.GetToken();
        }

        Log.Info("Got " + containersReceived.Count + " containers. Waiting to get " + totalContainerRequested);
        Sharpen.Thread.Sleep(WaitSleepMs);
    }
}
/// <summary>
/// Checks that the AM launch is retried: the mocked container-management
/// proxy throws <c>NMNotYetReadyException</c> on the first StartContainers
/// call and returns a response on the next one, and the attempt is still
/// expected to reach the Launched state.
/// </summary>
/// <remarks>
/// The RM created here is now stopped in a finally block; previously it was
/// started and never stopped.
/// </remarks>
public virtual void TestRetriesOnFailures()
{
    ContainerManagementProtocol mockProxy = Org.Mockito.Mockito.Mock<ContainerManagementProtocol>();
    StartContainersResponse mockResponse = Org.Mockito.Mockito.Mock<StartContainersResponse>();
    // First call fails with "not yet ready", the second one succeeds.
    Org.Mockito.Mockito.When(mockProxy.StartContainers(Matchers.Any<StartContainersRequest>()))
        .ThenThrow(new NMNotYetReadyException("foo"))
        .ThenReturn(mockResponse);

    Configuration conf = new Configuration();
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    conf.SetInt(YarnConfiguration.ClientNmConnectRetryIntervalMs, 1);
    DrainDispatcher dispatcher = new DrainDispatcher();
    MockRM rm = new _MockRMWithCustomAMLauncher_206(dispatcher, mockProxy, conf, null);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5120);
        RMApp app = rm.SubmitApp(2000);
        ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();
        // kick the scheduling
        nm1.NodeHeartbeat(true);
        dispatcher.Await();
        rm.WaitForState(appAttemptId, RMAppAttemptState.Launched, 500);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// With AM max attempts set to 1, completes the AM container so the
/// application reaches Failed, waits for the node to be told to clean the
/// application up, then re-registers the node while it still reports the
/// application and expects the cleanup message again.
/// </summary>
/// <remarks>
/// The RM is stopped in a finally block so a failed wait does not leave a
/// running RM behind for later tests.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAppCleanupWhenNMReconnects()
{
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    // start RM
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.Start();
    try
    {
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());
        nm1.RegisterNode();

        // create app and launch the AM
        RMApp app0 = rm1.SubmitApp(200);
        MockAM am0 = LaunchAM(app0, rm1, nm1);
        nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
        rm1.WaitForState(app0.GetApplicationId(), RMAppState.Failed);

        // wait for application cleanup message received
        WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());

        // reconnect NM with application still active
        nm1.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
        WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());
    }
    finally
    {
        rm1.Stop();
    }
}
/// <summary>
/// Fails an AM attempt and, while the application is back in Accepted (before
/// a new attempt is launched), checks that the application report carries the
/// invalidated AM host ("N/A") and RPC port (-1).
/// </summary>
/// <remarks>
/// The RM created here is now stopped in a finally block; previously it was
/// started and never stopped.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestInvalidatedAMHostPortOnAMRestart()
{
    MockRM rm1 = new MockRM(conf);
    rm1.Start();
    try
    {
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());
        nm1.RegisterNode();

        // a failed app
        RMApp app2 = rm1.SubmitApp(200);
        MockAM am2 = MockRM.LaunchAndRegisterAM(app2, rm1, nm1);
        nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am2.WaitForState(RMAppAttemptState.Failed);
        rm1.WaitForState(app2.GetApplicationId(), RMAppState.Accepted);

        // before new attempt is launched, the app report returns the invalid AM
        // host and port.
        GetApplicationReportRequest request1 = GetApplicationReportRequest.NewInstance(app2.GetApplicationId());
        ApplicationReport report1 = rm1.GetClientRMService().GetApplicationReport(request1).GetApplicationReport();
        NUnit.Framework.Assert.AreEqual("N/A", report1.GetHost());
        NUnit.Framework.Assert.AreEqual(-1, report1.GetRpcPort());
    }
    finally
    {
        rm1.Stop();
    }
}
/// <summary>
/// Polls node heartbeat responses until one of them lists containers to clean
/// up, or the polling budget is used up, and then asserts that exactly one
/// container was reported.
/// </summary>
/// <param name="dispatcher">Dispatcher that is drained before each response is inspected.</param>
/// <param name="nm">Node manager used to send follow-up heartbeats.</param>
/// <param name="resp">The first heartbeat response to inspect.</param>
/// <exception cref="System.Exception"/>
protected internal virtual void WaitForContainerCleanup(DrainDispatcher dispatcher, MockNM nm, NodeHeartbeatResponse resp)
{
    int polls = 0;
    int totalCleaned = 0;
    IList<ContainerId> pending;
    while (true)
    {
        dispatcher.Await();
        pending = resp.GetContainersToCleanup();
        totalCleaned += pending.Count;
        if (totalCleaned >= 1)
        {
            break;
        }

        // Nothing reported yet: pause, heartbeat again and inspect the new response.
        Sharpen.Thread.Sleep(100);
        resp = nm.NodeHeartbeat(true);
        if (polls++ >= 200)
        {
            break;
        }
    }

    if (!pending.IsEmpty())
    {
        Log.Info("Got cleanup for " + pending[0]);
    }
    else
    {
        Log.Error("Failed to get any containers to cleanup");
    }
    NUnit.Framework.Assert.AreEqual(1, totalCleaned);
}
/// <summary>
/// Submits an application asking for <paramref name="testAlloc"/> MB and
/// asserts that the memory reported as used on the node equals the
/// scheduler's minimum allocation read from <paramref name="conf"/> (falling
/// back to the default when the key is unset).
/// </summary>
/// <remarks>
/// NOTE(review): the assertion only holds when the request is rounded to a
/// single minimum allocation — presumably callers pass a value no larger
/// than the configured minimum; confirm against the callers.
/// The RM is stopped in a finally block so a failed assertion does not leave
/// a running RM behind.
/// </remarks>
/// <param name="conf">Configuration used to create the RM and to look up the minimum allocation.</param>
/// <param name="testAlloc">Memory passed to SubmitApp for the application.</param>
/// <exception cref="System.Exception"/>
private void TestMinimumAllocation(YarnConfiguration conf, int testAlloc)
{
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        // Register node1
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);

        // Submit an application
        RMApp app1 = rm.SubmitApp(testAlloc);

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();

        SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        int checkAlloc = conf.GetInt(YarnConfiguration.RmSchedulerMinimumAllocationMb, YarnConfiguration.DefaultRmSchedulerMinimumAllocationMb);
        NUnit.Framework.Assert.AreEqual(checkAlloc, report_nm1.GetUsedResource().GetMemory());
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Decommissions a node through the include file. All three nodes start out
/// listed and receive Normal; after the file is rewritten to contain only
/// "host1" and the normalized address of "localhost", host2 is expected to
/// receive Shutdown while the other two stay Normal, and the decommissioned
/// count is expected to be one above its starting value.
/// </summary>
/// <remarks>
/// Every count check is relative to the value sampled at the start of the
/// test. One check used to compare against the literal 1, which only holds
/// when the starting count happens to be zero.
/// </remarks>
public virtual void TestDecommissionWithIncludeHosts()
{
    WriteToHostsFile("localhost", "host1", "host2");
    Configuration conf = new Configuration();
    conf.Set(YarnConfiguration.RmNodesIncludeFilePath, hostFile.GetAbsolutePath());
    rm = new MockRM(conf);
    rm.Start();
    MockNM nm1 = rm.RegisterNode("host1:1234", 5120);
    MockNM nm2 = rm.RegisterNode("host2:5678", 10240);
    MockNM nm3 = rm.RegisterNode("localhost:4433", 1024);

    ClusterMetrics metrics = ClusterMetrics.GetMetrics();
    System.Diagnostics.Debug.Assert(metrics != null);
    // Baseline-relative expectation for every count check below.
    int expectedDecommissioned = metrics.GetNumDecommisionedNMs() + 1;

    NodeHeartbeatResponse nodeHeartbeat = nm1.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));
    nodeHeartbeat = nm2.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));
    nodeHeartbeat = nm3.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));

    // To test that IPs also work
    string ip = NetUtils.NormalizeHostName("localhost");
    WriteToHostsFile("host1", ip);
    rm.GetNodesListManager().RefreshNodes(conf);
    CheckDecommissionedNMCount(rm, expectedDecommissioned);

    nodeHeartbeat = nm1.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));
    NUnit.Framework.Assert.AreEqual(expectedDecommissioned, ClusterMetrics.GetMetrics().GetNumDecommisionedNMs());

    nodeHeartbeat = nm2.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue("Node is not decommisioned.", NodeAction.Shutdown.Equals(nodeHeartbeat.GetNodeAction()));

    nodeHeartbeat = nm3.NodeHeartbeat(true);
    NUnit.Framework.Assert.IsTrue(NodeAction.Normal.Equals(nodeHeartbeat.GetNodeAction()));
    NUnit.Framework.Assert.AreEqual(expectedDecommissioned, ClusterMetrics.GetMetrics().GetNumDecommisionedNMs());
}
/// <summary>
/// Requests <paramref name="nContainer"/> containers of <paramref name="memory"/>
/// on "ANY" and keeps heartbeating <paramref name="nm"/> and re-allocating,
/// with a 500 ms pause per round, until at least that many containers have
/// been collected.
/// </summary>
/// <param name="nContainer">Number of containers to request and wait for.</param>
/// <param name="memory">Memory value passed with the request.</param>
/// <param name="nm">Node manager whose heartbeat triggers scheduling.</param>
/// <returns>The list of allocated containers gathered so far.</returns>
/// <exception cref="System.Exception"/>
public virtual IList<Container> AllocateAndWaitForContainers(int nContainer, int memory, MockNM nm)
{
    // Send the ask, then give the scheduler a chance to act on it.
    Allocate("ANY", memory, nContainer, null);
    nm.NodeHeartbeat(true);

    AllocateResponse firstResponse = Allocate(new AList<ResourceRequest>(), null);
    IList<Container> collected = firstResponse.GetAllocatedContainers();
    for (; collected.Count < nContainer; Sharpen.Thread.Sleep(500))
    {
        nm.NodeHeartbeat(true);
        AllocateResponse nextResponse = Allocate(new AList<ResourceRequest>(), new AList<ContainerId>());
        Sharpen.Collections.AddAll(collected, nextResponse.GetAllocatedContainers());
    }
    return collected;
}
/// <summary>
/// Checks the unhealthy-node count: it is expected to be 0 after
/// registration, 1 after the node heartbeats as unhealthy, and 0 again once
/// the node heartbeats as healthy.
/// </summary>
public virtual void TestUnhealthyNodeStatus()
{
    Configuration configuration = new Configuration();
    configuration.Set(YarnConfiguration.RmNodesExcludeFilePath, hostFile.GetAbsolutePath());
    rm = new MockRM(configuration);
    rm.Start();

    MockNM node = rm.RegisterNode("host1:1234", 5120);
    int unhealthyAtStart = ClusterMetrics.GetMetrics().GetUnhealthyNMs();
    NUnit.Framework.Assert.AreEqual(0, unhealthyAtStart);

    // healthy heartbeat first
    node.NodeHeartbeat(true);

    // then report the node as unhealthy
    node.NodeHeartbeat(false);
    CheckUnealthyNMCount(rm, node, true, 1);

    // and healthy once more
    node.NodeHeartbeat(true);
    CheckUnealthyNMCount(rm, node, false, 0);
}
/// <summary>
/// Heartbeats the node, marks the application's current attempt as launched,
/// registers the resulting AM and waits for the application to reach Running.
/// </summary>
/// <param name="app">Application whose current attempt is launched.</param>
/// <param name="rm">Resource manager the application was submitted to.</param>
/// <param name="nm">Node manager that is heartbeated before the launch.</param>
/// <returns>The registered mock application master.</returns>
/// <exception cref="System.Exception"/>
private MockAM LaunchAM(RMApp app, MockRM rm, MockNM nm)
{
    RMAppAttempt currentAttempt = app.GetCurrentAppAttempt();
    nm.NodeHeartbeat(true);

    MockAM launched = rm.SendAMLaunched(currentAttempt.GetAppAttemptId());
    launched.RegisterAppAttempt();
    rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
    return launched;
}
/// <summary>
/// Heartbeats <paramref name="nm"/> every 200 ms, at most 40 times, until the
/// scheduler knows a container with the given id. Returns silently whether
/// or not the container showed up.
/// </summary>
/// <param name="nm">Node manager to heartbeat while waiting.</param>
/// <param name="containerId">Id of the container to wait for.</param>
/// <exception cref="System.Exception"/>
public virtual void WaitForContainerAllocated(MockNM nm, ContainerId containerId)
{
    for (int polls = 0; GetResourceScheduler().GetRMContainer(containerId) == null && polls < 40; polls++)
    {
        System.Console.Out.WriteLine("Waiting for" + containerId + " to be allocated.");
        nm.NodeHeartbeat(true);
        Sharpen.Thread.Sleep(200);
    }
}
/// <summary>
/// Launches an application with a view ACL of "*", then registers an
/// application master through an RPC client that carries the AMRM token
/// taken from the container credentials. Expects a non-null client-to-AM
/// master key (non-empty when security is enabled) and the "*" view ACL in
/// the registration response.
/// </summary>
/// <remarks>
/// The configuration obtained from the RM is held in <c>rmConf</c>. A local
/// named <c>conf</c> declared here would hide the <c>conf</c> member used to
/// construct the RM earlier in this method, and C# rejects using a name
/// before a local declaration of it in the same block.
/// </remarks>
public virtual void TestAuthorizedAccess()
{
    TestAMAuthorization.MyContainerManager containerManager = new TestAMAuthorization.MyContainerManager();
    rm = new TestAMAuthorization.MockRMWithAMS(conf, containerManager);
    rm.Start();
    MockNM nm1 = rm.RegisterNode("localhost:1234", 5120);

    IDictionary<ApplicationAccessType, string> acls = new Dictionary<ApplicationAccessType, string>(2);
    acls[ApplicationAccessType.ViewApp] = "*";
    RMApp app = rm.SubmitApp(1024, "appname", "appuser", acls);
    nm1.NodeHeartbeat(true);

    // Wait up to 20 one-second polls for the container tokens to be captured.
    int waitCount = 0;
    while (containerManager.containerTokens == null && waitCount++ < 20)
    {
        Log.Info("Waiting for AM Launch to happen..");
        Sharpen.Thread.Sleep(1000);
    }
    NUnit.Framework.Assert.IsNotNull(containerManager.containerTokens);

    RMAppAttempt attempt = app.GetCurrentAppAttempt();
    ApplicationAttemptId applicationAttemptId = attempt.GetAppAttemptId();
    WaitForLaunchedState(attempt);

    // Create a client to the RM.
    Configuration rmConf = rm.GetConfig();
    YarnRPC rpc = YarnRPC.Create(rmConf);
    UserGroupInformation currentUser = UserGroupInformation.CreateRemoteUser(applicationAttemptId.ToString());
    Credentials credentials = containerManager.GetContainerCredentials();
    IPEndPoint rmBindAddress = rm.GetApplicationMasterService().GetBindAddress();
    Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> amRMToken = TestAMAuthorization.MockRMWithAMS.SetupAndReturnAMRMToken(rmBindAddress, credentials.GetAllTokens());
    currentUser.AddToken(amRMToken);
    ApplicationMasterProtocol client = currentUser.DoAs(new _PrivilegedAction_206(this, rpc, rmConf));

    RegisterApplicationMasterRequest request = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<RegisterApplicationMasterRequest>();
    RegisterApplicationMasterResponse response = client.RegisterApplicationMaster(request);
    NUnit.Framework.Assert.IsNotNull(response.GetClientToAMTokenMasterKey());
    if (UserGroupInformation.IsSecurityEnabled())
    {
        NUnit.Framework.Assert.IsTrue(((byte[])response.GetClientToAMTokenMasterKey().Array()).Length > 0);
    }
    NUnit.Framework.Assert.AreEqual("Register response has bad ACLs", "*", response.GetApplicationACLs()[ApplicationAccessType.ViewApp]);
}
// Disable webapp

/// <summary>
/// Unregisters the AM with a Succeeded final status, reports the AM container
/// (container 1 of the attempt) as complete through a node heartbeat, and
/// waits for the attempt and then the application to reach Finished.
/// </summary>
/// <param name="rmApp">Application expected to end in the Finished state.</param>
/// <param name="rm">Resource manager that owns the application.</param>
/// <param name="nm">Node manager that reports the completed AM container.</param>
/// <param name="am">Application master to unregister.</param>
/// <exception cref="System.Exception"/>
public static void FinishAMAndVerifyAppState(RMApp rmApp, Org.Apache.Hadoop.Yarn.Server.Resourcemanager.MockRM rm, MockNM nm, MockAM am)
{
    am.UnregisterAppAttempt(
        FinishApplicationMasterRequest.NewInstance(FinalApplicationStatus.Succeeded, string.Empty, string.Empty),
        true);
    am.WaitForState(RMAppAttemptState.Finishing);

    // The attempt only finishes once its AM container is reported complete.
    nm.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
    am.WaitForState(RMAppAttemptState.Finished);

    rm.WaitForState(rmApp.GetApplicationId(), RMAppState.Finished);
}
/// <summary>
/// This is to test AM Host and rpc port are invalidated after the am attempt
/// is killed or failed, so that client doesn't get the wrong information.
/// Runs one succeeded, one failed and one killed application and inspects
/// the three application reports.
/// </summary>
/// <remarks>
/// The RM created here is now stopped in a finally block; previously it was
/// started and never stopped.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestInvalidateAMHostPortWhenAMFailedOrKilled()
{
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    MockRM rm1 = new MockRM(conf);
    rm1.Start();
    try
    {
        // a succeeded app
        RMApp app1 = rm1.SubmitApp(200);
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
        MockRM.FinishAMAndVerifyAppState(app1, rm1, nm1, am1);

        // a failed app
        RMApp app2 = rm1.SubmitApp(200);
        MockAM am2 = MockRM.LaunchAndRegisterAM(app2, rm1, nm1);
        nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am2.WaitForState(RMAppAttemptState.Failed);
        rm1.WaitForState(app2.GetApplicationId(), RMAppState.Failed);

        // a killed app
        RMApp app3 = rm1.SubmitApp(200);
        MockAM am3 = MockRM.LaunchAndRegisterAM(app3, rm1, nm1);
        rm1.KillApp(app3.GetApplicationId());
        rm1.WaitForState(app3.GetApplicationId(), RMAppState.Killed);
        rm1.WaitForState(am3.GetApplicationAttemptId(), RMAppAttemptState.Killed);

        GetApplicationsRequest request1 = GetApplicationsRequest.NewInstance(EnumSet.Of(YarnApplicationState.Finished, YarnApplicationState.Killed, YarnApplicationState.Failed));
        GetApplicationsResponse response1 = rm1.GetClientRMService().GetApplications(request1);
        IList<ApplicationReport> appList1 = response1.GetApplicationList();
        NUnit.Framework.Assert.AreEqual(3, appList1.Count);

        foreach (ApplicationReport report in appList1)
        {
            // killed/failed apps host and rpc port are invalidated.
            if (report.GetApplicationId().Equals(app2.GetApplicationId()) || report.GetApplicationId().Equals(app3.GetApplicationId()))
            {
                NUnit.Framework.Assert.AreEqual("N/A", report.GetHost());
                NUnit.Framework.Assert.AreEqual(-1, report.GetRpcPort());
            }

            // succeeded app's host and rpc port is not invalidated
            if (report.GetApplicationId().Equals(app1.GetApplicationId()))
            {
                NUnit.Framework.Assert.IsFalse(report.GetHost().Equals("N/A"));
                NUnit.Framework.Assert.IsTrue(report.GetRpcPort() != -1);
            }
        }
    }
    finally
    {
        rm1.Stop();
    }
}
/// <summary>
/// Runs an application whose AM registers and immediately unregisters
/// without requesting any containers, then reports the AM container as
/// complete and waits for the attempt to reach Finished.
/// </summary>
/// <remarks>
/// The RM is stopped in a finally block so a failed wait does not leave a
/// running RM behind for later tests.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAppWithNoContainers()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
        RMApp app = rm.SubmitApp(2000);
        // kick the scheduling
        nm1.NodeHeartbeat(true);

        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
        am.RegisterAppAttempt();
        am.UnregisterAppAttempt();
        nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Waits for the application to be Accepted, heartbeats the node, marks the
/// current attempt as launched and waits for that attempt to reach Launched.
/// The returned AM is not registered by this method.
/// </summary>
/// <param name="app">Application whose current attempt is launched.</param>
/// <param name="rm">Resource manager the application was submitted to.</param>
/// <param name="nm">Node manager that is heartbeated before the launch.</param>
/// <returns>The mock application master for the launched attempt.</returns>
/// <exception cref="System.Exception"/>
public static MockAM LaunchAM(RMApp app, Org.Apache.Hadoop.Yarn.Server.Resourcemanager.MockRM rm, MockNM nm)
{
    rm.WaitForState(app.GetApplicationId(), RMAppState.Accepted);

    RMAppAttempt currentAttempt = app.GetCurrentAppAttempt();
    System.Console.Out.WriteLine("Launch AM " + currentAttempt.GetAppAttemptId());
    nm.NodeHeartbeat(true);

    MockAM launched = rm.SendAMLaunched(currentAttempt.GetAppAttemptId());
    rm.WaitForState(currentAttempt.GetAppAttemptId(), RMAppAttemptState.Launched);
    return launched;
}
/// <summary>
/// Starts an application on a first RM with containers on two nodes, then
/// starts a second RM on the same state store. Both nodes register with the
/// second RM (node 1 reporting the AM container as complete); the
/// application is expected to reach Failed and both nodes are expected to
/// receive the application cleanup message.
/// </summary>
/// <remarks>
/// Both RMs are stopped in a finally block, in the same order as on the
/// success path (rm1, then rm2), so that a failed wait does not leave
/// running RMs behind for later tests.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAppCleanupWhenRMRestartedBeforeAppFinished()
{
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    // start RM
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.Start();
    MockRM rm2 = null;
    try
    {
        MockNM nm1 = new MockNM("127.0.0.1:1234", 1024, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        MockNM nm2 = new MockNM("127.0.0.1:5678", 1024, rm1.GetResourceTrackerService());
        nm2.RegisterNode();

        // create app and launch the AM
        RMApp app0 = rm1.SubmitApp(200);
        MockAM am0 = LaunchAM(app0, rm1, nm1);

        // alloc another container on nm2
        AllocateResponse allocResponse = am0.Allocate(Arrays.AsList(ResourceRequest.NewInstance(Priority.NewInstance(1), "*", Resource.NewInstance(1024, 0), 1)), null);
        while (null == allocResponse.GetAllocatedContainers() || allocResponse.GetAllocatedContainers().IsEmpty())
        {
            nm2.NodeHeartbeat(true);
            allocResponse = am0.Allocate(null, null);
            Sharpen.Thread.Sleep(1000);
        }

        // start new RM
        rm2 = new MockRM(conf, memStore);
        rm2.Start();

        // nm1/nm2 register to rm2, and do a heartbeat
        nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
        nm1.RegisterNode(Arrays.AsList(NMContainerStatus.NewInstance(ContainerId.NewContainerId(am0.GetApplicationAttemptId(), 1), ContainerState.Complete, Resource.NewInstance(1024, 1), string.Empty, 0, Priority.NewInstance(0), 1234)), Arrays.AsList(app0.GetApplicationId()));
        nm2.SetResourceTrackerService(rm2.GetResourceTrackerService());
        nm2.RegisterNode(Arrays.AsList(app0.GetApplicationId()));

        // assert app state has been saved.
        rm2.WaitForState(app0.GetApplicationId(), RMAppState.Failed);

        // wait for application cleanup message received on NM1
        WaitForAppCleanupMessageRecved(nm1, app0.GetApplicationId());

        // wait for application cleanup message received on NM2
        WaitForAppCleanupMessageRecved(nm2, app0.GetApplicationId());
    }
    finally
    {
        // Nested so that rm2 is still stopped if stopping rm1 throws.
        try
        {
            rm1.Stop();
        }
        finally
        {
            if (rm2 != null)
            {
                rm2.Stop();
            }
        }
    }
}
/// <summary>
/// Allocates one container and asserts that the RM identifier inside its
/// container token equals <c>MockRM.GetClusterTimeStamp()</c>.
/// </summary>
/// <remarks>
/// The RM is stopped in a finally block so a failed assertion does not leave
/// a running RM behind for later tests.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestRMIdentifierOnContainerAllocation()
{
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        // Register node1
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);

        // Submit an application
        RMApp app1 = rm.SubmitApp(2048);

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();

        am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
        // send the request
        AllocateResponse alloc1Response = am1.Schedule();

        // kick the scheduler
        nm1.NodeHeartbeat(true);
        while (alloc1Response.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
            alloc1Response = am1.Schedule();
        }

        // assert RMIdentifer is set properly in allocated containers
        Container allocatedContainer = alloc1Response.GetAllocatedContainers()[0];
        ContainerTokenIdentifier tokenId = BuilderUtils.NewContainerTokenIdentifier(allocatedContainer.GetContainerToken());
        NUnit.Framework.Assert.AreEqual(MockRM.GetClusterTimeStamp(), tokenId.GetRMIdentifier());
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Runs an application to Running on a first RM, starts a second RM on the
/// same state store, re-registers the node with it, and then heartbeats a
/// running container (id 2) of the old attempt. The second RM is expected to
/// ask the node to clean that container up.
/// </summary>
/// <remarks>
/// Both RMs are stopped in a finally block, in the same order as on the
/// success path (rm1, then rm2), so that a failed wait does not leave
/// running RMs behind for later tests.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestContainerCleanupWhenRMRestartedAppNotRegistered()
{
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    // start RM
    DrainDispatcher dispatcher = new DrainDispatcher();
    MockRM rm1 = new _MockRM_413(dispatcher, conf, memStore);
    rm1.Start();
    MockRM rm2 = null;
    try
    {
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.GetResourceTrackerService());
        nm1.RegisterNode();

        // create app and launch the AM
        RMApp app0 = rm1.SubmitApp(200);
        MockAM am0 = LaunchAM(app0, rm1, nm1);
        nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Running);
        rm1.WaitForState(app0.GetApplicationId(), RMAppState.Running);

        // start new RM
        DrainDispatcher dispatcher2 = new DrainDispatcher();
        rm2 = new _MockRM_432(dispatcher2, conf, memStore);
        rm2.Start();

        // nm1 register to rm2, and do a heartbeat
        nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
        nm1.RegisterNode(Arrays.AsList(app0.GetApplicationId()));
        rm2.WaitForState(app0.GetApplicationId(), RMAppState.Accepted);

        // Add unknown container for application unknown to scheduler
        NodeHeartbeatResponse response = nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 2, ContainerState.Running);
        WaitForContainerCleanup(dispatcher2, nm1, response);
    }
    finally
    {
        // Nested so that rm2 is still stopped if stopping rm1 throws.
        try
        {
            rm1.Stop();
        }
        finally
        {
            if (rm2 != null)
            {
                rm2.Stop();
            }
        }
    }
}
/// <summary>
/// Heartbeats <paramref name="nm"/> once per second until a heartbeat
/// response lists exactly one application to clean up and that application
/// is <paramref name="appId"/>. There is no upper bound on the number of
/// heartbeats.
/// </summary>
/// <param name="nm">Node manager to heartbeat.</param>
/// <param name="appId">Application expected in the cleanup list.</param>
/// <exception cref="System.Exception"/>
private void WaitForAppCleanupMessageRecved(MockNM nm, ApplicationId appId)
{
    for (;;)
    {
        NodeHeartbeatResponse reply = nm.NodeHeartbeat(true);
        var toCleanup = reply.GetApplicationsToCleanup();
        bool onlyThisApp = toCleanup != null
            && toCleanup.Count == 1
            && appId.Equals(toCleanup[0]);
        if (onlyThisApp)
        {
            return;
        }

        Log.Info("Haven't got application=" + appId.ToString() + " in cleanup list from node heartbeat response, " + "sleep for a while before next heartbeat");
        Sharpen.Thread.Sleep(1000);
    }
}
/// <summary>
/// Waits, heartbeating <paramref name="nm"/> every 100 ms, first for the
/// scheduler to know the container and then for the container to reach
/// <paramref name="containerState"/>. Both phases share one poll counter.
/// </summary>
/// <param name="nm">Node manager to heartbeat while waiting.</param>
/// <param name="containerId">Id of the container to watch.</param>
/// <param name="containerState">State the container is expected to reach.</param>
/// <param name="timeoutMillisecs">Overall time budget, consumed in 100 ms polls.</param>
/// <returns>
/// <c>true</c> once the container is in the expected state; <c>false</c> when
/// the time budget is used up inside either wait loop.
/// </returns>
/// <exception cref="System.Exception"/>
public virtual bool WaitForState(MockNM nm, ContainerId containerId, RMContainerState containerState, int timeoutMillisecs)
{
    // Each poll sleeps 100 ms, so this is how many polls the budget allows.
    int maxPolls = timeoutMillisecs / 100;
    int polls = 0;

    // Phase 1: wait for the scheduler to know about the container.
    RMContainer tracked = GetResourceScheduler().GetRMContainer(containerId);
    while (tracked == null && polls++ < maxPolls)
    {
        nm.NodeHeartbeat(true);
        tracked = GetResourceScheduler().GetRMContainer(containerId);
        System.Console.Out.WriteLine("Waiting for container " + containerId + " to be allocated.");
        Sharpen.Thread.Sleep(100);
        if (timeoutMillisecs <= polls * 100)
        {
            return false;
        }
    }
    NUnit.Framework.Assert.IsNotNull("Container shouldn't be null", tracked);

    // Phase 2: wait for the requested state, continuing the same poll count.
    while (!containerState.Equals(tracked.GetState()) && polls++ < maxPolls)
    {
        System.Console.Out.WriteLine("Container : " + containerId + " State is : " + tracked.GetState() + " Waiting for state : " + containerState);
        nm.NodeHeartbeat(true);
        Sharpen.Thread.Sleep(100);
        if (timeoutMillisecs <= polls * 100)
        {
            return false;
        }
    }

    System.Console.Out.WriteLine("Container State is : " + tracked.GetState());
    NUnit.Framework.Assert.AreEqual("Container state is not correct (timedout)", containerState, tracked.GetState());
    return true;
}
/// <summary>
/// Sends node-level ("127.0.0.1") and rack-level ("/default-rack") requests
/// without an off-switch request and verifies that the following node
/// heartbeat does not fail with a null-related exception.
/// </summary>
/// <remarks>
/// Both <see cref="System.ArgumentNullException"/> and
/// <see cref="System.NullReferenceException"/> are reported through the
/// assertion message, since a null dereference in .NET raises the latter.
/// The RM is stopped in a finally block so a failure does not leave a
/// running RM behind for later tests.
/// </remarks>
public virtual void TestAllocateContainerOnNodeWithoutOffSwitchSpecified()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
        RMApp app1 = rm.SubmitApp(2048);
        // kick the scheduling, 2 GB given to AM1, remaining 4GB on nm1
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();

        // add request for containers
        IList<ResourceRequest> requests = new AList<ResourceRequest>();
        requests.AddItem(am1.CreateResourceReq("127.0.0.1", 1 * Gb, 1, 1));
        requests.AddItem(am1.CreateResourceReq("/default-rack", 1 * Gb, 1, 1));
        // send the request
        am1.Allocate(requests, null);

        string failureMessage = "NPE when allocating container on node but " + "forget to set off-switch request should be handled";
        try
        {
            // kick the schedule
            nm1.NodeHeartbeat(true);
        }
        catch (ArgumentNullException)
        {
            NUnit.Framework.Assert.Fail(failureMessage);
        }
        catch (NullReferenceException)
        {
            NUnit.Framework.Assert.Fail(failureMessage);
        }
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Heartbeats <paramref name="nm1"/> every 100 ms until the root queue's
/// allocated memory equals <paramref name="clusterMemory"/>, and fails the
/// test once the poll counter reaches 50 without a match.
/// </summary>
/// <param name="nm1">Node manager to heartbeat while waiting.</param>
/// <param name="rs">Scheduler whose root queue metrics are read.</param>
/// <param name="clusterMemory">Expected allocated memory in MB.</param>
/// <exception cref="System.Exception"/>
private void WaitForClusterMemory(MockNM nm1, ResourceScheduler rs, int clusterMemory)
{
    for (int polls = 0; rs.GetRootQueueMetrics().GetAllocatedMB() != clusterMemory; polls++)
    {
        nm1.NodeHeartbeat(true);
        Sharpen.Thread.Sleep(100);
        if (polls == 50)
        {
            NUnit.Framework.Assert.Fail("Wait for cluster memory is timed out.Expected=" + clusterMemory + " Actual=" + rs.GetRootQueueMetrics().GetAllocatedMB());
        }
    }
}
/// <summary>
/// Sets the NM heartbeat interval to 4000 ms in the configuration and checks
/// that the heartbeat responses of two registered nodes both carry 4000 as
/// the next heartbeat interval.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestGetNextHeartBeatInterval()
{
    Configuration configuration = new Configuration();
    configuration.Set(YarnConfiguration.RmNmHeartbeatIntervalMs, "4000");
    rm = new MockRM(configuration);
    rm.Start();

    MockNM firstNode = rm.RegisterNode("host1:1234", 5120);
    MockNM secondNode = rm.RegisterNode("host2:5678", 10240);

    NodeHeartbeatResponse firstReply = firstNode.NodeHeartbeat(true);
    NUnit.Framework.Assert.AreEqual(4000, firstReply.GetNextHeartBeatInterval());

    NodeHeartbeatResponse secondReply = secondNode.NodeHeartbeat(true);
    NUnit.Framework.Assert.AreEqual(4000, secondReply.GetNextHeartBeatInterval());
}