/// <summary>
/// Hands the resource tracker service a series of completed-container
/// reports and verifies, after each one, that the spied event handler
/// has not received any event.
/// </summary>
/// <remarks>
/// Two applications are exercised: one submitted with the second argument
/// <c>true</c> (labelled "Unmanaged AM") and one submitted without it
/// (labelled "Managed AM"). For each, a report is sent for attempt number 2
/// of the application and another after the current attempt's master
/// container has been set to <c>null</c>.
/// NOTE(review): the spy wraps the handler returned by the dispatcher;
/// whether the RM actually dispatches through that spy is not visible here.
/// </remarks>
public virtual void TestHandleContainerStatusInvalidCompletions()
{
    rm = new MockRM(new YarnConfiguration());
    rm.Start();
    EventHandler spiedHandler = Org.Mockito.Mockito.Spy(
        rm.GetRMContext().GetDispatcher().GetEventHandler());

    // Case 1: Unmanaged AM
    RMApp submittedApp = rm.SubmitApp(1024, true);

    // Case 1.1: AppAttemptId is null
    ApplicationAttemptId otherAttemptId =
        ApplicationAttemptId.NewInstance(submittedApp.GetApplicationId(), 2);
    NMContainerStatus containerReport = NMContainerStatus.NewInstance(
        ContainerId.NewContainerId(otherAttemptId, 1),
        ContainerState.Complete,
        Resource.NewInstance(1024, 1),
        "Dummy Completed",
        0,
        Priority.NewInstance(10),
        1234);
    rm.GetResourceTrackerService().HandleNMContainerStatus(containerReport, null);
    Org.Mockito.Mockito.Verify(spiedHandler, Org.Mockito.Mockito.Never())
        .Handle((Org.Apache.Hadoop.Yarn.Event.Event)Matchers.Any());

    // Case 1.2: Master container is null
    RMAppAttemptImpl attemptUnderTest = (RMAppAttemptImpl)submittedApp.GetCurrentAppAttempt();
    attemptUnderTest.SetMasterContainer(null);
    containerReport = NMContainerStatus.NewInstance(
        ContainerId.NewContainerId(attemptUnderTest.GetAppAttemptId(), 0),
        ContainerState.Complete,
        Resource.NewInstance(1024, 1),
        "Dummy Completed",
        0,
        Priority.NewInstance(10),
        1234);
    rm.GetResourceTrackerService().HandleNMContainerStatus(containerReport, null);
    Org.Mockito.Mockito.Verify(spiedHandler, Org.Mockito.Mockito.Never())
        .Handle((Org.Apache.Hadoop.Yarn.Event.Event)Matchers.Any());

    // Case 2: Managed AM
    submittedApp = rm.SubmitApp(1024);

    // Case 2.1: AppAttemptId is null
    otherAttemptId = ApplicationAttemptId.NewInstance(submittedApp.GetApplicationId(), 2);
    containerReport = NMContainerStatus.NewInstance(
        ContainerId.NewContainerId(otherAttemptId, 1),
        ContainerState.Complete,
        Resource.NewInstance(1024, 1),
        "Dummy Completed",
        0,
        Priority.NewInstance(10),
        1234);
    try
    {
        rm.GetResourceTrackerService().HandleNMContainerStatus(containerReport, null);
    }
    catch (Exception)
    {
        // expected - ignore
    }
    Org.Mockito.Mockito.Verify(spiedHandler, Org.Mockito.Mockito.Never())
        .Handle((Org.Apache.Hadoop.Yarn.Event.Event)Matchers.Any());

    // Case 2.2: Master container is null
    attemptUnderTest = (RMAppAttemptImpl)submittedApp.GetCurrentAppAttempt();
    attemptUnderTest.SetMasterContainer(null);
    containerReport = NMContainerStatus.NewInstance(
        ContainerId.NewContainerId(attemptUnderTest.GetAppAttemptId(), 0),
        ContainerState.Complete,
        Resource.NewInstance(1024, 1),
        "Dummy Completed",
        0,
        Priority.NewInstance(10),
        1234);
    try
    {
        rm.GetResourceTrackerService().HandleNMContainerStatus(containerReport, null);
    }
    catch (Exception)
    {
        // expected - ignore
    }
    Org.Mockito.Mockito.Verify(spiedHandler, Org.Mockito.Mockito.Never())
        .Handle((Org.Apache.Hadoop.Yarn.Event.Event)Matchers.Any());
}
/// <summary>
/// Sends a node heartbeat, marks the application's current attempt as
/// launched, registers the resulting AM, and then waits until both the
/// application and its current attempt report the Running state.
/// </summary>
/// <param name="app">Application whose current attempt is launched.</param>
/// <param name="rm">Resource manager used to launch and to wait on state.</param>
/// <param name="nm">Node manager that sends the heartbeat.</param>
/// <returns>The registered mock application master.</returns>
/// <exception cref="System.Exception"/>
protected internal virtual MockAM LaunchAM(RMApp app, MockRM rm, MockNM nm)
{
    RMAppAttempt currentAttempt = app.GetCurrentAppAttempt();
    nm.NodeHeartbeat(true);

    MockAM launchedAm = rm.SendAMLaunched(currentAttempt.GetAppAttemptId());
    launchedAm.RegisterAppAttempt();

    rm.WaitForState(app.GetApplicationId(), RMAppState.Running);
    // The attempt is looked up again here rather than reusing the earlier reference.
    rm.WaitForState(
        app.GetCurrentAppAttempt().GetAppAttemptId(),
        RMAppAttemptState.Running);
    return launchedAm;
}
/// <summary>
/// Starts a <c>MockRM</c> with <paramref name="conf"/>, registers one node,
/// submits an application asking for <paramref name="testAlloc"/> and
/// asserts that the memory reported as used on the node equals the
/// configured scheduler minimum allocation.
/// </summary>
/// <param name="conf">
/// Configuration for the RM; also the source of the expected value
/// (<c>RmSchedulerMinimumAllocationMb</c>, falling back to its default).
/// </param>
/// <param name="testAlloc">Amount passed to <c>SubmitApp</c>.</param>
/// <exception cref="System.Exception"/>
private void TestMinimumAllocation(YarnConfiguration conf, int testAlloc)
{
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();

        // Register node1
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);

        // Submit an application
        RMApp app1 = rm.SubmitApp(testAlloc);

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();

        SchedulerNodeReport report_nm1 =
            rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        int checkAlloc = conf.GetInt(
            YarnConfiguration.RmSchedulerMinimumAllocationMb,
            YarnConfiguration.DefaultRmSchedulerMinimumAllocationMb);
        NUnit.Framework.Assert.AreEqual(checkAlloc, report_nm1.GetUsedResource().GetMemory());
    }
    finally
    {
        // Stop the RM even when an assertion above fails, so it is not leaked.
        rm.Stop();
    }
}
/// <summary>
/// Tests that fetching the AM container is retried when the AM container is
/// not fetchable because DNS is unavailable, causing container token/NM token
/// creation to fail.
/// </summary>
/// <remarks>
/// The node is registered under the host name "unknownhost" and the token
/// service is switched to use IPs. While <c>numRetries</c> (maintained
/// outside this method) is at most 5, the attempt is expected to stay in
/// the Scheduled state. After the switch is turned off again the attempt
/// must reach Allocated.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAMContainerAllocationWhenDNSUnavailable()
{
    MockRM rm1 = new _MockRM_303(this, conf);
    try
    {
        rm1.Start();
        MockNM nm1 = rm1.RegisterNode("unknownhost:1234", 8000);
        SecurityUtilTestHelper.SetTokenServiceUseIp(true);
        RMApp app1 = rm1.SubmitApp(200);
        RMAppAttempt attempt = app1.GetCurrentAppAttempt();
        nm1.NodeHeartbeat(true);

        // fetching am container will fail, keep retrying 5 times.
        while (numRetries <= 5)
        {
            nm1.NodeHeartbeat(true);
            Sharpen.Thread.Sleep(1000);
            NUnit.Framework.Assert.AreEqual(
                RMAppAttemptState.Scheduled, attempt.GetAppAttemptState());
            System.Console.Out.WriteLine("Waiting for am container to be allocated.");
        }

        SecurityUtilTestHelper.SetTokenServiceUseIp(false);
        rm1.WaitForState(attempt.GetAppAttemptId(), RMAppAttemptState.Allocated);
        MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
    }
    finally
    {
        // Restore the process-wide switch even when an assertion failed inside
        // the retry loop, so later tests do not inherit it; then release the RM.
        SecurityUtilTestHelper.SetTokenServiceUseIp(false);
        rm1.Stop();
    }
}
/// <summary>Builds a key/value summary of an application's runtime.</summary>
/// <param name="app">
/// The
/// <see cref="Org.Apache.Hadoop.Yarn.Server.Resourcemanager.Rmapp.RMApp"/>
/// to summarise; must not be <code>null</code>.
/// </param>
/// <returns>
/// A builder populated with the application's identity, state, timing,
/// resource-usage and preemption fields. The tracking URL and AM host are
/// reported as "N/A" when the application has no current attempt.
/// </returns>
public static RMAppManager.ApplicationSummary.SummaryBuilder CreateAppSummary(RMApp app)
{
    RMAppAttempt currentAttempt = app.GetCurrentAppAttempt();
    string trackingUrl = currentAttempt != null ? currentAttempt.GetTrackingUrl() : "N/A";
    string amHost = currentAttempt != null ? currentAttempt.GetHost() : "N/A";
    RMAppMetrics appMetrics = app.GetRMAppMetrics();

    return new RMAppManager.ApplicationSummary.SummaryBuilder()
        .Add("appId", app.GetApplicationId())
        .Add("name", app.GetName())
        .Add("user", app.GetUser())
        .Add("queue", app.GetQueue())
        .Add("state", app.GetState())
        .Add("trackingUrl", trackingUrl)
        .Add("appMasterHost", amHost)
        .Add("startTime", app.GetStartTime())
        .Add("finishTime", app.GetFinishTime())
        .Add("finalStatus", app.GetFinalApplicationStatus())
        .Add("memorySeconds", appMetrics.GetMemorySeconds())
        .Add("vcoreSeconds", appMetrics.GetVcoreSeconds())
        .Add("preemptedAMContainers", appMetrics.GetNumAMContainersPreempted())
        .Add("preemptedNonAMContainers", appMetrics.GetNumNonAMContainersPreempted())
        .Add("preemptedResources", appMetrics.GetResourcePreempted())
        .Add("applicationType", app.GetApplicationType());
}
/// <summary>
/// Creates a Mockito mock of <c>RMApp</c> stubbed with fixed test values
/// (name, type, user, queue, timestamps, diagnostics, final status and
/// metrics) and a mocked current attempt whose id is attempt 1 of
/// <paramref name="appId"/>.
/// </summary>
/// <param name="appId">Application id the mock reports.</param>
/// <returns>The stubbed mock application.</returns>
private static RMApp CreateRMApp(ApplicationId appId)
{
    RMApp mockedApp = Org.Mockito.Mockito.Mock<RMApp>();

    // Identity and descriptive fields.
    Org.Mockito.Mockito.When(mockedApp.GetApplicationId()).ThenReturn(appId);
    Org.Mockito.Mockito.When(mockedApp.GetName()).ThenReturn("test app");
    Org.Mockito.Mockito.When(mockedApp.GetApplicationType()).ThenReturn("test app type");
    Org.Mockito.Mockito.When(mockedApp.GetUser()).ThenReturn("test user");
    Org.Mockito.Mockito.When(mockedApp.GetQueue()).ThenReturn("test queue");

    // Timestamps are deliberately larger than int.MaxValue.
    Org.Mockito.Mockito.When(mockedApp.GetSubmitTime()).ThenReturn(int.MaxValue + 1L);
    Org.Mockito.Mockito.When(mockedApp.GetStartTime()).ThenReturn(int.MaxValue + 2L);
    Org.Mockito.Mockito.When(mockedApp.GetFinishTime()).ThenReturn(int.MaxValue + 3L);

    Org.Mockito.Mockito.When(mockedApp.GetDiagnostics())
        .ThenReturn(new StringBuilder("test diagnostics info"));

    // Current attempt: a mock that only knows its own attempt id.
    RMAppAttempt mockedAttempt = Org.Mockito.Mockito.Mock<RMAppAttempt>();
    Org.Mockito.Mockito.When(mockedAttempt.GetAppAttemptId())
        .ThenReturn(ApplicationAttemptId.NewInstance(appId, 1));
    Org.Mockito.Mockito.When(mockedApp.GetCurrentAppAttempt()).ThenReturn(mockedAttempt);

    Org.Mockito.Mockito.When(mockedApp.GetFinalApplicationStatus())
        .ThenReturn(FinalApplicationStatus.Undefined);
    Org.Mockito.Mockito.When(mockedApp.GetRMAppMetrics())
        .ThenReturn(new RMAppMetrics(null, 0, 0, int.MaxValue, long.MaxValue));

    return mockedApp;
}
/// <summary>
/// Allocates a fresh application attempt id, adds the application (and its
/// attempt, if the application was accepted) to the scheduler, registers a
/// mocked <c>RMApp</c> for it in the RM context and submits a single
/// resource request on its behalf.
/// </summary>
/// <param name="memory">Memory passed to <c>CreateResourceRequest</c>.</param>
/// <param name="vcores">Virtual cores passed to <c>CreateResourceRequest</c>.</param>
/// <param name="queueId">Queue the application is added to.</param>
/// <param name="userId">User the application is added for.</param>
/// <param name="numContainers">Number of containers requested.</param>
/// <param name="priority">Priority of the request.</param>
/// <returns>The attempt id that was created for the request.</returns>
protected internal virtual ApplicationAttemptId CreateSchedulingRequest(int memory,
    int vcores, string queueId, string userId, int numContainers, int priority)
{
    // Both counters are advanced so the next call gets distinct ids.
    ApplicationAttemptId attemptId = CreateAppAttemptId(this.AppId++, this.AttemptId++);
    scheduler.AddApplication(attemptId.GetApplicationId(), queueId, userId, false);

    // This conditional is for testAclSubmitApplication where app is rejected
    // and no app is added.
    bool appWasAdded =
        scheduler.GetSchedulerApplications().Contains(attemptId.GetApplicationId());
    if (appWasAdded)
    {
        scheduler.AddApplicationAttempt(attemptId, false, false);
    }

    IList<ResourceRequest> asks = new AList<ResourceRequest>();
    asks.AddItem(CreateResourceRequest(
        memory, vcores, ResourceRequest.Any, priority, numContainers, true));

    // Register a mocked app whose current attempt exposes attempt metrics.
    RMApp mockedApp = Org.Mockito.Mockito.Mock<RMApp>();
    RMAppAttempt mockedAttempt = Org.Mockito.Mockito.Mock<RMAppAttempt>();
    Org.Mockito.Mockito.When(mockedApp.GetCurrentAppAttempt()).ThenReturn(mockedAttempt);
    Org.Mockito.Mockito.When(mockedAttempt.GetRMAppAttemptMetrics())
        .ThenReturn(new RMAppAttemptMetrics(attemptId, resourceManager.GetRMContext()));
    resourceManager.GetRMContext().GetRMApps()[attemptId.GetApplicationId()] = mockedApp;

    scheduler.Allocate(attemptId, asks, new AList<ContainerId>(), null, null);
    return attemptId;
}
/// <summary>
/// Test regular RM restart/failover: the new RM should not count the AM
/// failure towards the max-retry count and should be able to re-launch
/// the AM.
/// </summary>
/// <remarks>
/// Runs with recovery enabled, work-preserving recovery disabled, a
/// <c>MemoryRMStateStore</c> shared by both RMs and max AM attempts set to 1.
/// After the second RM starts, the node re-registers with a completed
/// container status for the first attempt's master container.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestRMRestartOrFailoverNotCountedForAMFailures()
{
    YarnConfiguration recoveryConf = new YarnConfiguration();
    recoveryConf.SetClass(
        YarnConfiguration.RmScheduler, typeof(CapacityScheduler), typeof(ResourceScheduler));
    recoveryConf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
    recoveryConf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
    recoveryConf.Set(YarnConfiguration.RmStore, typeof(MemoryRMStateStore).FullName);
    // explicitly set max-am-retry count as 1.
    recoveryConf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);

    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.Init(recoveryConf);

    MockRM firstRm = new MockRM(recoveryConf, stateStore);
    firstRm.Start();
    MockNM node = new MockNM("127.0.0.1:1234", 8000, firstRm.GetResourceTrackerService());
    node.RegisterNode();
    RMApp submittedApp = firstRm.SubmitApp(200);

    // AM should be restarted even though max-am-attempt is 1.
    MockAM firstAm = MockRM.LaunchAndRegisterAM(submittedApp, firstRm, node);
    RMAppAttempt firstAttempt = submittedApp.GetCurrentAppAttempt();
    NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)firstAttempt).MayBeLastAttempt());

    // Restart rm.
    MockRM secondRm = new MockRM(recoveryConf, stateStore);
    secondRm.Start();
    ApplicationStateData storedAppState =
        stateStore.GetState().GetApplicationState()[submittedApp.GetApplicationId()];

    // re-register the NM
    node.SetResourceTrackerService(secondRm.GetResourceTrackerService());
    NMContainerStatus amContainerStatus =
        Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NMContainerStatus>();
    amContainerStatus.SetContainerExitStatus(ContainerExitStatus.KilledByResourcemanager);
    amContainerStatus.SetContainerId(firstAttempt.GetMasterContainer().GetId());
    amContainerStatus.SetContainerState(ContainerState.Complete);
    amContainerStatus.SetDiagnostics(string.Empty);
    node.RegisterNode(Sharpen.Collections.SingletonList(amContainerStatus), null);

    secondRm.WaitForState(firstAttempt.GetAppAttemptId(), RMAppAttemptState.Failed);
    NUnit.Framework.Assert.AreEqual(
        ContainerExitStatus.KilledByResourcemanager,
        storedAppState.GetAttempt(firstAm.GetApplicationAttemptId()).GetAMContainerExitStatus());

    // Will automatically start a new AppAttempt in rm2
    secondRm.WaitForState(submittedApp.GetApplicationId(), RMAppState.Accepted);
    MockAM secondAm =
        secondRm.WaitForNewAMToLaunchAndRegister(submittedApp.GetApplicationId(), 2, node);
    MockRM.FinishAMAndVerifyAppState(submittedApp, secondRm, node, secondAm);

    RMAppAttempt relaunchedAttempt = secondRm.GetRMContext()
        .GetRMApps()[submittedApp.GetApplicationId()]
        .GetCurrentAppAttempt();
    NUnit.Framework.Assert.IsTrue(relaunchedAttempt.ShouldCountTowardsMaxAttemptRetry());
    NUnit.Framework.Assert.AreEqual(
        ContainerExitStatus.Invalid,
        storedAppState.GetAttempt(secondAm.GetApplicationAttemptId()).GetAMContainerExitStatus());

    firstRm.Stop();
    secondRm.Stop();
}
/// <summary>
/// Verifies that the application attempt reaches the Launched state when
/// the mocked container-management proxy throws
/// <c>NMNotYetReadyException</c> on the first <c>StartContainers</c> call
/// and returns a response on the next one.
/// </summary>
public virtual void TestRetriesOnFailures()
{
    // Proxy that fails once, then succeeds.
    ContainerManagementProtocol flakyProxy =
        Org.Mockito.Mockito.Mock<ContainerManagementProtocol>();
    StartContainersResponse okResponse =
        Org.Mockito.Mockito.Mock<StartContainersResponse>();
    Org.Mockito.Mockito
        .When(flakyProxy.StartContainers(Matchers.Any<StartContainersRequest>()))
        .ThenThrow(new NMNotYetReadyException("foo"))
        .ThenReturn(okResponse);

    Configuration retryConf = new Configuration();
    retryConf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    retryConf.SetInt(YarnConfiguration.ClientNmConnectRetryIntervalMs, 1);

    DrainDispatcher drainDispatcher = new DrainDispatcher();
    MockRM rm = new _MockRMWithCustomAMLauncher_206(drainDispatcher, flakyProxy, retryConf, null);
    rm.Start();

    MockNM node = rm.RegisterNode("127.0.0.1:1234", 5120);
    RMApp submittedApp = rm.SubmitApp(2000);
    ApplicationAttemptId attemptId = submittedApp.GetCurrentAppAttempt().GetAppAttemptId();

    // kick the scheduling
    node.NodeHeartbeat(true);
    drainDispatcher.Await();

    rm.WaitForState(attemptId, RMAppAttemptState.Launched, 500);
}
/// <summary>
/// Test RM restart after the AM container is preempted: the new RM should
/// not count the AM preemption failure towards the max-retry count and
/// should be able to re-launch the AM.
/// </summary>
/// <remarks>
/// Runs with recovery enabled, work-preserving recovery disabled, a
/// <c>MemoryRMStateStore</c> shared by both RMs and max AM attempts set to 1.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestPreemptedAMRestartOnRMRestart()
{
    YarnConfiguration recoveryConf = new YarnConfiguration();
    recoveryConf.SetClass(
        YarnConfiguration.RmScheduler, typeof(CapacityScheduler), typeof(ResourceScheduler));
    recoveryConf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
    recoveryConf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
    recoveryConf.Set(YarnConfiguration.RmStore, typeof(MemoryRMStateStore).FullName);
    // explicitly set max-am-retry count as 1.
    recoveryConf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);

    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.Init(recoveryConf);

    MockRM firstRm = new MockRM(recoveryConf, stateStore);
    firstRm.Start();
    MockNM node = new MockNM("127.0.0.1:1234", 8000, firstRm.GetResourceTrackerService());
    node.RegisterNode();

    RMApp submittedApp = firstRm.SubmitApp(200);
    RMAppAttempt firstAttempt = submittedApp.GetCurrentAppAttempt();
    MockAM firstAm = MockRM.LaunchAndRegisterAM(submittedApp, firstRm, node);

    CapacityScheduler capacityScheduler = (CapacityScheduler)firstRm.GetResourceScheduler();
    ContainerId amContainerId =
        ContainerId.NewContainerId(firstAm.GetApplicationAttemptId(), 1);

    // Forcibly preempt the am container;
    capacityScheduler.KillContainer(capacityScheduler.GetRMContainer(amContainerId));
    firstAm.WaitForState(RMAppAttemptState.Failed);
    NUnit.Framework.Assert.IsTrue(!firstAttempt.ShouldCountTowardsMaxAttemptRetry());
    firstRm.WaitForState(submittedApp.GetApplicationId(), RMAppState.Accepted);

    // state store has 1 attempt stored.
    ApplicationStateData storedAppState =
        stateStore.GetState().GetApplicationState()[submittedApp.GetApplicationId()];
    NUnit.Framework.Assert.AreEqual(1, storedAppState.GetAttemptCount());

    // attempt stored has the preempted container exit status.
    NUnit.Framework.Assert.AreEqual(
        ContainerExitStatus.Preempted,
        storedAppState.GetAttempt(firstAm.GetApplicationAttemptId()).GetAMContainerExitStatus());

    // Restart rm.
    MockRM secondRm = new MockRM(recoveryConf, stateStore);
    node.SetResourceTrackerService(secondRm.GetResourceTrackerService());
    node.RegisterNode();
    secondRm.Start();

    // Restarted RM should re-launch the am.
    MockAM secondAm =
        secondRm.WaitForNewAMToLaunchAndRegister(submittedApp.GetApplicationId(), 2, node);
    MockRM.FinishAMAndVerifyAppState(submittedApp, secondRm, node, secondAm);

    RMAppAttempt secondAttempt = secondRm.GetRMContext()
        .GetRMApps()[submittedApp.GetApplicationId()]
        .GetCurrentAppAttempt();
    NUnit.Framework.Assert.IsTrue(secondAttempt.ShouldCountTowardsMaxAttemptRetry());
    NUnit.Framework.Assert.AreEqual(
        ContainerExitStatus.Invalid,
        storedAppState.GetAttempt(secondAm.GetApplicationAttemptId()).GetAMContainerExitStatus());

    firstRm.Stop();
    secondRm.Stop();
}
/// <summary>
/// Runs one application against two registered nodes ("h1" and "h2"),
/// requests 13 containers on "h1", and asserts that 3 containers are
/// collected while heartbeating the first node and then 10 while
/// heartbeating the second.
/// </summary>
/// <remarks>
/// Sets the root logger to Debug and writes
/// "yarn.scheduler.capacity.node-locality-delay" = "-1" into the shared
/// <c>conf</c> before creating the RM. The RM is stopped in a
/// <c>finally</c> block so a failed assertion does not leak it.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestAppOnMultiNode()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    conf.Set("yarn.scheduler.capacity.node-locality-delay", "-1");
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
        MockNM nm2 = rm.RegisterNode("h2:5678", 10240);
        RMApp app = rm.SubmitApp(2000);

        //kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
        am.RegisterAppAttempt();

        //request for containers
        int request = 13;
        am.Allocate("h1", 1000, request, new AList<ContainerId>());

        //kick the scheduler
        IList<Container> conts = am.Allocate(
            new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers();
        int contReceived = conts.Count;
        while (contReceived < 3)
        {
            //only 3 containers are available on node1
            nm1.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(conts, am.Allocate(
                new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers());
            contReceived = conts.Count;
            Log.Info("Got " + contReceived + " containers. Waiting to get " + 3);
            Sharpen.Thread.Sleep(WaitSleepMs);
        }
        NUnit.Framework.Assert.AreEqual(3, conts.Count);

        //send node2 heartbeat
        // The list is replaced here, so the count below starts over.
        conts = am.Allocate(
            new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers();
        contReceived = conts.Count;
        while (contReceived < 10)
        {
            nm2.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(conts, am.Allocate(
                new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers());
            contReceived = conts.Count;
            Log.Info("Got " + contReceived + " containers. Waiting to get " + 10);
            Sharpen.Thread.Sleep(WaitSleepMs);
        }
        NUnit.Framework.Assert.AreEqual(10, conts.Count);

        am.UnregisterAppAttempt();
        nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Verifies that an application master cannot release a container that was
/// allocated to a different application attempt: the second AM's schedule
/// call must throw <c>InvalidContainerReleaseException</c> with a message
/// naming the container and the requesting attempt.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestInvalidContainerReleaseRequest()
{
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();

        // Register node1
        MockNM node = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);

        // Submit an application
        RMApp firstApp = rm.SubmitApp(1024);

        // kick the scheduling
        node.NodeHeartbeat(true);
        RMAppAttempt firstAttempt = firstApp.GetCurrentAppAttempt();
        MockAM firstAm = rm.SendAMLaunched(firstAttempt.GetAppAttemptId());
        firstAm.RegisterAppAttempt();

        firstAm.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
        // send the request
        AllocateResponse firstAllocation = firstAm.Schedule();

        // kick the scheduler
        node.NodeHeartbeat(true);
        while (firstAllocation.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
            firstAllocation = firstAm.Schedule();
        }
        NUnit.Framework.Assert.IsTrue(firstAllocation.GetAllocatedContainers().Count > 0);

        RMApp secondApp = rm.SubmitApp(1024);
        node.NodeHeartbeat(true);
        RMAppAttempt secondAttempt = secondApp.GetCurrentAppAttempt();
        MockAM secondAm = rm.SendAMLaunched(secondAttempt.GetAppAttemptId());
        secondAm.RegisterAppAttempt();

        // Now trying to release container allocated for app1 -> appAttempt1.
        ContainerId foreignContainerId = firstAllocation.GetAllocatedContainers()[0].GetId();
        secondAm.AddContainerToBeReleased(foreignContainerId);
        try
        {
            secondAm.Schedule();
            NUnit.Framework.Assert.Fail("Exception was expected!!");
        }
        catch (InvalidContainerReleaseException ex)
        {
            string expectedFragment = "Cannot release container : "
                + foreignContainerId.ToString()
                + " not belonging to this application attempt : "
                + secondAttempt.GetAppAttemptId().ToString();
            NUnit.Framework.Assert.IsTrue(ex.Message.Contains(expectedFragment));
        }
    }
    finally
    {
        if (rm != null)
        {
            rm.Stop();
        }
    }
}
/// <summary>
/// Verifies that the custom container manager observes the AM launch with
/// the expected attempt id, submit time, container id, node id and max
/// attempts, and that it later observes the cleanup once the AM has
/// unregistered and its container has completed.
/// </summary>
/// <remarks>
/// Each wait polls up to 20 times with a one-second sleep. The RM is
/// stopped in a <c>finally</c> block so a failed assertion does not leak it.
/// </remarks>
public virtual void TestAMLaunchAndCleanup()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    TestApplicationMasterLauncher.MyContainerManagerImpl containerManager =
        new TestApplicationMasterLauncher.MyContainerManagerImpl();
    MockRMWithCustomAMLauncher rm = new MockRMWithCustomAMLauncher(containerManager);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5120);
        RMApp app = rm.SubmitApp(2000);

        // kick the scheduling
        nm1.NodeHeartbeat(true);

        int waitCount = 0;
        while (containerManager.launched == false && waitCount++ < 20)
        {
            Log.Info("Waiting for AM Launch to happen..");
            Sharpen.Thread.Sleep(1000);
        }
        NUnit.Framework.Assert.IsTrue(containerManager.launched);

        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        ApplicationAttemptId appAttemptId = attempt.GetAppAttemptId();
        NUnit.Framework.Assert.AreEqual(
            appAttemptId.ToString(), containerManager.attemptIdAtContainerManager);
        NUnit.Framework.Assert.AreEqual(
            app.GetSubmitTime(), containerManager.submitTimeAtContainerManager);
        NUnit.Framework.Assert.AreEqual(
            app.GetRMAppAttempt(appAttemptId).GetMasterContainer().GetId().ToString(),
            containerManager.containerIdAtContainerManager);
        NUnit.Framework.Assert.AreEqual(
            nm1.GetNodeId().ToString(), containerManager.nmHostAtContainerManager);
        NUnit.Framework.Assert.AreEqual(
            YarnConfiguration.DefaultRmAmMaxAttempts, containerManager.maxAppAttempts);

        MockAM am = new MockAM(
            rm.GetRMContext(), rm.GetApplicationMasterService(), appAttemptId);
        am.RegisterAppAttempt();
        am.UnregisterAppAttempt();

        //complete the AM container to finish the app normally
        nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);

        waitCount = 0;
        while (containerManager.cleanedup == false && waitCount++ < 20)
        {
            Log.Info("Waiting for AM Cleanup to happen..");
            Sharpen.Thread.Sleep(1000);
        }
        NUnit.Framework.Assert.IsTrue(containerManager.cleanedup);
        am.WaitForState(RMAppAttemptState.Finished);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Verifies that an allocate call whose blacklist request contains
/// <c>ResourceRequest.Any</c> as an addition is rejected with
/// <c>InvalidResourceBlacklistRequestException</c>.
/// </summary>
/// <remarks>
/// The client talks to the RM as a remote user named after the application
/// attempt id and authenticated with the AMRM token taken from the container
/// credentials. The RM is stopped in a <c>finally</c> block so that an
/// unexpected exception anywhere in the test does not leak it.
/// </remarks>
public virtual void TestValidateResourceBlacklistRequest()
{
    TestAMAuthorization.MyContainerManager containerManager =
        new TestAMAuthorization.MyContainerManager();
    TestAMAuthorization.MockRMWithAMS rm =
        new TestAMAuthorization.MockRMWithAMS(new YarnConfiguration(), containerManager);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("localhost:1234", 5120);
        IDictionary<ApplicationAccessType, string> acls =
            new Dictionary<ApplicationAccessType, string>(2);
        acls[ApplicationAccessType.ViewApp] = "*";
        RMApp app = rm.SubmitApp(1024, "appname", "appuser", acls);
        nm1.NodeHeartbeat(true);

        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        ApplicationAttemptId applicationAttemptId = attempt.GetAppAttemptId();
        WaitForLaunchedState(attempt);

        // Create a client to the RM.
        Configuration conf = rm.GetConfig();
        YarnRPC rpc = YarnRPC.Create(conf);
        UserGroupInformation currentUser =
            UserGroupInformation.CreateRemoteUser(applicationAttemptId.ToString());
        Credentials credentials = containerManager.GetContainerCredentials();
        IPEndPoint rmBindAddress = rm.GetApplicationMasterService().GetBindAddress();
        Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> amRMToken =
            TestAMAuthorization.MockRMWithAMS.SetupAndReturnAMRMToken(
                rmBindAddress, credentials.GetAllTokens());
        currentUser.AddToken(amRMToken);
        ApplicationMasterProtocol client =
            currentUser.DoAs(new _PrivilegedAction_626(rpc, rmBindAddress, conf));

        RegisterApplicationMasterRequest request =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<RegisterApplicationMasterRequest>();
        client.RegisterApplicationMaster(request);

        ResourceBlacklistRequest blacklistRequest = ResourceBlacklistRequest.NewInstance(
            Sharpen.Collections.SingletonList(ResourceRequest.Any), null);
        AllocateRequest allocateRequest =
            AllocateRequest.NewInstance(0, 0.0f, null, null, blacklistRequest);

        bool error = false;
        try
        {
            client.Allocate(allocateRequest);
        }
        catch (InvalidResourceBlacklistRequestException)
        {
            error = true;
        }

        // The original message read "Didn't not catch", a double negative.
        NUnit.Framework.Assert.IsTrue(
            "Did not catch InvalidResourceBlacklistRequestException", error);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Verifies that a client authenticated with the AMRM token can register
/// the application master: the response must carry a client-to-AM token
/// master key (non-empty when security is enabled) and the "*" ACL for
/// <c>ViewApp</c> that was supplied at submission.
/// </summary>
/// <remarks>
/// The RM is built from the class-level <c>conf</c> and stored in the
/// class-level <c>rm</c>. The configuration read back from the RM is held
/// in a local with a different name: C# rejects a method that uses the
/// member <c>conf</c> and later declares a local <c>conf</c> in the same
/// scope (CS0844).
/// </remarks>
public virtual void TestAuthorizedAccess()
{
    TestAMAuthorization.MyContainerManager containerManager =
        new TestAMAuthorization.MyContainerManager();
    rm = new TestAMAuthorization.MockRMWithAMS(conf, containerManager);
    rm.Start();
    MockNM nm1 = rm.RegisterNode("localhost:1234", 5120);
    IDictionary<ApplicationAccessType, string> acls =
        new Dictionary<ApplicationAccessType, string>(2);
    acls[ApplicationAccessType.ViewApp] = "*";
    RMApp app = rm.SubmitApp(1024, "appname", "appuser", acls);
    nm1.NodeHeartbeat(true);

    // Poll up to 20 times, one second apart, for the container tokens.
    int waitCount = 0;
    while (containerManager.containerTokens == null && waitCount++ < 20)
    {
        Log.Info("Waiting for AM Launch to happen..");
        Sharpen.Thread.Sleep(1000);
    }
    NUnit.Framework.Assert.IsNotNull(containerManager.containerTokens);

    RMAppAttempt attempt = app.GetCurrentAppAttempt();
    ApplicationAttemptId applicationAttemptId = attempt.GetAppAttemptId();
    WaitForLaunchedState(attempt);

    // Create a client to the RM.
    Configuration rmConf = rm.GetConfig();
    YarnRPC rpc = YarnRPC.Create(rmConf);
    UserGroupInformation currentUser =
        UserGroupInformation.CreateRemoteUser(applicationAttemptId.ToString());
    Credentials credentials = containerManager.GetContainerCredentials();
    IPEndPoint rmBindAddress = rm.GetApplicationMasterService().GetBindAddress();
    Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> amRMToken =
        TestAMAuthorization.MockRMWithAMS.SetupAndReturnAMRMToken(
            rmBindAddress, credentials.GetAllTokens());
    currentUser.AddToken(amRMToken);
    ApplicationMasterProtocol client =
        currentUser.DoAs(new _PrivilegedAction_206(this, rpc, rmConf));

    RegisterApplicationMasterRequest request =
        Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<RegisterApplicationMasterRequest>();
    RegisterApplicationMasterResponse response = client.RegisterApplicationMaster(request);
    NUnit.Framework.Assert.IsNotNull(response.GetClientToAMTokenMasterKey());
    if (UserGroupInformation.IsSecurityEnabled())
    {
        NUnit.Framework.Assert.IsTrue(
            ((byte[])response.GetClientToAMTokenMasterKey().Array()).Length > 0);
    }
    NUnit.Framework.Assert.AreEqual(
        "Register response has bad ACLs",
        "*",
        response.GetApplicationACLs()[ApplicationAccessType.ViewApp]);
}
/// <summary>
/// Test that even if the AM container is allocated with a container id not
/// equal to 1, the following allocate requests from the AM are able to
/// retrieve the corresponding NM token.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestNMTokenSentForNormalContainer()
{
    conf.Set(YarnConfiguration.RmScheduler, typeof(CapacityScheduler).GetCanonicalName());
    MockRM rm = new MockRM(conf);
    rm.Start();
    MockNM node = rm.RegisterNode("h1:1234", 5120);
    RMApp submittedApp = rm.SubmitApp(2000);
    RMAppAttempt currentAttempt = submittedApp.GetCurrentAppAttempt();

    // Call getNewContainerId to increase container Id so that the AM container
    // Id doesn't equal to one.
    CapacityScheduler capacityScheduler = (CapacityScheduler)rm.GetResourceScheduler();
    capacityScheduler
        .GetApplicationAttempt(currentAttempt.GetAppAttemptId())
        .GetNewContainerId();

    // kick the scheduling
    node.NodeHeartbeat(true);
    MockAM appMaster = MockRM.LaunchAM(submittedApp, rm, node);

    // am container Id not equal to 1.
    NUnit.Framework.Assert.IsTrue(
        currentAttempt.GetMasterContainer().GetId().GetContainerId() != 1);

    // NMSecretManager doesn't record the node on which the am is allocated.
    NUnit.Framework.Assert.IsFalse(
        rm.GetRMContext().GetNMTokenSecretManager().IsApplicationAttemptNMTokenPresent(
            currentAttempt.GetAppAttemptId(), node.GetNodeId()));

    appMaster.RegisterAppAttempt();
    rm.WaitForState(submittedApp.GetApplicationId(), RMAppState.Running);

    int wantedContainers = 1;
    IList<Container> receivedContainers = new AList<Container>();
    // nmTokens keeps track of all the nmTokens issued in the allocate call.
    IList<NMToken> receivedNMTokens = new AList<NMToken>();

    // am1 allocate 1 container on nm1.
    bool allReceived = false;
    while (!allReceived)
    {
        AllocateResponse allocateResponse = appMaster.Allocate(
            "127.0.0.1", 2000, wantedContainers, new AList<ContainerId>());
        node.NodeHeartbeat(true);
        Sharpen.Collections.AddAll(receivedContainers, allocateResponse.GetAllocatedContainers());
        Sharpen.Collections.AddAll(receivedNMTokens, allocateResponse.GetNMTokens());

        allReceived = receivedContainers.Count == wantedContainers;
        if (!allReceived)
        {
            Sharpen.Thread.Sleep(200);
            System.Console.Out.WriteLine("Waiting for container to be allocated.");
        }
    }

    NodeId tokenNodeId = receivedNMTokens[0].GetNodeId();
    // NMToken is sent for the allocated container.
    NUnit.Framework.Assert.AreEqual(node.GetNodeId(), tokenNodeId);
}
/// <summary>
/// Waits for the application to be Accepted, sends a node heartbeat,
/// marks the current attempt as launched and waits until that attempt
/// reports the Launched state.
/// </summary>
/// <param name="app">Application whose current attempt is launched.</param>
/// <param name="rm">Resource manager used to launch and to wait on state.</param>
/// <param name="nm">Node manager that sends the heartbeat.</param>
/// <returns>The mock application master for the launched attempt (not yet registered here).</returns>
/// <exception cref="System.Exception"/>
public static MockAM LaunchAM(RMApp app, Org.Apache.Hadoop.Yarn.Server.Resourcemanager.MockRM rm,
    MockNM nm)
{
    rm.WaitForState(app.GetApplicationId(), RMAppState.Accepted);

    RMAppAttempt currentAttempt = app.GetCurrentAppAttempt();
    System.Console.Out.WriteLine("Launch AM " + currentAttempt.GetAppAttemptId());

    nm.NodeHeartbeat(true);
    MockAM launchedAm = rm.SendAMLaunched(currentAttempt.GetAppAttemptId());
    rm.WaitForState(currentAttempt.GetAppAttemptId(), RMAppAttemptState.Launched);
    return launchedAm;
}
/// <summary>
/// Drives one application through allocating 10000 containers on a single
/// node and then finishing, asserting that all requested containers are
/// allocated and that the node is subsequently told to clean up the same
/// number of containers.
/// </summary>
/// <param name="rm">
/// Resource manager to run against; it is started here and always stopped
/// before returning, including when an assertion fails.
/// </param>
/// <exception cref="System.Exception"/>
private void TestRMWritingMassiveHistory(MockRM rm)
{
    try
    {
        rm.Start();
        MockNM nm = rm.RegisterNode("127.0.0.1:1234", 1024 * 10100);
        RMApp app = rm.SubmitApp(1024);
        nm.NodeHeartbeat(true);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
        am.RegisterAppAttempt();

        int request = 10000;
        am.Allocate("127.0.0.1", 1024, request, new AList<ContainerId>());
        nm.NodeHeartbeat(true);
        IList<Container> allocated = am.Allocate(
            new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers();

        // Accumulate allocations across up to 200 polls, 300 ms apart.
        int waitCount = 0;
        int allocatedSize = allocated.Count;
        while (allocatedSize < request && waitCount++ < 200)
        {
            Sharpen.Thread.Sleep(300);
            allocated = am.Allocate(
                new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers();
            allocatedSize += allocated.Count;
            nm.NodeHeartbeat(true);
        }
        NUnit.Framework.Assert.AreEqual(request, allocatedSize);

        am.UnregisterAppAttempt();
        am.WaitForState(RMAppAttemptState.Finishing);
        nm.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);

        // Accumulate cleanup notifications the same way.
        NodeHeartbeatResponse resp = nm.NodeHeartbeat(true);
        IList<ContainerId> cleaned = resp.GetContainersToCleanup();
        int cleanedSize = cleaned.Count;
        waitCount = 0;
        while (cleanedSize < allocatedSize && waitCount++ < 200)
        {
            Sharpen.Thread.Sleep(300);
            resp = nm.NodeHeartbeat(true);
            cleaned = resp.GetContainersToCleanup();
            cleanedSize += cleaned.Count;
        }
        NUnit.Framework.Assert.AreEqual(allocatedSize, cleanedSize);
        rm.WaitForState(app.GetApplicationId(), RMAppState.Finished);
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// Renders the "Application Metrics" info section for the application
/// identified by <c>appID</c>: total preempted resources and container
/// counts, the current attempt's preempted resources and non-AM container
/// count, and the aggregate memory-seconds / vcore-seconds allocation.
/// </summary>
/// <param name="html">Block the info section is rendered into.</param>
/// <remarks>
/// When the application is not found, the application-level values are
/// shown as "N/A". When there is no current attempt, the attempt-level
/// values fall back to <c>Resources.None()</c> and 0.
/// NOTE(review): the member calls written as <c>.(</c> have lost their
/// method name (presumably a single-underscore name dropped by the
/// Java-to-C# conversion - confirm and restore). They are kept verbatim
/// here because the intended name is not visible in this file.
/// NOTE(review): the code tests <c>rmApp == null</c> after an indexer
/// lookup; confirm the map returned by <c>GetRMApps()</c> yields null for a
/// missing key instead of throwing.
/// </remarks>
protected override void CreateApplicationMetricsTable(HtmlBlock.Block html)
{
    RMApp rmApp = this.rm.GetRMContext().GetRMApps()[appID];
    RMAppMetrics appMetrics = rmApp == null ? null : rmApp.GetRMAppMetrics();

    // Get attempt metrics and fields, it is possible currentAttempt of RMApp is
    // null. In that case, we will assume resource preempted and number of Non
    // AM container preempted on that attempt is 0
    RMAppAttemptMetrics attemptMetrics;
    if (rmApp == null || null == rmApp.GetCurrentAppAttempt())
    {
        attemptMetrics = null;
    }
    else
    {
        attemptMetrics = rmApp.GetCurrentAppAttempt().GetRMAppAttemptMetrics();
    }
    Org.Apache.Hadoop.Yarn.Api.Records.Resource attemptResourcePreempted =
        attemptMetrics == null ? Resources.None() : attemptMetrics.GetResourcePreempted();
    int attemptNumNonAMContainerPreempted =
        attemptMetrics == null ? 0 : attemptMetrics.GetNumNonAMContainersPreempted();

    Hamlet.DIV<Org.Apache.Hadoop.Yarn.Webapp.Hamlet.Hamlet> pdiv =
        html.(typeof(InfoBlock)).Div(JQueryUI.InfoWrap);
    Info("Application Overview").Clear();
    Info("Application Metrics")
        .("Total Resource Preempted:",
            appMetrics == null ? "N/A" : appMetrics.GetResourcePreempted())
        .("Total Number of Non-AM Containers Preempted:",
            appMetrics == null ? "N/A" : appMetrics.GetNumNonAMContainersPreempted())
        .("Total Number of AM Containers Preempted:",
            appMetrics == null ? "N/A" : appMetrics.GetNumAMContainersPreempted())
        .("Resource Preempted from Current Attempt:", attemptResourcePreempted)
        .("Number of Non-AM Containers Preempted from Current Attempt:",
            attemptNumNonAMContainerPreempted)
        // .NET composite formatting uses {0}/{1}; the former "%d" placeholders
        // were emitted literally and both values were dropped.
        .("Aggregate Resource Allocation:", string.Format(
            "{0} MB-seconds, {1} vcore-seconds",
            appMetrics == null ? "N/A" : appMetrics.GetMemorySeconds(),
            appMetrics == null ? "N/A" : appMetrics.GetVcoreSeconds()));
    pdiv.();
}
/// <summary>
/// Verifies resource-usage metrics for an application with one attempt and
/// one container: memory-seconds and vcore-seconds are 0 right after
/// submission, become positive while the AM is running, and after the
/// application finishes equal the values computed from the AM container by
/// <c>CalculateContainerResourceMetrics</c>.
/// </summary>
/// <remarks>
/// The wait loop re-reads the metrics from the application on every pass
/// (up to 5 seconds in 1-second steps) so that it observes progress even if
/// <c>GetRMAppMetrics</c> hands back a snapshot. The RM is stopped in a
/// <c>finally</c> block so a failed assertion does not leak it.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestUsageWithOneAttemptAndOneContainer()
{
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();
        MockNM nm = new MockNM("127.0.0.1:1234", 15120, rm.GetResourceTrackerService());
        nm.RegisterNode();
        RMApp app0 = rm.SubmitApp(200);

        RMAppMetrics rmAppMetrics = app0.GetRMAppMetrics();
        NUnit.Framework.Assert.IsTrue(
            "Before app submittion, memory seconds should have been 0 but was "
                + rmAppMetrics.GetMemorySeconds(),
            rmAppMetrics.GetMemorySeconds() == 0);
        NUnit.Framework.Assert.IsTrue(
            "Before app submission, vcore seconds should have been 0 but was "
                + rmAppMetrics.GetVcoreSeconds(),
            rmAppMetrics.GetVcoreSeconds() == 0);

        RMAppAttempt attempt0 = app0.GetCurrentAppAttempt();
        nm.NodeHeartbeat(true);
        MockAM am0 = rm.SendAMLaunched(attempt0.GetAppAttemptId());
        am0.RegisterAppAttempt();
        RMContainer rmContainer = rm.GetResourceScheduler().GetRMContainer(
            attempt0.GetMasterContainer().GetId());

        // Allow metrics to accumulate.
        int sleepInterval = 1000;
        int cumulativeSleepTime = 0;
        while (app0.GetRMAppMetrics().GetMemorySeconds() <= 0 && cumulativeSleepTime < 5000)
        {
            Sharpen.Thread.Sleep(sleepInterval);
            cumulativeSleepTime += sleepInterval;
        }

        rmAppMetrics = app0.GetRMAppMetrics();
        NUnit.Framework.Assert.IsTrue(
            "While app is running, memory seconds should be >0 but is "
                + rmAppMetrics.GetMemorySeconds(),
            rmAppMetrics.GetMemorySeconds() > 0);
        NUnit.Framework.Assert.IsTrue(
            "While app is running, vcore seconds should be >0 but is "
                + rmAppMetrics.GetVcoreSeconds(),
            rmAppMetrics.GetVcoreSeconds() > 0);

        MockRM.FinishAMAndVerifyAppState(app0, rm, nm, am0);

        AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(rmContainer);
        rmAppMetrics = app0.GetRMAppMetrics();
        NUnit.Framework.Assert.AreEqual(
            "Unexcpected MemorySeconds value",
            ru.GetMemorySeconds(), rmAppMetrics.GetMemorySeconds());
        NUnit.Framework.Assert.AreEqual(
            "Unexpected VcoreSeconds value",
            ru.GetVcoreSeconds(), rmAppMetrics.GetVcoreSeconds());
    }
    finally
    {
        rm.Stop();
    }
}
/// <summary>
/// For several scheduler configurations, starts an RM, registers an AM and
/// checks that the scheduler resource types returned in the registration
/// response equal the set expected for that configuration.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestResourceTypes()
{
    // Maps each configuration under test to the resource types it should report.
    Dictionary<YarnConfiguration, EnumSet<YarnServiceProtos.SchedulerResourceTypes>> driver =
        new Dictionary<YarnConfiguration, EnumSet<YarnServiceProtos.SchedulerResourceTypes>>();
    CapacitySchedulerConfiguration csconf = new CapacitySchedulerConfiguration();
    csconf.SetResourceComparator(typeof(DominantResourceCalculator));
    YarnConfiguration testCapacityDRConf = new YarnConfiguration(csconf);
    testCapacityDRConf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler),
        typeof(ResourceScheduler));
    YarnConfiguration testCapacityDefConf = new YarnConfiguration();
    testCapacityDefConf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler),
        typeof(ResourceScheduler));
    YarnConfiguration testFairDefConf = new YarnConfiguration();
    testFairDefConf.SetClass(YarnConfiguration.RmScheduler, typeof(FairScheduler),
        typeof(ResourceScheduler));
    driver[conf] = EnumSet.Of(YarnServiceProtos.SchedulerResourceTypes.Memory);
    driver[testCapacityDRConf] = EnumSet.Of(YarnServiceProtos.SchedulerResourceTypes.Cpu,
        YarnServiceProtos.SchedulerResourceTypes.Memory);
    driver[testCapacityDefConf] = EnumSet.Of(YarnServiceProtos.SchedulerResourceTypes.Memory);
    driver[testFairDefConf] = EnumSet.Of(YarnServiceProtos.SchedulerResourceTypes.Memory,
        YarnServiceProtos.SchedulerResourceTypes.Cpu);
    foreach (KeyValuePair<YarnConfiguration, EnumSet<YarnServiceProtos.SchedulerResourceTypes>> entry in driver)
    {
        EnumSet<YarnServiceProtos.SchedulerResourceTypes> expectedValue = entry.Value;
        MockRM rm = new MockRM(entry.Key);
        rm.Start();
        try
        {
            MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
            RMApp app1 = rm.SubmitApp(2048);
            nm1.NodeHeartbeat(true);
            RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
            MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
            RegisterApplicationMasterResponse resp = am1.RegisterAppAttempt();
            EnumSet<YarnServiceProtos.SchedulerResourceTypes> types = resp.GetSchedulerResourceTypes();
            Log.Info("types = " + types.ToString());
            NUnit.Framework.Assert.AreEqual(expectedValue, types);
        }
        finally
        {
            // Each iteration owns its RM; stop it even if the check above fails.
            rm.Stop();
        }
    }
}
/// <summary>
/// Builds a mocked application whose current attempt is a mocked attempt with
/// the given id and mocked attempt metrics, and stores the application in the
/// resource manager's application map under the attempt's application id.
/// </summary>
/// <param name="attemptId">Attempt id reported by the mocked attempt; its
/// application id is reported by the mocked application.</param>
/// <returns>The mocked application that was registered.</returns>
protected internal virtual RMApp CreateMockRMApp(ApplicationAttemptId attemptId)
{
    // Mocked application reporting the attempt's application id.
    RMApp mockedApp = Org.Mockito.Mockito.Mock<RMAppImpl>();
    Org.Mockito.Mockito.When(mockedApp.GetApplicationId())
        .ThenReturn(attemptId.GetApplicationId());

    // Mocked attempt exposing the requested id and a mocked metrics object.
    RMAppAttemptImpl mockedAttempt = Org.Mockito.Mockito.Mock<RMAppAttemptImpl>();
    Org.Mockito.Mockito.When(mockedAttempt.GetAppAttemptId()).ThenReturn(attemptId);
    RMAppAttemptMetrics mockedMetrics = Org.Mockito.Mockito.Mock<RMAppAttemptMetrics>();
    Org.Mockito.Mockito.When(mockedAttempt.GetRMAppAttemptMetrics()).ThenReturn(mockedMetrics);

    // Wire the attempt into the application and publish the application.
    Org.Mockito.Mockito.When(mockedApp.GetCurrentAppAttempt()).ThenReturn(mockedAttempt);
    resourceManager.GetRMContext().GetRMApps()[attemptId.GetApplicationId()] = mockedApp;
    return mockedApp;
}
/// <summary>
/// Runs an application whose AM registers and immediately unregisters without
/// requesting containers, then waits for the attempt to reach the Finished state.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestAppWithNoContainers()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("h1:1234", 5120);
        RMApp app = rm.SubmitApp(2000);
        //kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
        am.RegisterAppAttempt();
        am.UnregisterAppAttempt();
        nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);
    }
    finally
    {
        // Stop the RM even if one of the steps above throws.
        rm.Stop();
    }
}
/// <summary>
/// Asserts that the RM reports the Active HA state with its active services
/// running, then exercises basic active-RM operations (new application id, node
/// registration, application submission, waiting for the attempt to be
/// scheduled) and finally checks the active RM web services.
/// </summary>
/// <exception cref="System.Exception"/>
private void CheckActiveRMFunctionality()
{
    NUnit.Framework.Assert.AreEqual(StateErr, HAServiceProtocol.HAServiceState.Active,
        rm.adminService.GetServiceStatus().GetState());
    NUnit.Framework.Assert.IsTrue("Active RM services aren't started",
        rm.AreActiveServicesRunning());
    NUnit.Framework.Assert.IsTrue("RM is not ready to become active",
        rm.adminService.GetServiceStatus().IsReadyToBecomeActive());
    try
    {
        rm.GetNewAppId();
        rm.RegisterNode("127.0.0.1:1", 2048);
        app = rm.SubmitApp(1024);
        attempt = app.GetCurrentAppAttempt();
        rm.WaitForState(attempt.GetAppAttemptId(), RMAppAttemptState.Scheduled);
    }
    catch (Exception e)
    {
        // Log before failing: Assert.Fail throws, so anything placed after it
        // never runs and the underlying cause would be lost.
        Log.Error("ActiveRM check failed", e);
        NUnit.Framework.Assert.Fail("Unable to perform Active RM functions");
    }
    CheckActiveRMWebServices();
}
/// <summary>
/// Allocates one container for a registered AM and asserts that the RM
/// identifier carried in the container token equals
/// <c>MockRM.GetClusterTimeStamp()</c>.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestRMIdentifierOnContainerAllocation()
{
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        // Register node1
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
        // Submit an application
        RMApp app1 = rm.SubmitApp(2048);
        // kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();
        am1.AddRequests(new string[] { "127.0.0.1" }, Gb, 1, 1);
        // send the request
        AllocateResponse alloc1Response = am1.Schedule();
        // kick the scheduler
        nm1.NodeHeartbeat(true);
        // NOTE(review): this wait has no upper bound; if no container is ever
        // allocated the test blocks here instead of failing.
        while (alloc1Response.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
            alloc1Response = am1.Schedule();
        }
        // assert RMIdentifer is set properly in allocated containers
        Container allocatedContainer = alloc1Response.GetAllocatedContainers()[0];
        ContainerTokenIdentifier tokenId = BuilderUtils.NewContainerTokenIdentifier(
            allocatedContainer.GetContainerToken());
        NUnit.Framework.Assert.AreEqual(MockRM.GetClusterTimeStamp(), tokenId.GetRMIdentifier());
    }
    finally
    {
        // Stop the RM even when the assertion or an earlier step fails.
        rm.Stop();
    }
}
/// <summary>
/// Checks response-id handling of allocate calls: the first allocate (id 0)
/// yields response id 1, the follow-up yields 2, re-sending the same request
/// yields 2 again, and a request that goes back to id 0 must be rejected with
/// an exception whose inner exception is an
/// <c>InvalidApplicationMasterRequestException</c>.
/// </summary>
public virtual void TestARRMResponseId()
{
    MockNM nm1 = rm.RegisterNode("h1:1234", 5000);
    RMApp app = rm.SubmitApp(2000);
    // Trigger the scheduling so the AM gets 'launched'
    nm1.NodeHeartbeat(true);
    RMAppAttempt attempt = app.GetCurrentAppAttempt();
    MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
    am.RegisterAppAttempt();
    AllocateRequest allocateRequest = AllocateRequest.NewInstance(0, 0F, null, null, null);
    AllocateResponse response = Allocate(attempt.GetAppAttemptId(), allocateRequest);
    NUnit.Framework.Assert.AreEqual(1, response.GetResponseId());
    NUnit.Framework.Assert.IsTrue(response.GetAMCommand() == null);
    allocateRequest = AllocateRequest.NewInstance(response.GetResponseId(), 0F, null, null, null);
    response = Allocate(attempt.GetAppAttemptId(), allocateRequest);
    NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());
    /* try resending */
    response = Allocate(attempt.GetAppAttemptId(), allocateRequest);
    NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());
    allocateRequest = AllocateRequest.NewInstance(0, 0F, null, null, null);
    // Track whether the call was rejected instead of calling Assert.Fail inside
    // the try block: the failure raised there is itself an exception and would be
    // swallowed by the catch below, turning "no exception thrown" into a
    // misleading inner-exception type failure.
    bool rejected = false;
    try
    {
        Allocate(attempt.GetAppAttemptId(), allocateRequest);
    }
    catch (Exception e)
    {
        rejected = true;
        NUnit.Framework.Assert.IsTrue(e.InnerException is InvalidApplicationMasterRequestException);
    }
    if (!rejected)
    {
        NUnit.Framework.Assert.Fail();
    }
}
/// <summary>
/// Sends node-local and rack-local resource requests without an off-switch
/// request and verifies that the following node heartbeat does not fail with a
/// null-related exception.
/// </summary>
public virtual void TestAllocateContainerOnNodeWithoutOffSwitchSpecified()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
        RMApp app1 = rm.SubmitApp(2048);
        // kick the scheduling, 2 GB given to AM1, remaining 4GB on nm1
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();
        // add request for containers
        IList<ResourceRequest> requests = new AList<ResourceRequest>();
        requests.AddItem(am1.CreateResourceReq("127.0.0.1", 1 * Gb, 1, 1));
        requests.AddItem(am1.CreateResourceReq("/default-rack", 1 * Gb, 1, 1));
        // send the request
        am1.Allocate(requests, null);
        string failureMessage = "NPE when allocating container on node but " + "forget to set off-switch request should be handled";
        try
        {
            // kick the schedule
            nm1.NodeHeartbeat(true);
        }
        catch (ArgumentNullException)
        {
            NUnit.Framework.Assert.Fail(failureMessage);
        }
        catch (NullReferenceException)
        {
            // Dereferencing null raises NullReferenceException in .NET, so it has
            // to be caught as well for the diagnostic above to be reported.
            NUnit.Framework.Assert.Fail(failureMessage);
        }
    }
    finally
    {
        // Stop the RM even when the test fails.
        rm.Stop();
    }
}
/// <summary>
/// Submits two applications before any node manager exists, checks that both
/// attempts stay Scheduled, then registers two node managers and verifies that
/// each heartbeat moves one more attempt to Allocated.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestActivatingApplicationAfterAddingNM()
{
    MockRM rm1 = new MockRM(conf);
    // start like normal because state is empty
    rm1.Start();
    try
    {
        // app that gets launched
        RMApp app1 = rm1.SubmitApp(200);
        // app that does not get launched
        RMApp app2 = rm1.SubmitApp(200);
        // app1 and app2 should be scheduled, but because no resource is available,
        // they are not activated.
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        ApplicationAttemptId attemptId1 = attempt1.GetAppAttemptId();
        rm1.WaitForState(attemptId1, RMAppAttemptState.Scheduled);
        RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
        ApplicationAttemptId attemptId2 = attempt2.GetAppAttemptId();
        rm1.WaitForState(attemptId2, RMAppAttemptState.Scheduled);
        MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());
        MockNM nm2 = new MockNM("h2:5678", 15120, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        nm2.RegisterNode();
        //kick the scheduling
        nm1.NodeHeartbeat(true);
        // app1 should be allocated now
        rm1.WaitForState(attemptId1, RMAppAttemptState.Allocated);
        rm1.WaitForState(attemptId2, RMAppAttemptState.Scheduled);
        nm2.NodeHeartbeat(true);
        // app2 should be allocated now
        rm1.WaitForState(attemptId1, RMAppAttemptState.Allocated);
        rm1.WaitForState(attemptId2, RMAppAttemptState.Allocated);
    }
    finally
    {
        // Stop the RM even if a state wait above fails.
        rm1.Stop();
    }
}
/// <summary>
/// Exercises the attempt-failures validity interval with a maximum of 2 AM
/// attempts. With a 20s window, two consecutive failures fail the application.
/// With a 6s window and a controlled clock moved 6 seconds ahead before a
/// failure, further attempts can still be launched -- also after the RM is
/// restarted from the in-memory state store -- until a failure finally fails
/// the application.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestRMAppAttemptFailuresValidityInterval()
{
    YarnConfiguration conf = new YarnConfiguration();
    conf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler), typeof(ResourceScheduler));
    conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
    conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
    conf.Set(YarnConfiguration.RmStore, typeof(MemoryRMStateStore).FullName);
    // explicitly set max-am-retry count as 2.
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 2);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);
    MockRM rm1 = new MockRM(conf, memStore);
    // Created later, when the restart is simulated; declared here so the
    // cleanup below can reach it.
    MockRM rm2 = null;
    rm1.Start();
    try
    {
        MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        // set window size to a larger number : 20s
        // we will verify the app should be failed if
        // two continuous attempts failed in 20s.
        RMApp app = rm1.SubmitApp(200, 20000);
        MockAM am = MockRM.LaunchAM(app, rm1, nm1);
        // Fail current attempt normally
        nm1.NodeHeartbeat(am.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Failed);
        // launch the second attempt
        rm1.WaitForState(app.GetApplicationId(), RMAppState.Accepted);
        NUnit.Framework.Assert.AreEqual(2, app.GetAppAttempts().Count);
        NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)app.GetCurrentAppAttempt()).MayBeLastAttempt());
        MockAM am_2 = MockRM.LaunchAndRegisterAM(app, rm1, nm1);
        am_2.WaitForState(RMAppAttemptState.Running);
        nm1.NodeHeartbeat(am_2.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am_2.WaitForState(RMAppAttemptState.Failed);
        // current app should be failed.
        rm1.WaitForState(app.GetApplicationId(), RMAppState.Failed);
        ControlledClock clock = new ControlledClock(new SystemClock());
        // set window size to 6s
        RMAppImpl app1 = (RMAppImpl)rm1.SubmitApp(200, 6000);
        app1.SetSystemClock(clock);
        MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
        // Fail attempt1 normally
        nm1.NodeHeartbeat(am1.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am1.WaitForState(RMAppAttemptState.Failed);
        // launch the second attempt
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
        NUnit.Framework.Assert.AreEqual(2, app1.GetAppAttempts().Count);
        RMAppAttempt attempt2 = app1.GetCurrentAppAttempt();
        NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt2).MayBeLastAttempt());
        MockAM am2 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
        am2.WaitForState(RMAppAttemptState.Running);
        // wait for 6 seconds
        clock.SetTime(Runtime.CurrentTimeMillis() + 6 * 1000);
        // Fail attempt2 normally
        nm1.NodeHeartbeat(am2.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am2.WaitForState(RMAppAttemptState.Failed);
        // can launch the third attempt successfully
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
        NUnit.Framework.Assert.AreEqual(3, app1.GetAppAttempts().Count);
        RMAppAttempt attempt3 = app1.GetCurrentAppAttempt();
        clock.Reset();
        MockAM am3 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
        am3.WaitForState(RMAppAttemptState.Running);
        // Restart rm.
        rm2 = new MockRM(conf, memStore);
        rm2.Start();
        // re-register the NM
        nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
        NMContainerStatus status = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NMContainerStatus>();
        status.SetContainerExitStatus(ContainerExitStatus.KilledByResourcemanager);
        status.SetContainerId(attempt3.GetMasterContainer().GetId());
        status.SetContainerState(ContainerState.Complete);
        status.SetDiagnostics(string.Empty);
        nm1.RegisterNode(Sharpen.Collections.SingletonList(status), null);
        rm2.WaitForState(attempt3.GetAppAttemptId(), RMAppAttemptState.Failed);
        rm2.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
        // Launch attempt 4
        MockAM am4 = rm2.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 4, nm1);
        // wait for 6 seconds
        clock.SetTime(Runtime.CurrentTimeMillis() + 6 * 1000);
        // Fail attempt4 normally
        nm1.NodeHeartbeat(am4.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am4.WaitForState(RMAppAttemptState.Failed);
        // can launch the 5th attempt successfully
        rm2.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
        MockAM am5 = rm2.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 5, nm1);
        clock.Reset();
        am5.WaitForState(RMAppAttemptState.Running);
        // Fail attempt5 normally
        nm1.NodeHeartbeat(am5.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am5.WaitForState(RMAppAttemptState.Failed);
        rm2.WaitForState(app1.GetApplicationId(), RMAppState.Failed);
    }
    finally
    {
        // Stop both RMs (same order as before) even when the test fails part-way;
        // the nested finally makes sure rm2 is stopped if stopping rm1 throws.
        try
        {
            rm1.Stop();
        }
        finally
        {
            if (rm2 != null)
            {
                rm2.Stop();
            }
        }
    }
}
// AM container preempted, nm disk failure
// should not be counted towards AM max retry count.
/// <summary>
/// With the maximum AM attempts set to 1, fails successive attempts by killing
/// the AM container through the scheduler (twice), by reporting a disks-failed
/// exit status, and by an unhealthy node heartbeat, asserting each time that
/// the attempt does not count towards the retry limit and that a new attempt
/// is launched. A final normal AM failure must count and fail the application
/// after 5 attempts.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestShouldNotCountFailureToMaxAttemptRetry()
{
    YarnConfiguration conf = new YarnConfiguration();
    conf.SetClass(YarnConfiguration.RmScheduler, typeof(CapacityScheduler), typeof(ResourceScheduler));
    // explicitly set max-am-retry count as 1.
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
    conf.Set(YarnConfiguration.RmStore, typeof(MemoryRMStateStore).FullName);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);
    MockRM rm1 = new MockRM(conf, memStore);
    rm1.Start();
    try
    {
        MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        RMApp app1 = rm1.SubmitApp(200);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
        CapacityScheduler scheduler = (CapacityScheduler)rm1.GetResourceScheduler();
        ContainerId amContainer = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 1);
        // Preempt the first attempt;
        scheduler.KillContainer(scheduler.GetRMContainer(amContainer));
        am1.WaitForState(RMAppAttemptState.Failed);
        NUnit.Framework.Assert.IsTrue(!attempt1.ShouldCountTowardsMaxAttemptRetry());
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
        ApplicationStateData appState = memStore.GetState().GetApplicationState()[app1.GetApplicationId()];
        // AM should be restarted even though max-am-attempt is 1.
        MockAM am2 = rm1.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 2, nm1);
        RMAppAttempt attempt2 = app1.GetCurrentAppAttempt();
        NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt2).MayBeLastAttempt());
        // Preempt the second attempt.
        ContainerId amContainer2 = ContainerId.NewContainerId(am2.GetApplicationAttemptId(), 1);
        scheduler.KillContainer(scheduler.GetRMContainer(amContainer2));
        am2.WaitForState(RMAppAttemptState.Failed);
        NUnit.Framework.Assert.IsTrue(!attempt2.ShouldCountTowardsMaxAttemptRetry());
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
        MockAM am3 = rm1.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 3, nm1);
        RMAppAttempt attempt3 = app1.GetCurrentAppAttempt();
        NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt3).MayBeLastAttempt());
        // mimic NM disk_failure
        ContainerStatus containerStatus = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<ContainerStatus>();
        containerStatus.SetContainerId(attempt3.GetMasterContainer().GetId());
        containerStatus.SetDiagnostics("mimic NM disk_failure");
        containerStatus.SetState(ContainerState.Complete);
        containerStatus.SetExitStatus(ContainerExitStatus.DisksFailed);
        IDictionary<ApplicationId, IList<ContainerStatus>> conts =
            new Dictionary<ApplicationId, IList<ContainerStatus>>();
        conts[app1.GetApplicationId()] = Sharpen.Collections.SingletonList(containerStatus);
        nm1.NodeHeartbeat(conts, true);
        am3.WaitForState(RMAppAttemptState.Failed);
        NUnit.Framework.Assert.IsTrue(!attempt3.ShouldCountTowardsMaxAttemptRetry());
        NUnit.Framework.Assert.AreEqual(ContainerExitStatus.DisksFailed,
            appState.GetAttempt(am3.GetApplicationAttemptId()).GetAMContainerExitStatus());
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Accepted);
        MockAM am4 = rm1.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 4, nm1);
        RMAppAttempt attempt4 = app1.GetCurrentAppAttempt();
        NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt4).MayBeLastAttempt());
        // create second NM, and register to rm1
        MockNM nm2 = new MockNM("127.0.0.1:2234", 8000, rm1.GetResourceTrackerService());
        nm2.RegisterNode();
        // nm1 heartbeats to report unhealthy
        // This will mimic ContainerExitStatus.ABORT
        nm1.NodeHeartbeat(false);
        am4.WaitForState(RMAppAttemptState.Failed);
        NUnit.Framework.Assert.IsTrue(!attempt4.ShouldCountTowardsMaxAttemptRetry());
        NUnit.Framework.Assert.AreEqual(ContainerExitStatus.Aborted,
            appState.GetAttempt(am4.GetApplicationAttemptId()).GetAMContainerExitStatus());
        // launch next AM in nm2
        nm2.NodeHeartbeat(true);
        MockAM am5 = rm1.WaitForNewAMToLaunchAndRegister(app1.GetApplicationId(), 5, nm2);
        RMAppAttempt attempt5 = app1.GetCurrentAppAttempt();
        NUnit.Framework.Assert.IsTrue(((RMAppAttemptImpl)attempt5).MayBeLastAttempt());
        // fail the AM normally
        nm2.NodeHeartbeat(am5.GetApplicationAttemptId(), 1, ContainerState.Complete);
        am5.WaitForState(RMAppAttemptState.Failed);
        NUnit.Framework.Assert.IsTrue(attempt5.ShouldCountTowardsMaxAttemptRetry());
        // AM should not be restarted.
        rm1.WaitForState(app1.GetApplicationId(), RMAppState.Failed);
        NUnit.Framework.Assert.AreEqual(5, app1.GetAppAttempts().Count);
    }
    finally
    {
        // Stop the RM even when an assertion or state wait above fails.
        rm1.Stop();
    }
}