/// <summary>
/// Publishes a registered event and a finished event for a single application
/// attempt, waits until the timeline store exposes both, and then checks every
/// field of the stored entity and of its two events.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestPublishAppAttemptMetrics()
{
    ApplicationAttemptId attemptId = ApplicationAttemptId.NewInstance(ApplicationId.NewInstance(0, 1), 1);
    RMAppAttempt attempt = CreateRMAppAttempt(attemptId);
    metricsPublisher.AppAttemptRegistered(attempt, int.MaxValue + 1L);

    RMApp finishedApp = Org.Mockito.Mockito.Mock<RMApp>();
    Org.Mockito.Mockito.When(finishedApp.GetFinalApplicationStatus())
        .ThenReturn(FinalApplicationStatus.Undefined);
    metricsPublisher.AppAttemptFinished(attempt, RMAppAttemptState.Finished, finishedApp, int.MaxValue + 2L);

    // Keep re-reading the entity until both events have been published.
    TimelineEntity entity = null;
    while (entity == null || entity.GetEvents().Count < 2)
    {
        entity = store.GetEntity(
            attemptId.ToString(),
            AppAttemptMetricsConstants.EntityType,
            EnumSet.AllOf<TimelineReader.Field>());
    }

    // Entity-level fields.
    NUnit.Framework.Assert.AreEqual(AppAttemptMetricsConstants.EntityType, entity.GetEntityType());
    NUnit.Framework.Assert.AreEqual(attemptId.ToString(), entity.GetEntityId());
    NUnit.Framework.Assert.AreEqual(
        attemptId.GetApplicationId().ToString(),
        entity.GetPrimaryFilters()[AppAttemptMetricsConstants.ParentPrimaryFilter].GetEnumerator().Next());

    // Event-level fields; both event kinds must be present.
    bool sawRegistered = false;
    bool sawFinished = false;
    foreach (TimelineEvent timelineEvent in entity.GetEvents())
    {
        if (timelineEvent.GetEventType().Equals(AppAttemptMetricsConstants.RegisteredEventType))
        {
            sawRegistered = true;
            NUnit.Framework.Assert.AreEqual(
                attempt.GetHost(),
                timelineEvent.GetEventInfo()[AppAttemptMetricsConstants.HostEventInfo]);
            NUnit.Framework.Assert.AreEqual(
                attempt.GetRpcPort(),
                timelineEvent.GetEventInfo()[AppAttemptMetricsConstants.RpcPortEventInfo]);
            NUnit.Framework.Assert.AreEqual(
                attempt.GetMasterContainer().GetId().ToString(),
                timelineEvent.GetEventInfo()[AppAttemptMetricsConstants.MasterContainerEventInfo]);
            continue;
        }

        if (!timelineEvent.GetEventType().Equals(AppAttemptMetricsConstants.FinishedEventType))
        {
            continue;
        }

        sawFinished = true;
        NUnit.Framework.Assert.AreEqual(
            attempt.GetDiagnostics(),
            timelineEvent.GetEventInfo()[AppAttemptMetricsConstants.DiagnosticsInfoEventInfo]);
        NUnit.Framework.Assert.AreEqual(
            attempt.GetTrackingUrl(),
            timelineEvent.GetEventInfo()[AppAttemptMetricsConstants.TrackingUrlEventInfo]);
        NUnit.Framework.Assert.AreEqual(
            attempt.GetOriginalTrackingUrl(),
            timelineEvent.GetEventInfo()[AppAttemptMetricsConstants.OriginalTrackingUrlEventInfo]);
        NUnit.Framework.Assert.AreEqual(
            FinalApplicationStatus.Undefined.ToString(),
            timelineEvent.GetEventInfo()[AppAttemptMetricsConstants.FinalStatusEventInfo]);
        NUnit.Framework.Assert.AreEqual(
            YarnApplicationAttemptState.Finished.ToString(),
            timelineEvent.GetEventInfo()[AppAttemptMetricsConstants.StateEventInfo]);
    }

    NUnit.Framework.Assert.IsTrue(sawRegistered && sawFinished);
}
/// <summary>
/// Runs one application with two extra containers on a single mock node,
/// finishes it, and checks that subsequent node heartbeats report exactly one
/// application and two containers to clean up.
/// </summary>
public virtual void TestAppCleanup()
{
    LogManager.GetRootLogger().SetLevel(Level.Debug);

    MockRM rm = new MockRM();
    rm.Start();
    MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5000);
    RMApp app = rm.SubmitApp(2000);

    // Kick the scheduling so the AM container gets placed.
    nm1.NodeHeartbeat(true);
    RMAppAttempt attempt = app.GetCurrentAppAttempt();
    MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
    am.RegisterAppAttempt();

    // Ask for containers, then kick the scheduler again.
    int request = 2;
    am.Allocate("127.0.0.1", 1000, request, new AList<ContainerId>());
    nm1.NodeHeartbeat(true);

    IList<Container> allocated =
        am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers();
    int contReceived = allocated.Count;
    int allocationPolls = 0;
    while (contReceived < request && allocationPolls++ < 200)
    {
        Log.Info("Got " + contReceived + " containers. Waiting to get " + request);
        Sharpen.Thread.Sleep(100);
        allocated =
            am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>()).GetAllocatedContainers();
        contReceived += allocated.Count;
        nm1.NodeHeartbeat(true);
    }
    NUnit.Framework.Assert.AreEqual(request, contReceived);

    am.UnregisterAppAttempt();
    NodeHeartbeatResponse resp =
        nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
    am.WaitForState(RMAppAttemptState.Finished);

    // Currently only containers are cleaned via this;
    // the AM container is cleaned via the container launcher.
    resp = nm1.NodeHeartbeat(true);
    IList<ContainerId> containersToCleanup = resp.GetContainersToCleanup();
    IList<ApplicationId> appsToCleanup = resp.GetApplicationsToCleanup();
    int numCleanedContainers = containersToCleanup.Count;
    int numCleanedApps = appsToCleanup.Count;

    int cleanupPolls = 0;
    while (!(numCleanedContainers >= 2 && numCleanedApps >= 1) && cleanupPolls++ < 200)
    {
        Log.Info("Waiting to get cleanup events.. cleanedConts: " + numCleanedContainers + " cleanedApps: " + numCleanedApps);
        Sharpen.Thread.Sleep(100);
        resp = nm1.NodeHeartbeat(true);
        IList<ContainerId> deltaContainersToCleanup = resp.GetContainersToCleanup();
        IList<ApplicationId> deltaAppsToCleanup = resp.GetApplicationsToCleanup();

        // Accumulate the per-heartbeat deltas into the running lists.
        Sharpen.Collections.AddAll(containersToCleanup, deltaContainersToCleanup);
        Sharpen.Collections.AddAll(appsToCleanup, deltaAppsToCleanup);

        // Refresh the counters from the accumulated lists.
        numCleanedContainers = containersToCleanup.Count;
        numCleanedApps = appsToCleanup.Count;
    }

    NUnit.Framework.Assert.AreEqual(1, appsToCleanup.Count);
    NUnit.Framework.Assert.AreEqual(app.GetApplicationId(), appsToCleanup[0]);
    NUnit.Framework.Assert.AreEqual(1, numCleanedApps);
    NUnit.Framework.Assert.AreEqual(2, numCleanedContainers);

    rm.Stop();
}
/// <summary>
/// Re-creates scheduler state for the containers a node reports.
/// </summary>
/// <remarks>
/// Nothing happens when work-preserving recovery is disabled or when the
/// report list is null or empty. A reported container is killed on the node
/// (and skipped) when its application is unknown, uses an unmanaged AM, has no
/// scheduler application, or - for applications that do not keep containers
/// across attempts - belongs to a stopped or non-current attempt. Every other
/// container is recovered into the node, queue and attempt, and is released
/// immediately if the attempt has a pending release for it.
/// </remarks>
/// <param name="containerReports">Container statuses reported by the node; may be null.</param>
/// <param name="nm">The node that reported the containers.</param>
public virtual void RecoverContainersOnNode(IList<NMContainerStatus> containerReports, RMNode nm)
{
    lock (this)
    {
        if (!rmContext.IsWorkPreservingRecoveryEnabled())
        {
            return;
        }
        if (containerReports == null || containerReports.IsEmpty())
        {
            return;
        }

        foreach (NMContainerStatus container in containerReports)
        {
            ContainerId containerId = container.GetContainerId();
            ApplicationId appId = containerId.GetApplicationAttemptId().GetApplicationId();

            RMApp rmApp = rmContext.GetRMApps()[appId];
            if (rmApp == null)
            {
                Log.Error("Skip recovering container " + container + " for unknown application.");
                KillOrphanContainerOnNode(nm, container);
                continue;
            }

            // Unmanaged AM recovery is addressed in YARN-1815
            if (rmApp.GetApplicationSubmissionContext().GetUnmanagedAM())
            {
                Log.Info("Skip recovering container " + container + " for unmanaged AM." + rmApp.GetApplicationId());
                KillOrphanContainerOnNode(nm, container);
                continue;
            }

            SchedulerApplication<T> schedulerApp = applications[appId];
            if (schedulerApp == null)
            {
                Log.Info("Skip recovering container " + container + " for unknown SchedulerApplication. Application current state is " + rmApp.GetState());
                KillOrphanContainerOnNode(nm, container);
                continue;
            }

            Log.Info("Recovering container " + container);
            SchedulerApplicationAttempt schedulerAttempt = schedulerApp.GetCurrentAppAttempt();

            // Without keep-containers, only the live current attempt may get
            // its containers back.
            if (!rmApp.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts()
                && (schedulerAttempt.IsStopped()
                    || !schedulerAttempt.GetApplicationAttemptId().Equals(containerId.GetApplicationAttemptId())))
            {
                Log.Info("Skip recovering container " + container + " for already stopped attempt.");
                KillOrphanContainerOnNode(nm, container);
                continue;
            }

            // Create the RMContainer and feed it the recover event.
            RMContainer recovered = RecoverAndCreateContainer(container, nm);
            recovered.Handle(new RMContainerRecoverEvent(containerId, container));

            // Scheduler node.
            nodes[nm.GetNodeID()].RecoverContainer(recovered);

            // Queue: update headroom etc.
            Queue queue = schedulerAttempt.GetQueue();
            queue.RecoverContainer(clusterResource, schedulerAttempt, recovered);

            // Scheduler attempt.
            schedulerAttempt.RecoverContainer(recovered);

            // Flag the recovered container as the AM container when its id
            // matches the master container stored in the current app attempt.
            RMAppAttempt currentAttempt = rmApp.GetCurrentAppAttempt();
            if (currentAttempt != null)
            {
                Container masterContainer = currentAttempt.GetMasterContainer();
                if (masterContainer != null && masterContainer.GetId().Equals(recovered.GetContainerId()))
                {
                    ((RMContainerImpl)recovered).SetAMContainer(true);
                }
            }

            lock (schedulerAttempt)
            {
                ICollection<ContainerId> pendingReleases = schedulerAttempt.GetPendingRelease();
                if (pendingReleases.Contains(containerId))
                {
                    // The application already asked for this container to be released.
                    recovered.Handle(
                        new RMContainerFinishedEvent(
                            containerId,
                            SchedulerUtils.CreateAbnormalContainerStatus(containerId, SchedulerUtils.ReleasedContainer),
                            RMContainerEventType.Released));
                    pendingReleases.Remove(containerId);
                    Log.Info(containerId + " is released by application.");
                }
            }
        }
    }
}
/// <summary>
/// Entry point of the ZKRMStateStore load-time benchmark: parses the command
/// line, stores the requested number of applications and attempts, times
/// <c>store.LoadState()</c>, prints the result and removes the stored
/// applications again.
/// </summary>
/// <remarks>
/// Recognised options (case-insensitive), each followed by a value:
/// <c>-appsize</c>, <c>-appattemptsize</c>, <c>-hostPort</c> and
/// <c>-workingZnode</c>. Supplying <c>-hostPort</c> disables the local
/// <c>SetUp()</c> step. An option without a value, a non-numeric size, or an
/// unknown option is reported on standard error and yields -1 instead of
/// throwing.
/// </remarks>
/// <param name="args">Command-line arguments; must not be empty.</param>
/// <returns>0 on success, -1 on any argument, setup, store, load or cleanup failure.</returns>
public virtual int Run(string[] args)
{
    Log.Info("Starting ZKRMStateStorePerf ver." + version);
    int numApp = ZkPerfNumAppDefault;
    int numAppAttemptPerApp = ZkPerfNumAppattemptPerApp;
    string hostPort = null;
    bool launchLocalZK = true;

    if (args.Length == 0)
    {
        System.Console.Error.WriteLine("Missing arguments.");
        return -1;
    }

    // Parse the command line: every option consumes exactly one value.
    for (int i = 0; i < args.Length; i++)
    {
        string option = args[i];
        bool isAppSize = Sharpen.Runtime.EqualsIgnoreCase(option, "-appsize");
        bool isAttemptSize = Sharpen.Runtime.EqualsIgnoreCase(option, "-appattemptsize");
        bool isHostPort = Sharpen.Runtime.EqualsIgnoreCase(option, "-hostPort");
        bool isWorkingZnode = Sharpen.Runtime.EqualsIgnoreCase(option, "-workingZnode");

        if (!(isAppSize || isAttemptSize || isHostPort || isWorkingZnode))
        {
            System.Console.Error.WriteLine("Illegal argument: " + option);
            return -1;
        }

        // Guard the look-ahead: an option given as the last argument has no value.
        if (i + 1 >= args.Length)
        {
            System.Console.Error.WriteLine("Missing value for argument: " + option);
            return -1;
        }
        string value = args[++i];

        if (isAppSize)
        {
            if (!int.TryParse(value, out numApp))
            {
                System.Console.Error.WriteLine("Illegal value for " + option + ": " + value);
                return -1;
            }
        }
        else if (isAttemptSize)
        {
            if (!int.TryParse(value, out numAppAttemptPerApp))
            {
                System.Console.Error.WriteLine("Illegal value for " + option + ": " + value);
                return -1;
            }
        }
        else if (isHostPort)
        {
            hostPort = value;
            launchLocalZK = false;
        }
        else
        {
            workingZnode = value;
        }
    }

    if (launchLocalZK)
    {
        try
        {
            SetUp();
        }
        catch (Exception e)
        {
            System.Console.Error.WriteLine("failed to setup. : " + e.Message);
            return -1;
        }
    }

    InitStore(hostPort);
    long submitTime = Runtime.CurrentTimeMillis();
    long startTime = Runtime.CurrentTimeMillis() + 1234;

    AList<ApplicationId> applicationIds = new AList<ApplicationId>();
    AList<RMApp> rmApps = new AList<RMApp>();
    AList<ApplicationAttemptId> attemptIds = new AList<ApplicationAttemptId>();
    Dictionary<ApplicationId, ICollection<ApplicationAttemptId>> appIdsToAttemptId =
        new Dictionary<ApplicationId, ICollection<ApplicationAttemptId>>();
    RMStateStoreTestBase.TestDispatcher dispatcher = new RMStateStoreTestBase.TestDispatcher();
    store.SetRMDispatcher(dispatcher);

    // Generate the application ids and, per application, its attempt ids.
    for (int i_1 = 0; i_1 < numApp; i_1++)
    {
        ApplicationId appId = ApplicationId.NewInstance(clusterTimeStamp, i_1);
        applicationIds.AddItem(appId);
        AList<ApplicationAttemptId> attemptIdsForThisApp = new AList<ApplicationAttemptId>();
        for (int j = 0; j < numAppAttemptPerApp; j++)
        {
            ApplicationAttemptId attemptId = ApplicationAttemptId.NewInstance(appId, j);
            attemptIdsForThisApp.AddItem(attemptId);
        }
        appIdsToAttemptId[appId] = new LinkedHashSet(attemptIdsForThisApp);
        Sharpen.Collections.AddAll(attemptIds, attemptIdsForThisApp);
    }

    // Store every application.
    foreach (ApplicationId appId_1 in applicationIds)
    {
        RMApp app = null;
        try
        {
            app = StoreApp(store, appId_1, submitTime, startTime);
        }
        catch (Exception e)
        {
            System.Console.Error.WriteLine("failed to create Application Znode. : " + e.Message);
            return -1;
        }
        WaitNotify(dispatcher);
        rmApps.AddItem(app);
    }

    // Store every attempt.
    foreach (ApplicationAttemptId attemptId_1 in attemptIds)
    {
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> tokenId =
            GenerateAMRMToken(attemptId_1, appTokenMgr);
        SecretKey clientTokenKey = clientToAMTokenMgr.CreateMasterKey(attemptId_1);
        try
        {
            StoreAttempt(store, attemptId_1, ContainerId.NewContainerId(attemptId_1, 0L).ToString(),
                tokenId, clientTokenKey, dispatcher);
        }
        catch (Exception e)
        {
            System.Console.Error.WriteLine("failed to create AppAttempt Znode. : " + e.Message);
            return -1;
        }
    }

    // The measured section: only LoadState() is timed.
    long storeStart = Runtime.CurrentTimeMillis();
    try
    {
        store.LoadState();
    }
    catch (Exception e)
    {
        System.Console.Error.WriteLine("failed to locaState from ZKRMStateStore. : " + e.Message);
        return -1;
    }
    long storeEnd = Runtime.CurrentTimeMillis();
    long loadTime = storeEnd - storeStart;
    string resultMsg = "ZKRMStateStore takes " + loadTime + " msec to loadState.";
    Log.Info(resultMsg);
    System.Console.Out.WriteLine(resultMsg);

    // cleanup
    try
    {
        foreach (RMApp app in rmApps)
        {
            ApplicationStateData appState = ApplicationStateData.NewInstance(
                app.GetSubmitTime(), app.GetStartTime(), app.GetApplicationSubmissionContext(), app.GetUser());
            ApplicationId appId = app.GetApplicationId();
            IDictionary m = Org.Mockito.Mockito.Mock<IDictionary>();
            // Look the attempts up by THIS application's id; the loop variable
            // of the earlier store loop is not in scope here.
            Org.Mockito.Mockito.When(m.Keys).ThenReturn(appIdsToAttemptId[appId]);
            appState.attempts = m;
            store.RemoveApplicationStateInternal(appState);
        }
    }
    catch (Exception e)
    {
        System.Console.Error.WriteLine("failed to cleanup. : " + e.Message);
        return -1;
    }

    return 0;
}
/// <summary>
/// Checks that a client holding a client-to-AM token can still connect when
/// the AM receives its master key only after a delay: the key is installed by
/// a timer task 250 ms after scheduling, while the token verification is
/// already under way.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestClientTokenRace()
{
    conf.Set(CommonConfigurationKeysPublic.HadoopSecurityAuthentication, "kerberos");
    UserGroupInformation.SetConfiguration(conf);

    ContainerManagementProtocol containerManager =
        Org.Mockito.Mockito.Mock<ContainerManagementProtocol>();
    StartContainersResponse startResponse = Org.Mockito.Mockito.Mock<StartContainersResponse>();
    Org.Mockito.Mockito
        .When(containerManager.StartContainers((StartContainersRequest)Matchers.Any()))
        .ThenReturn(startResponse);

    DrainDispatcher dispatcher = new DrainDispatcher();
    MockRM rm = new _MockRMWithCustomAMLauncher_433(dispatcher, conf, containerManager);
    rm.Start();

    // Submit an app.
    RMApp app = rm.SubmitApp(1024);

    // Set up a node and heartbeat twice, draining the dispatcher each time.
    MockNM nm1 = rm.RegisterNode("localhost:1234", 3072);
    nm1.NodeHeartbeat(true);
    dispatcher.Await();
    nm1.NodeHeartbeat(true);
    dispatcher.Await();

    ApplicationAttemptId currentAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();
    MockAM mockAM = new MockAM(
        rm.GetRMContext(),
        rm.GetApplicationMasterService(),
        app.GetCurrentAppAttempt().GetAppAttemptId());
    UserGroupInformation appUgi = UserGroupInformation.CreateRemoteUser(currentAttemptId.ToString());
    RegisterApplicationMasterResponse registration = appUgi.DoAs(new _PrivilegedAction_469(mockAM));

    // Get the app-report and the client-to-AM token it carries.
    GetApplicationReportRequest reportRequest =
        Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<GetApplicationReportRequest>();
    reportRequest.SetApplicationId(app.GetApplicationId());
    GetApplicationReportResponse reportResponse =
        rm.GetClientRMService().GetApplicationReport(reportRequest);
    ApplicationReport appReport = reportResponse.GetApplicationReport();
    Org.Apache.Hadoop.Yarn.Api.Records.Token originalClientToAMToken = appReport.GetClientToAMToken();

    // The ClientToAMToken master key should have arrived with the
    // register-application-master response.
    ByteBuffer clientMasterKey = registration.GetClientToAMTokenMasterKey();
    NUnit.Framework.Assert.IsNotNull(clientMasterKey);
    NUnit.Framework.Assert.IsTrue(((byte[])clientMasterKey.Array()).Length > 0);

    // Start the AM; it is created without a key (null is passed for it).
    ApplicationAttemptId firstAttemptId = app.GetAppAttempts().Keys.GetEnumerator().Next();
    NUnit.Framework.Assert.IsNotNull(firstAttemptId);
    TestClientToAMTokens.CustomAM am = new TestClientToAMTokens.CustomAM(firstAttemptId, null);
    am.Init(conf);
    am.Start();

    // Now the real test!
    // Set up clients to be able to pick up correct tokens.
    SecurityUtil.SetSecurityInfoProviders(new TestClientToAMTokens.CustomSecurityInfo());
    Org.Apache.Hadoop.Security.Token.Token<ClientToAMTokenIdentifier> token =
        ConverterUtils.ConvertFromYarn(originalClientToAMToken, am.address);

    // Schedule the key to be set after a significant delay.
    Timer keyTimer = new Timer();
    TimerTask installKeyTask = new _TimerTask_516(am, clientMasterKey);
    keyTimer.Schedule(installKeyTask, 250);

    // The connect should pause waiting for the master key to arrive.
    VerifyValidToken(conf, am, token);

    am.Stop();
    rm.Stop();
}
/// <summary>
/// Registers a 2 GB node and a 3 GB node, runs an application on the first and
/// requests one 2 GB container. The test expects no allocation after
/// heartbeating only the first node and exactly one allocation once the
/// second node has heartbeated.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestExcessReservationThanNodeManagerCapacity()
{
    MockRM rm = new MockRM(conf);
    rm.Start();

    // Register both nodes and let them heartbeat once.
    MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 2 * Gb, 4);
    MockNM nm2 = rm.RegisterNode("127.0.0.1:2234", 3 * Gb, 4);
    nm1.NodeHeartbeat(true);
    nm2.NodeHeartbeat(true);

    // Wait (bounded) until the RM knows both nodes.
    int registrationRetries = 20;
    int size = rm.GetRMContext().GetRMNodes().Count;
    while (true)
    {
        size = rm.GetRMContext().GetRMNodes().Count;
        if (size == 2 || registrationRetries-- <= 0)
        {
            break;
        }
        Log.Info("Waiting for node managers to register : " + size);
        Sharpen.Thread.Sleep(100);
    }
    NUnit.Framework.Assert.AreEqual(2, rm.GetRMContext().GetRMNodes().Count);

    // Submit an application and kick the scheduling.
    RMApp app1 = rm.SubmitApp(128);
    nm1.NodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
    MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
    am1.RegisterAppAttempt();

    Log.Info("sending container requests ");
    am1.AddRequests(new string[] { "*" }, 2 * Gb, 1, 1);
    // Send the request.
    AllocateResponse alloc1Response = am1.Schedule();

    // Kick the scheduler through nm1 only.
    nm1.NodeHeartbeat(true);
    Log.Info("heartbeating nm1");
    for (int retries = 20; alloc1Response.GetAllocatedContainers().Count < 1 && retries-- > 0; )
    {
        Log.Info("Waiting for containers to be created for app 1...");
        Sharpen.Thread.Sleep(500);
        alloc1Response = am1.Schedule();
    }
    Log.Info("received container : " + alloc1Response.GetAllocatedContainers().Count);

    // No container should be allocated.
    // Internally it should not have been reserved either.
    NUnit.Framework.Assert.IsTrue(alloc1Response.GetAllocatedContainers().Count == 0);

    Log.Info("heartbeating nm2");
    nm2.NodeHeartbeat(true);
    for (int retries = 20; alloc1Response.GetAllocatedContainers().Count < 1 && retries-- > 0; )
    {
        Log.Info("Waiting for containers to be created for app 1...");
        Sharpen.Thread.Sleep(500);
        alloc1Response = am1.Schedule();
    }
    Log.Info("received container : " + alloc1Response.GetAllocatedContainers().Count);
    NUnit.Framework.Assert.IsTrue(alloc1Response.GetAllocatedContainers().Count == 1);

    rm.Stop();
}
/// <summary>
/// Verifies container placement when resource requests carry no label
/// expression and the configuration from
/// <c>GetConfigurationWithDefaultQueueLabels</c> is in effect: apps submitted
/// to queues a1, b1 and c1 must get their container on h1 (label x),
/// h2 (label y) and h3 (no label) respectively.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestContainerAllocateWithDefaultQueueLabels()
{
    // Same scenario as testContainerAllocateWithLabel, except that the
    // ResourceRequest does not name a label expression; the default queue
    // label expression is used instead.
    const int allocationTimeoutMs = 10 * 1000;

    // set node -> label
    mgr.AddToCluserNodeLabels(ImmutableSet.Of("x", "y"));
    mgr.AddLabelsToNode(
        ImmutableMap.Of(
            NodeId.NewInstance("h1", 0), ToSet("x"),
            NodeId.NewInstance("h2", 0), ToSet("y")));

    // inject node label manager
    MockRM rm1 = new _MockRM_714(this, GetConfigurationWithDefaultQueueLabels(conf));
    rm1.GetRMContext().SetNodeLabelManager(mgr);
    rm1.Start();

    MockNM nodeX = rm1.RegisterNode("h1:1234", 8000);      // label = x
    MockNM nodeY = rm1.RegisterNode("h2:1234", 8000);      // label = y
    MockNM nodeNoLabel = rm1.RegisterNode("h3:1234", 8000); // label = <empty>

    // Queue a1 (label = x): the container must land on h1, never on h3.
    RMApp app1 = rm1.SubmitApp(200, "app", "user", null, "a1");
    MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nodeX);
    am1.Allocate("*", 1024, 1, new AList<ContainerId>());
    ContainerId container1 = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 2);
    NUnit.Framework.Assert.IsFalse(
        rm1.WaitForState(nodeNoLabel, container1, RMContainerState.Allocated, allocationTimeoutMs));
    NUnit.Framework.Assert.IsTrue(
        rm1.WaitForState(nodeX, container1, RMContainerState.Allocated, allocationTimeoutMs));
    CheckTaskContainersHost(am1.GetApplicationAttemptId(), container1, rm1, "h1");

    // Queue b1 (label = y): the container must land on h2, never on h3.
    RMApp app2 = rm1.SubmitApp(200, "app", "user", null, "b1");
    MockAM am2 = MockRM.LaunchAndRegisterAM(app2, rm1, nodeY);
    am2.Allocate("*", 1024, 1, new AList<ContainerId>());
    ContainerId container2 = ContainerId.NewContainerId(am2.GetApplicationAttemptId(), 2);
    NUnit.Framework.Assert.IsFalse(
        rm1.WaitForState(nodeNoLabel, container2, RMContainerState.Allocated, allocationTimeoutMs));
    NUnit.Framework.Assert.IsTrue(
        rm1.WaitForState(nodeY, container2, RMContainerState.Allocated, allocationTimeoutMs));
    CheckTaskContainersHost(am2.GetApplicationAttemptId(), container2, rm1, "h2");

    // Queue c1 (label = ""): the container must land on h3, never on h2.
    RMApp app3 = rm1.SubmitApp(200, "app", "user", null, "c1");
    MockAM am3 = MockRM.LaunchAndRegisterAM(app3, rm1, nodeNoLabel);
    am3.Allocate("*", 1024, 1, new AList<ContainerId>());
    ContainerId container3 = ContainerId.NewContainerId(am3.GetApplicationAttemptId(), 2);
    NUnit.Framework.Assert.IsFalse(
        rm1.WaitForState(nodeY, container3, RMContainerState.Allocated, allocationTimeoutMs));
    NUnit.Framework.Assert.IsTrue(
        rm1.WaitForState(nodeNoLabel, container3, RMContainerState.Allocated, allocationTimeoutMs));
    CheckTaskContainersHost(am3.GetApplicationAttemptId(), container3, rm1, "h3");

    rm1.Close();
}
/// <summary>
/// Registers the calling application master for the attempt identified by its
/// AMRM token and builds the registration response (maximum resource
/// capability, application ACLs, queue, scheduler resource types and, when
/// applicable, the client-to-AM master key plus containers and NM tokens
/// carried over from previous attempts).
/// </summary>
/// <remarks>
/// Registration is serialised per attempt by locking the attempt's entry in
/// <c>responseMap</c>. The stored response id is set to 0 to mark the attempt
/// as registered. If creating an NM token for a transferred container fails
/// with an <see cref="System.ArgumentException"/> whose inner exception is an
/// <c>UnknownHostException</c>, that inner exception is rethrown; any other
/// <see cref="System.ArgumentException"/> is logged and the token is skipped.
/// </remarks>
/// <param name="request">Registration request carrying host, RPC port and tracking URL.</param>
/// <returns>The populated registration response.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual RegisterApplicationMasterResponse RegisterApplicationMaster(RegisterApplicationMasterRequest request)
{
    AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest();
    ApplicationAttemptId applicationAttemptId = amrmTokenIdentifier.GetApplicationAttemptId();
    ApplicationId appID = applicationAttemptId.GetApplicationId();

    ApplicationMasterService.AllocateResponseLock Lock = responseMap[applicationAttemptId];
    if (Lock == null)
    {
        RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(),
            RMAuditLogger.AuditConstants.RegisterAm,
            "Application doesn't exist in cache " + applicationAttemptId,
            "ApplicationMasterService",
            "Error in registering application master", appID, applicationAttemptId);
        ThrowApplicationDoesNotExistInCacheException(applicationAttemptId);
    }

    // Allow only one thread in AM to do registerApp at a time.
    lock (Lock)
    {
        AllocateResponse lastResponse = Lock.GetAllocateResponse();
        if (HasApplicationMasterRegistered(applicationAttemptId))
        {
            string message = "Application Master is already registered : " + appID;
            Log.Warn(message);
            RMAuditLogger.LogFailure(this.rmContext.GetRMApps()[appID].GetUser(),
                RMAuditLogger.AuditConstants.RegisterAm, string.Empty,
                "ApplicationMasterService", message, appID, applicationAttemptId);
            throw new InvalidApplicationMasterRequestException(message);
        }

        this.amLivelinessMonitor.ReceivedPing(applicationAttemptId);
        RMApp app = this.rmContext.GetRMApps()[appID];

        // Setting the response id to 0 to identify if the
        // application master is register for the respective attemptid
        lastResponse.SetResponseId(0);
        Lock.SetAllocateResponse(lastResponse);
        Log.Info("AM registration " + applicationAttemptId);
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMAppAttemptRegistrationEvent(applicationAttemptId, request.GetHost(),
                request.GetRpcPort(), request.GetTrackingUrl()));
        RMAuditLogger.LogSuccess(app.GetUser(), RMAuditLogger.AuditConstants.RegisterAm,
            "ApplicationMasterService", appID, applicationAttemptId);

        // Pick up min/max resource from scheduler...
        RegisterApplicationMasterResponse response =
            recordFactory.NewRecordInstance<RegisterApplicationMasterResponse>();
        response.SetMaximumResourceCapability(rScheduler.GetMaximumResourceCapability(app.GetQueue()));
        response.SetApplicationACLs(
            app.GetRMAppAttempt(applicationAttemptId).GetSubmissionContext()
                .GetAMContainerSpec().GetApplicationACLs());
        response.SetQueue(app.GetQueue());

        if (UserGroupInformation.IsSecurityEnabled())
        {
            Log.Info("Setting client token master key");
            response.SetClientToAMTokenMasterKey(
                ByteBuffer.Wrap(
                    rmContext.GetClientToAMTokenSecretManager()
                        .GetMasterKey(applicationAttemptId).GetEncoded()));
        }

        // For work-preserving AM restart, retrieve previous attempts' containers
        // and corresponding NM tokens.
        if (app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts())
        {
            IList<Container> transferredContainers =
                ((AbstractYarnScheduler)rScheduler).GetTransferredContainers(applicationAttemptId);
            if (!transferredContainers.IsEmpty())
            {
                response.SetContainersFromPreviousAttempts(transferredContainers);
                IList<NMToken> nmTokens = new AList<NMToken>();
                foreach (Container container in transferredContainers)
                {
                    try
                    {
                        NMToken token = rmContext.GetNMTokenSecretManager()
                            .CreateAndGetNMToken(app.GetUser(), applicationAttemptId, container);
                        if (null != token)
                        {
                            nmTokens.AddItem(token);
                        }
                    }
                    catch (ArgumentException e)
                    {
                        // if it's a DNS issue, throw UnknowHostException directly and
                        // that will be automatically retried by RMProxy in RPC layer.
                        if (e.InnerException is UnknownHostException)
                        {
                            throw (UnknownHostException)e.InnerException;
                        }

                        // Any other failure still skips this container's token
                        // (best effort), but is no longer swallowed silently.
                        Log.Warn("Failed to create NM token for container " + container.GetId()
                            + " of application attempt " + applicationAttemptId
                            + "; the token is omitted from the registration response.", e);
                    }
                }
                response.SetNMTokensFromPreviousAttempts(nmTokens);
                Log.Info("Application " + appID + " retrieved " + transferredContainers.Count
                    + " containers from previous" + " attempts and " + nmTokens.Count + " NM tokens.");
            }
        }

        response.SetSchedulerResourceTypes(rScheduler.GetSchedulingResourceTypes());
        return response;
    }
}
/// <summary>
/// Handles an application master heartbeat: validates the request, forwards
/// asks, releases and blacklist changes to the scheduler, and returns the
/// resulting allocation together with node updates, completed containers,
/// preemption information and, if the master key changed, a new AMRM token.
/// </summary>
/// <remarks>
/// The whole exchange runs while holding the attempt's entry in
/// <c>responseMap</c>. Response-id handling, with <c>last</c> being the id of
/// the stored response:
/// <list type="bullet">
/// <item><description><c>request id + 1 == last</c>: treated as a repeated heartbeat; the stored response is returned unchanged.</description></item>
/// <item><description><c>request id + 1 &lt; last</c>: rejected with <c>InvalidApplicationMasterRequestException</c>.</description></item>
/// <item><description>otherwise the request is processed and the new response gets id <c>last + 1</c>.</description></item>
/// </list>
/// Progress values that are NaN or negative are replaced by 0 and values
/// above 1 by 1 before the status update is dispatched.
/// </remarks>
/// <param name="request">The allocate request sent by the application master.</param>
/// <returns>The response stored as the attempt's latest response.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual AllocateResponse Allocate(AllocateRequest request)
{
    AMRMTokenIdentifier amrmTokenIdentifier = AuthorizeRequest();
    ApplicationAttemptId appAttemptId = amrmTokenIdentifier.GetApplicationAttemptId();
    ApplicationId applicationId = appAttemptId.GetApplicationId();
    this.amLivelinessMonitor.ReceivedPing(appAttemptId);

    /* check if its in cache */
    ApplicationMasterService.AllocateResponseLock Lock = responseMap[appAttemptId];
    if (Lock == null)
    {
        string message = "Application attempt " + appAttemptId
            + " doesn't exist in ApplicationMasterService cache.";
        Log.Error(message);
        throw new ApplicationAttemptNotFoundException(message);
    }

    lock (Lock)
    {
        AllocateResponse lastResponse = Lock.GetAllocateResponse();
        if (!HasApplicationMasterRegistered(appAttemptId))
        {
            string message = "AM is not registered for known application attempt: " + appAttemptId
                + " or RM had restarted after AM registered . AM should re-register.";
            Log.Info(message);
            RMAuditLogger.LogFailure(
                this.rmContext.GetRMApps()[appAttemptId.GetApplicationId()].GetUser(),
                RMAuditLogger.AuditConstants.AmAllocate, string.Empty,
                "ApplicationMasterService", message, applicationId, appAttemptId);
            throw new ApplicationMasterNotRegisteredException(message);
        }

        if ((request.GetResponseId() + 1) == lastResponse.GetResponseId())
        {
            /* old heartbeat */
            return lastResponse;
        }
        else if (request.GetResponseId() + 1 < lastResponse.GetResponseId())
        {
            // A normal heartbeat carries the id of the last response it saw,
            // so that id (not id + 1) is what the message must report.
            string message = "Invalid responseId in AllocateRequest from application attempt: "
                + appAttemptId + ", expect responseId to be " + lastResponse.GetResponseId()
                + ", but get " + request.GetResponseId();
            throw new InvalidApplicationMasterRequestException(message);
        }

        // filter illegal progress values
        float filteredProgress = request.GetProgress();
        if (float.IsNaN(filteredProgress) || filteredProgress == float.NegativeInfinity
            || filteredProgress < 0)
        {
            request.SetProgress(0);
        }
        else if (filteredProgress > 1 || filteredProgress == float.PositiveInfinity)
        {
            request.SetProgress(1);
        }

        // Send the status update to the appAttempt.
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMAppAttemptStatusupdateEvent(appAttemptId, request.GetProgress()));

        IList<ResourceRequest> ask = request.GetAskList();
        IList<ContainerId> release = request.GetReleaseList();
        ResourceBlacklistRequest blacklistRequest = request.GetResourceBlacklistRequest();
        IList<string> blacklistAdditions = (blacklistRequest != null)
            ? blacklistRequest.GetBlacklistAdditions()
            : Sharpen.Collections.EmptyList;
        IList<string> blacklistRemovals = (blacklistRequest != null)
            ? blacklistRequest.GetBlacklistRemovals()
            : Sharpen.Collections.EmptyList;
        RMApp app = this.rmContext.GetRMApps()[applicationId];

        // set label expression for Resource Requests if resourceName=ANY
        ApplicationSubmissionContext asc = app.GetApplicationSubmissionContext();
        foreach (ResourceRequest req in ask)
        {
            if (null == req.GetNodeLabelExpression()
                && ResourceRequest.Any.Equals(req.GetResourceName()))
            {
                req.SetNodeLabelExpression(asc.GetNodeLabelExpression());
            }
        }

        // sanity check
        try
        {
            RMServerUtils.NormalizeAndValidateRequests(ask,
                rScheduler.GetMaximumResourceCapability(), app.GetQueue(), rScheduler, rmContext);
        }
        catch (InvalidResourceRequestException e)
        {
            Log.Warn("Invalid resource ask by application " + appAttemptId, e);
            throw;
        }

        try
        {
            RMServerUtils.ValidateBlacklistRequest(blacklistRequest);
        }
        catch (InvalidResourceBlacklistRequestException e)
        {
            Log.Warn("Invalid blacklist request by application " + appAttemptId, e);
            throw;
        }

        // In the case of work-preserving AM restart, it's possible for the
        // AM to release containers from the earlier attempt.
        if (!app.GetApplicationSubmissionContext().GetKeepContainersAcrossApplicationAttempts())
        {
            try
            {
                RMServerUtils.ValidateContainerReleaseRequest(release, appAttemptId);
            }
            catch (InvalidContainerReleaseException e)
            {
                Log.Warn("Invalid container release by application " + appAttemptId, e);
                throw;
            }
        }

        // Send new requests to appAttempt.
        Allocation allocation = this.rScheduler.Allocate(appAttemptId, ask, release,
            blacklistAdditions, blacklistRemovals);

        if (!blacklistAdditions.IsEmpty() || !blacklistRemovals.IsEmpty())
        {
            Log.Info("blacklist are updated in Scheduler." + "blacklistAdditions: "
                + blacklistAdditions + ", " + "blacklistRemovals: " + blacklistRemovals);
        }

        RMAppAttempt appAttempt = app.GetRMAppAttempt(appAttemptId);
        AllocateResponse allocateResponse = recordFactory.NewRecordInstance<AllocateResponse>();
        if (!allocation.GetContainers().IsEmpty())
        {
            allocateResponse.SetNMTokens(allocation.GetNMTokens());
        }

        // update the response with the deltas of node status changes
        IList<RMNode> updatedNodes = new AList<RMNode>();
        if (app.PullRMNodeUpdates(updatedNodes) > 0)
        {
            IList<NodeReport> updatedNodeReports = new AList<NodeReport>();
            foreach (RMNode rmNode in updatedNodes)
            {
                SchedulerNodeReport schedulerNodeReport = rScheduler.GetNodeReport(rmNode.GetNodeID());
                // Defaults used when the scheduler has no report for the node.
                Resource used = BuilderUtils.NewResource(0, 0);
                int numContainers = 0;
                if (schedulerNodeReport != null)
                {
                    used = schedulerNodeReport.GetUsedResource();
                    numContainers = schedulerNodeReport.GetNumContainers();
                }
                NodeId nodeId = rmNode.GetNodeID();
                NodeReport report = BuilderUtils.NewNodeReport(nodeId, rmNode.GetState(),
                    rmNode.GetHttpAddress(), rmNode.GetRackName(), used,
                    rmNode.GetTotalCapability(), numContainers, rmNode.GetHealthReport(),
                    rmNode.GetLastHealthReportTime(), rmNode.GetNodeLabels());
                updatedNodeReports.AddItem(report);
            }
            allocateResponse.SetUpdatedNodes(updatedNodeReports);
        }

        allocateResponse.SetAllocatedContainers(allocation.GetContainers());
        allocateResponse.SetCompletedContainersStatuses(appAttempt.PullJustFinishedContainers());
        allocateResponse.SetResponseId(lastResponse.GetResponseId() + 1);
        allocateResponse.SetAvailableResources(allocation.GetResourceLimit());
        allocateResponse.SetNumClusterNodes(this.rScheduler.GetNumClusterNodes());

        // add preemption to the allocateResponse message (if any)
        allocateResponse.SetPreemptionMessage(GeneratePreemptionMessage(allocation));

        // update AMRMToken if the token is rolled-up
        MasterKeyData nextMasterKey =
            this.rmContext.GetAMRMTokenSecretManager().GetNextMasterKeyData();
        if (nextMasterKey != null
            && nextMasterKey.GetMasterKey().GetKeyId() != amrmTokenIdentifier.GetKeyId())
        {
            RMAppAttemptImpl appAttemptImpl = (RMAppAttemptImpl)appAttempt;
            Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> amrmToken =
                appAttempt.GetAMRMToken();
            // Only mint a new token if the attempt's current one was not
            // already created with the next master key.
            if (nextMasterKey.GetMasterKey().GetKeyId() != appAttemptImpl.GetAMRMTokenKeyId())
            {
                Log.Info("The AMRMToken has been rolled-over. Send new AMRMToken back"
                    + " to application: " + applicationId);
                amrmToken = rmContext.GetAMRMTokenSecretManager().CreateAndGetAMRMToken(appAttemptId);
                appAttemptImpl.SetAMRMToken(amrmToken);
            }
            allocateResponse.SetAMRMToken(
                Org.Apache.Hadoop.Yarn.Api.Records.Token.NewInstance(
                    amrmToken.GetIdentifier(), amrmToken.GetKind().ToString(),
                    amrmToken.GetPassword(), amrmToken.GetService().ToString()));
        }

        /*
         * As we are updating the response inside the lock object so we don't
         * need to worry about unregister call occurring in between (which
         * removes the lock object).
         */
        Lock.SetAllocateResponse(allocateResponse);
        return allocateResponse;
    }
}
/// <summary>
/// Exercises an <see cref="RMStateStore"/> round trip: stores one application with two
/// attempts, stores and removes a second application (twice), reloads the store and checks
/// what was persisted, then updates application/attempt state and checks that the updates
/// are visible after a second reload.
/// </summary>
/// <param name="stateStoreHelper">
/// Queried for the store under test every time the store is (re)opened, and asked at the
/// end whether the final state is valid.
/// </param>
/// <param name="verifier">
/// Callbacks invoked right after the first application and after the to-be-removed attempt
/// have been stored.
/// </param>
/// <exception cref="System.Exception"/>
internal virtual void TestRMAppStateStore(RMStateStoreTestBase.RMStateStoreHelper stateStoreHelper, RMStateStoreTestBase.StoreStateVerifier verifier)
{
    long submitTime = Runtime.CurrentTimeMillis();
    long startTime = Runtime.CurrentTimeMillis() + 1234;
    Configuration conf = new YarnConfiguration();
    RMStateStore store = stateStoreHelper.GetRMStateStore();
    RMStateStoreTestBase.TestDispatcher dispatcher = new RMStateStoreTestBase.TestDispatcher();
    store.SetRMDispatcher(dispatcher);

    RMContext rmContext = Org.Mockito.Mockito.Mock<RMContext>();
    Org.Mockito.Mockito.When(rmContext.GetStateStore()).ThenReturn(store);

    AMRMTokenSecretManager appTokenMgr = Org.Mockito.Mockito.Spy(new AMRMTokenSecretManager(conf, rmContext));
    MasterKeyData masterKeyData = appTokenMgr.CreateNewMasterKey();
    Org.Mockito.Mockito.When(appTokenMgr.GetMasterKey()).ThenReturn(masterKeyData);

    ClientToAMTokenSecretManagerInRM clientToAMTokenMgr = new ClientToAMTokenSecretManagerInRM();

    ApplicationAttemptId attemptId1 = ConverterUtils.ToApplicationAttemptId("appattempt_1352994193343_0001_000001");
    ApplicationId appId1 = attemptId1.GetApplicationId();
    StoreApp(store, appId1, submitTime, startTime);
    verifier.AfterStoreApp(store, appId1);

    // create application token and client token key for attempt1
    Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> appAttemptToken1 = GenerateAMRMToken(attemptId1, appTokenMgr);
    SecretKey clientTokenKey1 = clientToAMTokenMgr.CreateMasterKey(attemptId1);
    ContainerId containerId1 = StoreAttempt(store, attemptId1, "container_1352994193343_0001_01_000001", appAttemptToken1, clientTokenKey1, dispatcher);

    string appAttemptIdStr2 = "appattempt_1352994193343_0001_000002";
    ApplicationAttemptId attemptId2 = ConverterUtils.ToApplicationAttemptId(appAttemptIdStr2);

    // create application token and client token key for attempt2
    Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> appAttemptToken2 = GenerateAMRMToken(attemptId2, appTokenMgr);
    SecretKey clientTokenKey2 = clientToAMTokenMgr.CreateMasterKey(attemptId2);
    ContainerId containerId2 = StoreAttempt(store, attemptId2, "container_1352994193343_0001_02_000001", appAttemptToken2, clientTokenKey2, dispatcher);

    // store a second application with one attempt, then remove it through a mocked RMApp
    ApplicationAttemptId attemptIdRemoved = ConverterUtils.ToApplicationAttemptId("appattempt_1352994193343_0002_000001");
    ApplicationId appIdRemoved = attemptIdRemoved.GetApplicationId();
    StoreApp(store, appIdRemoved, submitTime, startTime);
    StoreAttempt(store, attemptIdRemoved, "container_1352994193343_0002_01_000001", null, null, dispatcher);
    verifier.AfterStoreAppAttempt(store, attemptIdRemoved);

    RMApp mockRemovedApp = Org.Mockito.Mockito.Mock<RMApp>();
    RMAppAttemptMetrics mockRmAppAttemptMetrics = Org.Mockito.Mockito.Mock<RMAppAttemptMetrics>();
    Dictionary<ApplicationAttemptId, RMAppAttempt> attempts = new Dictionary<ApplicationAttemptId, RMAppAttempt>();
    ApplicationSubmissionContext context = new ApplicationSubmissionContextPBImpl();
    context.SetApplicationId(appIdRemoved);
    Org.Mockito.Mockito.When(mockRemovedApp.GetSubmitTime()).ThenReturn(submitTime);
    Org.Mockito.Mockito.When(mockRemovedApp.GetApplicationSubmissionContext()).ThenReturn(context);
    Org.Mockito.Mockito.When(mockRemovedApp.GetAppAttempts()).ThenReturn(attempts);
    Org.Mockito.Mockito.When(mockRemovedApp.GetUser()).ThenReturn("user1");
    RMAppAttempt mockRemovedAttempt = Org.Mockito.Mockito.Mock<RMAppAttempt>();
    Org.Mockito.Mockito.When(mockRemovedAttempt.GetAppAttemptId()).ThenReturn(attemptIdRemoved);
    Org.Mockito.Mockito.When(mockRemovedAttempt.GetRMAppAttemptMetrics()).ThenReturn(mockRmAppAttemptMetrics);
    Org.Mockito.Mockito.When(mockRmAppAttemptMetrics.GetAggregateAppResourceUsage()).ThenReturn(new AggregateAppResourceUsage(0, 0));
    attempts[attemptIdRemoved] = mockRemovedAttempt;
    store.RemoveApplication(mockRemovedApp);

    // remove application directory recursively.
    StoreApp(store, appIdRemoved, submitTime, startTime);
    StoreAttempt(store, attemptIdRemoved, "container_1352994193343_0002_01_000001", null, null, dispatcher);
    store.RemoveApplication(mockRemovedApp);

    // let things settle down
    Sharpen.Thread.Sleep(1000);
    store.Close();

    // give tester a chance to modify app state in the store
    ModifyAppState();

    // load state
    store = stateStoreHelper.GetRMStateStore();
    store.SetRMDispatcher(dispatcher);
    RMStateStore.RMState state = store.LoadState();
    IDictionary<ApplicationId, ApplicationStateData> rmAppState = state.GetApplicationState();

    ApplicationStateData appState = rmAppState[appId1];
    // app is loaded
    NUnit.Framework.Assert.IsNotNull(appState);
    // app is loaded correctly
    NUnit.Framework.Assert.AreEqual(submitTime, appState.GetSubmitTime());
    NUnit.Framework.Assert.AreEqual(startTime, appState.GetStartTime());
    // submission context is loaded correctly
    NUnit.Framework.Assert.AreEqual(appId1, appState.GetApplicationSubmissionContext().GetApplicationId());

    ApplicationAttemptStateData attemptState = appState.GetAttempt(attemptId1);
    // attempt1 is loaded correctly
    NUnit.Framework.Assert.IsNotNull(attemptState);
    NUnit.Framework.Assert.AreEqual(attemptId1, attemptState.GetAttemptId());
    NUnit.Framework.Assert.AreEqual(-1000, attemptState.GetAMContainerExitStatus());
    // attempt1 container is loaded correctly
    NUnit.Framework.Assert.AreEqual(containerId1, attemptState.GetMasterContainer().GetId());
    // attempt1 client token master key is loaded correctly
    Assert.AssertArrayEquals(clientTokenKey1.GetEncoded(), attemptState.GetAppAttemptTokens().GetSecretKey(RMStateStore.AmClientTokenMasterKeyName));

    attemptState = appState.GetAttempt(attemptId2);
    // attempt2 is loaded correctly
    NUnit.Framework.Assert.IsNotNull(attemptState);
    NUnit.Framework.Assert.AreEqual(attemptId2, attemptState.GetAttemptId());
    // attempt2 container is loaded correctly
    NUnit.Framework.Assert.AreEqual(containerId2, attemptState.GetMasterContainer().GetId());
    // attempt2 client token master key is loaded correctly
    Assert.AssertArrayEquals(clientTokenKey2.GetEncoded(), attemptState.GetAppAttemptTokens().GetSecretKey(RMStateStore.AmClientTokenMasterKeyName));

    //******* update application/attempt state *******//
    ApplicationStateData appState2 = ApplicationStateData.NewInstance(appState.GetSubmitTime(), appState.GetStartTime(), appState.GetUser(), appState.GetApplicationSubmissionContext(), RMAppState.Finished, "appDiagnostics", 1234);
    appState2.attempts.PutAll(appState.attempts);
    store.UpdateApplicationState(appState2);

    // attemptState still refers to attempt2 here, so attempt2 is the one being updated
    ApplicationAttemptStateData oldAttemptState = attemptState;
    ApplicationAttemptStateData newAttemptState = ApplicationAttemptStateData.NewInstance(oldAttemptState.GetAttemptId(), oldAttemptState.GetMasterContainer(), oldAttemptState.GetAppAttemptTokens(), oldAttemptState.GetStartTime(), RMAppAttemptState.Finished, "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.Succeeded, 100, oldAttemptState.GetFinishTime(), 0, 0);
    store.UpdateApplicationAttemptState(newAttemptState);

    // test updating the state of an app/attempt whose initial state was not
    // saved.
    ApplicationId dummyAppId = ApplicationId.NewInstance(1234, 10);
    ApplicationSubmissionContext dummyContext = new ApplicationSubmissionContextPBImpl();
    dummyContext.SetApplicationId(dummyAppId);
    ApplicationStateData dummyApp = ApplicationStateData.NewInstance(appState.GetSubmitTime(), appState.GetStartTime(), appState.GetUser(), dummyContext, RMAppState.Finished, "appDiagnostics", 1234);
    store.UpdateApplicationState(dummyApp);

    ApplicationAttemptId dummyAttemptId = ApplicationAttemptId.NewInstance(dummyAppId, 6);
    ApplicationAttemptStateData dummyAttempt = ApplicationAttemptStateData.NewInstance(dummyAttemptId, oldAttemptState.GetMasterContainer(), oldAttemptState.GetAppAttemptTokens(), oldAttemptState.GetStartTime(), RMAppAttemptState.Finished, "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.Succeeded, 111, oldAttemptState.GetFinishTime(), 0, 0);
    store.UpdateApplicationAttemptState(dummyAttempt);

    // let things settle down
    Sharpen.Thread.Sleep(1000);
    store.Close();

    // check updated application state.
    store = stateStoreHelper.GetRMStateStore();
    store.SetRMDispatcher(dispatcher);
    RMStateStore.RMState newRMState = store.LoadState();
    IDictionary<ApplicationId, ApplicationStateData> newRMAppState = newRMState.GetApplicationState();
    NUnit.Framework.Assert.IsNotNull(newRMAppState[dummyApp.GetApplicationSubmissionContext().GetApplicationId()]);
    ApplicationStateData updatedAppState = newRMAppState[appId1];
    NUnit.Framework.Assert.AreEqual(appState.GetApplicationSubmissionContext().GetApplicationId(), updatedAppState.GetApplicationSubmissionContext().GetApplicationId());
    NUnit.Framework.Assert.AreEqual(appState.GetSubmitTime(), updatedAppState.GetSubmitTime());
    NUnit.Framework.Assert.AreEqual(appState.GetStartTime(), updatedAppState.GetStartTime());
    NUnit.Framework.Assert.AreEqual(appState.GetUser(), updatedAppState.GetUser());
    // new app state fields
    NUnit.Framework.Assert.AreEqual(RMAppState.Finished, updatedAppState.GetState());
    NUnit.Framework.Assert.AreEqual("appDiagnostics", updatedAppState.GetDiagnostics());
    NUnit.Framework.Assert.AreEqual(1234, updatedAppState.GetFinishTime());

    // check updated attempt state
    NUnit.Framework.Assert.IsNotNull(newRMAppState[dummyApp.GetApplicationSubmissionContext().GetApplicationId()].GetAttempt(dummyAttemptId));
    ApplicationAttemptStateData updatedAttemptState = updatedAppState.GetAttempt(newAttemptState.GetAttemptId());
    NUnit.Framework.Assert.AreEqual(oldAttemptState.GetAttemptId(), updatedAttemptState.GetAttemptId());
    NUnit.Framework.Assert.AreEqual(containerId2, updatedAttemptState.GetMasterContainer().GetId());
    // Check the tokens on the freshly reloaded attempt; the pre-update attemptState
    // object was already verified above and would pass regardless of what was persisted.
    Assert.AssertArrayEquals(clientTokenKey2.GetEncoded(), updatedAttemptState.GetAppAttemptTokens().GetSecretKey(RMStateStore.AmClientTokenMasterKeyName));
    // new attempt state fields
    NUnit.Framework.Assert.AreEqual(RMAppAttemptState.Finished, updatedAttemptState.GetState());
    NUnit.Framework.Assert.AreEqual("myTrackingUrl", updatedAttemptState.GetFinalTrackingUrl());
    NUnit.Framework.Assert.AreEqual("attemptDiagnostics", updatedAttemptState.GetDiagnostics());
    NUnit.Framework.Assert.AreEqual(100, updatedAttemptState.GetAMContainerExitStatus());
    NUnit.Framework.Assert.AreEqual(FinalApplicationStatus.Succeeded, updatedAttemptState.GetFinalApplicationStatus());

    // assert store is in expected state after everything is cleaned
    NUnit.Framework.Assert.IsTrue(stateStoreHelper.IsFinalStateValid());
    store.Close();
}
/// <summary>
/// Drives an <c>RMContainerImpl</c> from New through Allocated, Acquired and Running,
/// releases it while running, and verifies the resulting state, diagnostics, exit status,
/// writer/publisher notifications and the container-finished event sent to the attempt
/// handler. A late Finished event must leave the container in Released.
/// </summary>
public virtual void TestReleaseWhileRunning()
{
    // Dispatcher with one handler for attempt events (inspected later) and one for node events.
    DrainDispatcher dispatcher = new DrainDispatcher();
    EventHandler<RMAppAttemptEvent> attemptEventSink = Org.Mockito.Mockito.Mock<EventHandler>();
    EventHandler nodeEventSink = Org.Mockito.Mockito.Mock<EventHandler>();
    dispatcher.Register(typeof(RMAppAttemptEventType), attemptEventSink);
    dispatcher.Register(typeof(RMNodeEventType), nodeEventSink);
    dispatcher.Init(new YarnConfiguration());
    dispatcher.Start();

    // Identifiers and the raw container record under test.
    NodeId allocatedNode = BuilderUtils.NewNodeId("host", 3425);
    ApplicationId application = BuilderUtils.NewApplicationId(1, 1);
    ApplicationAttemptId attempt = BuilderUtils.NewApplicationAttemptId(application, 1);
    ContainerId cid = BuilderUtils.NewContainerId(attempt, 1);
    ContainerAllocationExpirer allocationExpirer = Org.Mockito.Mockito.Mock<ContainerAllocationExpirer>();
    Resource capability = BuilderUtils.NewResource(512, 1);
    Priority requestPriority = BuilderUtils.NewPriority(5);
    Container rawContainer = BuilderUtils.NewContainer(cid, allocatedNode, "host:3465", capability, requestPriority, null);

    // Application map that resolves every id to a mock app with no attempts.
    ConcurrentMap<ApplicationId, RMApp> appsById = Org.Mockito.Mockito.Spy(new ConcurrentHashMap<ApplicationId, RMApp>());
    RMApp appMock = Org.Mockito.Mockito.Mock<RMApp>();
    Org.Mockito.Mockito.When(appMock.GetRMAppAttempt((ApplicationAttemptId)Matchers.Any())).ThenReturn(null);
    Org.Mockito.Mockito.DoReturn(appMock).When(appsById)[(ApplicationId)Matchers.Any()];

    // RM context wired to the mocks above.
    RMApplicationHistoryWriter historyWriterMock = Org.Mockito.Mockito.Mock<RMApplicationHistoryWriter>();
    SystemMetricsPublisher publisherMock = Org.Mockito.Mockito.Mock<SystemMetricsPublisher>();
    RMContext contextMock = Org.Mockito.Mockito.Mock<RMContext>();
    Org.Mockito.Mockito.When(contextMock.GetDispatcher()).ThenReturn(dispatcher);
    Org.Mockito.Mockito.When(contextMock.GetContainerAllocationExpirer()).ThenReturn(allocationExpirer);
    Org.Mockito.Mockito.When(contextMock.GetRMApplicationHistoryWriter()).ThenReturn(historyWriterMock);
    Org.Mockito.Mockito.When(contextMock.GetRMApps()).ThenReturn(appsById);
    Org.Mockito.Mockito.When(contextMock.GetSystemMetricsPublisher()).ThenReturn(publisherMock);
    Org.Mockito.Mockito.When(contextMock.GetYarnConfiguration()).ThenReturn(new YarnConfiguration());

    RMContainer underTest = new RMContainerImpl(rawContainer, attempt, allocatedNode, "user", contextMock);

    // Freshly constructed container: initial state plus creation notifications.
    NUnit.Framework.Assert.AreEqual(RMContainerState.New, underTest.GetState());
    NUnit.Framework.Assert.AreEqual(capability, underTest.GetAllocatedResource());
    NUnit.Framework.Assert.AreEqual(allocatedNode, underTest.GetAllocatedNode());
    NUnit.Framework.Assert.AreEqual(requestPriority, underTest.GetAllocatedPriority());
    Org.Mockito.Mockito.Verify(historyWriterMock).ContainerStarted(Matchers.Any<RMContainer>());
    Org.Mockito.Mockito.Verify(publisherMock).ContainerCreated(Matchers.Any<RMContainer>(), Matchers.AnyLong());

    // Walk the happy path: each event must land the container in the paired state.
    RMContainerEventType[] lifecycleEvents = { RMContainerEventType.Start, RMContainerEventType.Acquired, RMContainerEventType.Launched };
    RMContainerState[] lifecycleStates = { RMContainerState.Allocated, RMContainerState.Acquired, RMContainerState.Running };
    for (int step = 0; step < lifecycleEvents.Length; step++)
    {
        underTest.Handle(new RMContainerEvent(cid, lifecycleEvents[step]));
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual(lifecycleStates[step], underTest.GetState());
    }
    NUnit.Framework.Assert.AreEqual("http://host:3465/node/containerlogs/container_1_0001_01_000001/user", underTest.GetLogURL());

    // Release the running container; forget earlier attempt-handler interactions first.
    Org.Mockito.Mockito.Reset(attemptEventSink);
    ContainerStatus releasedStatus = SchedulerUtils.CreateAbnormalContainerStatus(cid, SchedulerUtils.ReleasedContainer);
    underTest.Handle(new RMContainerFinishedEvent(cid, releasedStatus, RMContainerEventType.Released));
    dispatcher.Await();

    NUnit.Framework.Assert.AreEqual(RMContainerState.Released, underTest.GetState());
    NUnit.Framework.Assert.AreEqual(SchedulerUtils.ReleasedContainer, underTest.GetDiagnosticsInfo());
    NUnit.Framework.Assert.AreEqual(ContainerExitStatus.Aborted, underTest.GetContainerExitStatus());
    NUnit.Framework.Assert.AreEqual(ContainerState.Complete, underTest.GetContainerState());
    Org.Mockito.Mockito.Verify(historyWriterMock).ContainerFinished(Matchers.Any<RMContainer>());
    Org.Mockito.Mockito.Verify(publisherMock).ContainerFinished(Matchers.Any<RMContainer>(), Matchers.AnyLong());

    // The attempt handler must have received exactly the finished event for this container.
    ArgumentCaptor<RMAppAttemptContainerFinishedEvent> finishedEventCaptor = ArgumentCaptor.ForClass<RMAppAttemptContainerFinishedEvent>();
    Org.Mockito.Mockito.Verify(attemptEventSink).Handle(finishedEventCaptor.Capture());
    RMAppAttemptContainerFinishedEvent delivered = finishedEventCaptor.GetValue();
    NUnit.Framework.Assert.AreEqual(attempt, delivered.GetApplicationAttemptId());
    NUnit.Framework.Assert.AreEqual(releasedStatus, delivered.GetContainerStatus());
    NUnit.Framework.Assert.AreEqual(RMAppAttemptEventType.ContainerFinished, delivered.GetType());

    // A FINISHED event arriving after release must not change the state.
    ContainerStatus lateFinishedStatus = SchedulerUtils.CreateAbnormalContainerStatus(cid, "FinishedContainer");
    underTest.Handle(new RMContainerFinishedEvent(cid, lateFinishedStatus, RMContainerEventType.Finished));
    NUnit.Framework.Assert.AreEqual(RMContainerState.Released, underTest.GetState());
}
/// <summary>
/// Starts a resource manager, waits for the AM container to be launched, then calls
/// <c>RegisterApplicationMaster</c> as the attempt's remote user without any token and
/// expects the call to be rejected with an access-control failure.
/// </summary>
/// <exception cref="System.Exception">
/// Rethrown when the registration fails for a reason other than an
/// <c>AccessControlException</c> cause.
/// </exception>
public virtual void TestUnauthorizedAccess()
{
    TestAMAuthorization.MyContainerManager containerManager = new TestAMAuthorization.MyContainerManager();
    rm = new TestAMAuthorization.MockRMWithAMS(conf, containerManager);
    rm.Start();

    MockNM nm1 = rm.RegisterNode("localhost:1234", 5120);
    RMApp app = rm.SubmitApp(1024);
    nm1.NodeHeartbeat(true);

    // Poll (up to 40 x 1s) until the container manager has received the AM launch tokens.
    int waitCount = 0;
    while (containerManager.containerTokens == null && waitCount++ < 40)
    {
        Log.Info("Waiting for AM Launch to happen..");
        Sharpen.Thread.Sleep(1000);
    }
    NUnit.Framework.Assert.IsNotNull(containerManager.containerTokens);

    RMAppAttempt attempt = app.GetCurrentAppAttempt();
    ApplicationAttemptId applicationAttemptId = attempt.GetAppAttemptId();
    WaitForLaunchedState(attempt);

    // Named rmConf rather than conf: the simple name conf is already used above for the
    // field, and C# forbids declaring a same-named local later in the same scope (CS0844).
    Configuration rmConf = rm.GetConfig();
    YarnRPC rpc = YarnRPC.Create(rmConf);
    IPEndPoint serviceAddr = rmConf.GetSocketAddr(YarnConfiguration.RmSchedulerAddress, YarnConfiguration.DefaultRmSchedulerAddress, YarnConfiguration.DefaultRmSchedulerPort);
    UserGroupInformation currentUser = UserGroupInformation.CreateRemoteUser(applicationAttemptId.ToString());

    // First try contacting NM without tokens
    ApplicationMasterProtocol client = currentUser.DoAs(new _PrivilegedAction_262(rpc, serviceAddr, rmConf));
    RegisterApplicationMasterRequest request = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<RegisterApplicationMasterRequest>();
    try
    {
        client.RegisterApplicationMaster(request);
        NUnit.Framework.Assert.Fail("Should fail with authorization error");
    }
    catch (Exception e)
    {
        if (IsCause(typeof(AccessControlException), e))
        {
            // Because there are no tokens, the request should be rejected as the
            // server side will assume we are trying simple auth.
            string expectedMessage = string.Empty;
            if (UserGroupInformation.IsSecurityEnabled())
            {
                expectedMessage = "Client cannot authenticate via:[TOKEN]";
            }
            else
            {
                // NOTE(review): two spaces after the period are believed to match the
                // server-side message — confirm against the RPC server source.
                expectedMessage = "SIMPLE authentication is not enabled.  Available:[TOKEN]";
            }
            NUnit.Framework.Assert.IsTrue(e.InnerException.Message.Contains(expectedMessage));
        }
        else
        {
            // Anything else (including the Assert.Fail above) is a genuine test failure.
            throw;
        }
    }
}
/// <summary>
/// Builds the key/value summary of an application: identity, owner, queue, state,
/// tracking URL and AM host of the current attempt, timing, final status, aggregate
/// resource usage, preemption counters and application type.
/// </summary>
/// <param name="app">
/// The <see cref="Org.Apache.Hadoop.Yarn.Server.Resourcemanager.Rmapp.RMApp"/> to
/// summarise; must not be <code>null</code>.
/// </param>
/// <returns>
/// The populated summary builder. Tracking URL and AM host are reported as "N/A" when
/// the application has no current attempt.
/// </returns>
public static RMAppManager.ApplicationSummary.SummaryBuilder CreateAppSummary(RMApp app)
{
    RMAppAttempt currentAttempt = app.GetCurrentAppAttempt();
    string trackingUrl = currentAttempt != null ? currentAttempt.GetTrackingUrl() : "N/A";
    string amHost = currentAttempt != null ? currentAttempt.GetHost() : "N/A";
    RMAppMetrics appMetrics = app.GetRMAppMetrics();

    return new RMAppManager.ApplicationSummary.SummaryBuilder()
        .Add("appId", app.GetApplicationId())
        .Add("name", app.GetName())
        .Add("user", app.GetUser())
        .Add("queue", app.GetQueue())
        .Add("state", app.GetState())
        .Add("trackingUrl", trackingUrl)
        .Add("appMasterHost", amHost)
        .Add("startTime", app.GetStartTime())
        .Add("finishTime", app.GetFinishTime())
        .Add("finalStatus", app.GetFinalApplicationStatus())
        .Add("memorySeconds", appMetrics.GetMemorySeconds())
        .Add("vcoreSeconds", appMetrics.GetVcoreSeconds())
        .Add("preemptedAMContainers", appMetrics.GetNumAMContainersPreempted())
        .Add("preemptedNonAMContainers", appMetrics.GetNumNonAMContainersPreempted())
        .Add("preemptedResources", appMetrics.GetResourcePreempted())
        .Add("applicationType", app.GetApplicationType());
}
/// <summary>
/// Publishes created, finished and ACLs-updated events for two applications (the second
/// without explicit ACLs) and verifies the timeline entity and its events read back from
/// the store.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestPublishApplicationMetrics()
{
    for (int appSeq = 1; appSeq <= 2; ++appSeq)
    {
        bool aclsSpecified = appSeq == 1;
        ApplicationId applicationId = ApplicationId.NewInstance(0, appSeq);
        RMApp application = CreateRMApp(applicationId);

        metricsPublisher.AppCreated(application, application.GetStartTime());
        metricsPublisher.AppFinished(application, RMAppState.Finished, application.GetFinishTime());
        // The second application covers the case where the user doesn't specify the ACLs.
        metricsPublisher.AppACLsUpdated(application, aclsSpecified ? "uers1,user2" : null, 4L);

        // Poll the store until all three events of this application have been published.
        TimelineEntity published = null;
        do
        {
            published = store.GetEntity(applicationId.ToString(), ApplicationMetricsConstants.EntityType, EnumSet.AllOf<TimelineReader.Field>());
        }
        while (published == null || published.GetEvents().Count < 3);

        // Entity identity and descriptive fields.
        NUnit.Framework.Assert.AreEqual(ApplicationMetricsConstants.EntityType, published.GetEntityType());
        NUnit.Framework.Assert.AreEqual(application.GetApplicationId().ToString(), published.GetEntityId());
        NUnit.Framework.Assert.AreEqual(application.GetName(), published.GetOtherInfo()[ApplicationMetricsConstants.NameEntityInfo]);
        NUnit.Framework.Assert.AreEqual(application.GetQueue(), published.GetOtherInfo()[ApplicationMetricsConstants.QueueEntityInfo]);
        NUnit.Framework.Assert.AreEqual(application.GetUser(), published.GetOtherInfo()[ApplicationMetricsConstants.UserEntityInfo]);
        NUnit.Framework.Assert.AreEqual(application.GetApplicationType(), published.GetOtherInfo()[ApplicationMetricsConstants.TypeEntityInfo]);
        NUnit.Framework.Assert.AreEqual(application.GetSubmitTime(), published.GetOtherInfo()[ApplicationMetricsConstants.SubmittedTimeEntityInfo]);

        if (aclsSpecified)
        {
            NUnit.Framework.Assert.AreEqual("uers1,user2", published.GetOtherInfo()[ApplicationMetricsConstants.AppViewAclsEntityInfo]);
        }
        else
        {
            // Missing ACLs are published as an empty string; resource metrics are only
            // checked for this application.
            NUnit.Framework.Assert.AreEqual(string.Empty, published.GetOtherInfo()[ApplicationMetricsConstants.AppViewAclsEntityInfo]);
            NUnit.Framework.Assert.AreEqual(application.GetRMAppMetrics().GetMemorySeconds(), long.Parse(published.GetOtherInfo()[ApplicationMetricsConstants.AppMemMetrics].ToString()));
            NUnit.Framework.Assert.AreEqual(application.GetRMAppMetrics().GetVcoreSeconds(), long.Parse(published.GetOtherInfo()[ApplicationMetricsConstants.AppCpuMetrics].ToString()));
        }

        // Every event type must be present, each carrying the expected payload.
        bool sawCreated = false;
        bool sawFinished = false;
        bool sawAclsUpdated = false;
        foreach (TimelineEvent timelineEvent in published.GetEvents())
        {
            if (timelineEvent.GetEventType().Equals(ApplicationMetricsConstants.CreatedEventType))
            {
                sawCreated = true;
                NUnit.Framework.Assert.AreEqual(application.GetStartTime(), timelineEvent.GetTimestamp());
            }
            else if (timelineEvent.GetEventType().Equals(ApplicationMetricsConstants.FinishedEventType))
            {
                sawFinished = true;
                NUnit.Framework.Assert.AreEqual(application.GetFinishTime(), timelineEvent.GetTimestamp());
                NUnit.Framework.Assert.AreEqual(application.GetDiagnostics().ToString(), timelineEvent.GetEventInfo()[ApplicationMetricsConstants.DiagnosticsInfoEventInfo]);
                NUnit.Framework.Assert.AreEqual(application.GetFinalApplicationStatus().ToString(), timelineEvent.GetEventInfo()[ApplicationMetricsConstants.FinalStatusEventInfo]);
                NUnit.Framework.Assert.AreEqual(YarnApplicationState.Finished.ToString(), timelineEvent.GetEventInfo()[ApplicationMetricsConstants.StateEventInfo]);
            }
            else if (timelineEvent.GetEventType().Equals(ApplicationMetricsConstants.AclsUpdatedEventType))
            {
                sawAclsUpdated = true;
                NUnit.Framework.Assert.AreEqual(4L, timelineEvent.GetTimestamp());
            }
        }
        NUnit.Framework.Assert.IsTrue(sawCreated && sawFinished && sawAclsUpdated);
    }
}
/// <summary>
/// Fills a 4 GB node with two 2 GB containers, shrinks the node to 2 GB through the admin
/// service, and verifies that the scheduler reports a negative available resource and
/// that the over-committed container can still complete and return its memory.
/// </summary>
public virtual void TestResourceOverCommit()
{
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 4 * Gb);
        RMApp app1 = rm.SubmitApp(2048);
        // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();

        SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        // check node report, 2 GB used and 2 GB available
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetAvailableResource().GetMemory());

        // add request for containers
        am1.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, 2 * Gb, 1, 1);
        // send the request
        AllocateResponse alloc1Response = am1.Schedule();

        // kick the scheduler, 2 GB given to AM1, resource remaining 0
        nm1.NodeHeartbeat(true);
        while (alloc1Response.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
            alloc1Response = am1.Schedule();
        }

        IList<Container> allocated1 = alloc1Response.GetAllocatedContainers();
        NUnit.Framework.Assert.AreEqual(1, allocated1.Count);
        NUnit.Framework.Assert.AreEqual(2 * Gb, allocated1[0].GetResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated1[0].GetNodeId());

        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        // check node report, 4 GB used and 0 GB available
        NUnit.Framework.Assert.AreEqual(0, report_nm1.GetAvailableResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(4 * Gb, report_nm1.GetUsedResource().GetMemory());

        // check container is assigned with 2 GB.
        Container c1 = allocated1[0];
        NUnit.Framework.Assert.AreEqual(2 * Gb, c1.GetResource().GetMemory());

        // update node resource to 2 GB, so resource is over-consumed.
        IDictionary<NodeId, ResourceOption> nodeResourceMap = new Dictionary<NodeId, ResourceOption>();
        nodeResourceMap[nm1.GetNodeId()] = ResourceOption.NewInstance(Org.Apache.Hadoop.Yarn.Api.Records.Resource.NewInstance(2 * Gb, 1), -1);
        UpdateNodeResourceRequest request = UpdateNodeResourceRequest.NewInstance(nodeResourceMap);
        AdminService @as = rm.adminService;
        @as.UpdateNodeResource(request);

        // Now, the used resource is still 4 GB, and available resource is minus value.
        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        NUnit.Framework.Assert.AreEqual(4 * Gb, report_nm1.GetUsedResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(-2 * Gb, report_nm1.GetAvailableResource().GetMemory());

        // Check container can complete successfully in case of resource over-commitment.
        ContainerStatus containerStatus = BuilderUtils.NewContainerStatus(c1.GetId(), ContainerState.Complete, string.Empty, 0);
        nm1.ContainerStatus(containerStatus);
        // Bounded poll (20 x 100 ms) for the completion to reach the attempt.
        int waitCount = 0;
        while (attempt1.GetJustFinishedContainers().Count < 1 && waitCount++ != 20)
        {
            Log.Info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
            Sharpen.Thread.Sleep(100);
        }
        NUnit.Framework.Assert.AreEqual(1, attempt1.GetJustFinishedContainers().Count);
        NUnit.Framework.Assert.AreEqual(1, am1.Schedule().GetCompletedContainersStatuses().Count);

        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());
        // As container return 2 GB back, the available resource becomes 0 again.
        NUnit.Framework.Assert.AreEqual(0 * Gb, report_nm1.GetAvailableResource().GetMemory());
    }
    finally
    {
        // Stop the RM even when an assertion above fails, so a failing run does not
        // leave a started resource manager behind.
        rm.Stop();
    }
}
/// <summary>
/// Sends allocate requests carrying out-of-range or non-finite progress values and waits
/// until the attempt reports the expected filtered progress for each of them.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestProgressFilter()
{
    MockRM rm = new MockRM(conf);
    rm.Start();
    try
    {
        // Register node1
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);

        // Submit an application
        RMApp app1 = rm.SubmitApp(2048);
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();

        AllocateRequestPBImpl allocateRequest = new AllocateRequestPBImpl();
        IList<ContainerId> release = new AList<ContainerId>();
        IList<ResourceRequest> ask = new AList<ResourceRequest>();
        allocateRequest.SetReleaseList(release);
        allocateRequest.SetAskList(ask);

        // Progress value sent by the AM, paired by index with the value the attempt is
        // expected to report once the allocate event has been handled.
        float[] reportedProgress = { float.PositiveInfinity, float.NaN, (float)9, float.NegativeInfinity, (float)0.5, (float)-1 };
        float[] expectedProgress = { 1, 0, 1, 0, 0.5f, 0 };

        for (int step = 0; step < reportedProgress.Length; step++)
        {
            allocateRequest.SetProgress(reportedProgress[step]);
            am1.Allocate(allocateRequest);
            while (attempt1.GetProgress() != expectedProgress[step])
            {
                Log.Info("Waiting for allocate event to be handled ...");
                Sharpen.Thread.Sleep(100);
            }
        }
    }
    finally
    {
        // Stop the RM so it does not outlive the test, whether or not the test passed.
        rm.Stop();
    }
}
/// <summary>
/// Verifies that allocate calls made before the AM registers are rejected, that a second
/// registration is rejected with the "already registered" message, and that allocate
/// after the attempt has finished is rejected as attempt-not-found.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestallocateBeforeAMRegistration()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    MockRM rm = new MockRM();
    rm.Start();
    try
    {
        MockNM nm1 = rm.RegisterNode("h1:1234", 5000);
        RMApp app = rm.SubmitApp(2000);

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());

        // request for containers before registering: must be rejected
        int request = 2;
        try
        {
            am.Allocate("h1", 1000, request, new AList<ContainerId>());
            NUnit.Framework.Assert.Fail();
        }
        catch (ApplicationMasterNotRegisteredException)
        {
        }

        // kick the scheduler
        nm1.NodeHeartbeat(true);
        try
        {
            am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>());
            NUnit.Framework.Assert.Fail();
        }
        catch (ApplicationMasterNotRegisteredException)
        {
        }

        am.RegisterAppAttempt();

        // Registering twice must fail. The failure is captured and asserted outside the
        // try block so that a missing exception is reported as such instead of being
        // swallowed by the catch-all and surfacing as a message mismatch.
        Exception duplicateRegistrationError = null;
        try
        {
            am.RegisterAppAttempt(false);
        }
        catch (Exception e)
        {
            duplicateRegistrationError = e;
        }
        NUnit.Framework.Assert.IsNotNull(duplicateRegistrationError);
        NUnit.Framework.Assert.AreEqual("Application Master is already registered : " + attempt.GetAppAttemptId().GetApplicationId(), duplicateRegistrationError.Message);

        // Simulate an AM that was disconnected and app attempt was removed
        // (responseMap does not contain attemptid)
        am.UnregisterAppAttempt();
        nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);
        try
        {
            am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>());
            NUnit.Framework.Assert.Fail();
        }
        catch (ApplicationAttemptNotFoundException)
        {
        }
    }
    finally
    {
        // Stop the RM so it does not outlive the test, whether or not the test passed.
        rm.Stop();
    }
}
/// <summary>
/// Checks that containers are killed, and per-label queue usage is updated, when the
/// label of the node they run on is replaced: first a container requested with label x,
/// then one requested with label y, and finally the AM container itself.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestNodeUpdate()
{
    // Upper bound, in milliseconds, for every container-state wait below.
    int waitMillis = 10 * 1000;

    // Cluster labels x, y, z with the initial mapping h1 -> x and h2 -> y.
    mgr.AddToCluserNodeLabels(ImmutableSet.Of("x", "y", "z"));
    mgr.AddLabelsToNode(ImmutableMap.Of(NodeId.NewInstance("h1", 0), ToSet("x")));
    mgr.AddLabelsToNode(ImmutableMap.Of(NodeId.NewInstance("h2", 0), ToSet("y")));

    // Resource manager using the label manager configured above.
    MockRM rm = new _MockRM_110(this, GetConfigurationWithQueueLabels(conf));
    rm.GetRMContext().SetNodeLabelManager(mgr);
    rm.Start();
    MockNM nm1 = rm.RegisterNode("h1:1234", 8000);
    MockNM nm2 = rm.RegisterNode("h2:1234", 8000);
    MockNM nm3 = rm.RegisterNode("h3:1234", 8000);

    // Submit an app to queue "a"; its AM is launched through nm3.
    RMApp submittedApp = rm.SubmitApp(Gb, "app", "user", null, "a");
    MockAM appMaster = MockRM.LaunchAndRegisterAM(submittedApp, rm, nm3);

    // Ask for one container with label x and wait for container #2 to be allocated.
    appMaster.Allocate("*", Gb, 1, new AList<ContainerId>(), "x");
    ContainerId labelXContainer = ContainerId.NewContainerId(appMaster.GetApplicationAttemptId(), 2);
    NUnit.Framework.Assert.IsTrue(rm.WaitForState(nm1, labelXContainer, RMContainerState.Allocated, waitMillis));

    // queue a now uses x=1G and ""=1G
    CheckUsedResource(rm, "a", 1024, "x");
    CheckUsedResource(rm, "a", 1024);

    // Relabel h1 to z: the label-x container should be killed.
    mgr.ReplaceLabelsOnNode(ImmutableMap.Of(NodeId.NewInstance("h1", 0), ToSet("z")));
    NUnit.Framework.Assert.IsTrue(rm.WaitForState(nm1, labelXContainer, RMContainerState.Killed, waitMillis));

    // queue a now uses x=0G, ""=1G ("" unchanged)
    CheckUsedResource(rm, "a", 0, "x");
    CheckUsedResource(rm, "a", 1024);

    // Ask for one container with label y and wait for container #3 to be allocated.
    appMaster.Allocate("*", Gb, 1, new AList<ContainerId>(), "y");
    ContainerId labelYContainer = ContainerId.NewContainerId(appMaster.GetApplicationAttemptId(), 3);
    NUnit.Framework.Assert.IsTrue(rm.WaitForState(nm2, labelYContainer, RMContainerState.Allocated, waitMillis));

    // queue a now uses y=1G and ""=1G
    CheckUsedResource(rm, "a", 1024, "y");
    CheckUsedResource(rm, "a", 1024);

    // Remove every label from h2: the label-y container should be killed.
    mgr.ReplaceLabelsOnNode(ImmutableMap.Of(NodeId.NewInstance("h2", 0), CommonNodeLabelsManager.EmptyStringSet));
    NUnit.Framework.Assert.IsTrue(rm.WaitForState(nm1, labelYContainer, RMContainerState.Killed, waitMillis));

    // queue a now uses x=0G, y=0G, ""=1G ("" unchanged)
    CheckUsedResource(rm, "a", 0, "x");
    CheckUsedResource(rm, "a", 0, "y");
    CheckUsedResource(rm, "a", 1024);

    // Relabel h3 to z: the AM container (#1) should be killed.
    ContainerId amContainer = ContainerId.NewContainerId(appMaster.GetApplicationAttemptId(), 1);
    mgr.ReplaceLabelsOnNode(ImmutableMap.Of(NodeId.NewInstance("h3", 0), ToSet("z")));
    NUnit.Framework.Assert.IsTrue(rm.WaitForState(nm1, amContainer, RMContainerState.Killed, waitMillis));

    // queue a no longer uses anything under x, y or the default partition
    CheckUsedResource(rm, "a", 0, "x");
    CheckUsedResource(rm, "a", 0, "y");
    CheckUsedResource(rm, "a", 0);

    rm.Close();
}
/// <summary>
/// Exercises label-aware container allocation across a two-level queue hierarchy and
/// five nodes, checking on which host each requested container ends up.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestContainerAllocateWithComplexLabels()
{
    /*
     * Queue structure:
     *                      root (*)
     *                  ________________
     *                 /                \
     *               a x(100%), y(50%)   b y(50%), z(100%)
     *               ________________    ______________
     *              /                   /              \
     *             a1 (x,y)         b1(no)              b2(y,z)
     *               100%                          y = 100%, z = 100%
     *
     * Node structure:
     * h1 : x
     * h2 : y
     * h3 : y
     * h4 : z
     * h5 : NO
     *
     * Total resource:
     * x: 4G
     * y: 6G
     * z: 2G
     * *: 2G
     *
     * Resource of
     * a1: x=4G, y=3G, NO=0.2G
     * b1: NO=0.9G (max=1G)
     * b2: y=3, z=2G, NO=0.9G (max=1G)
     *
     * Each node can only allocate two containers
     */
    // set node -> label
    mgr.AddToCluserNodeLabels(ImmutableSet.Of("x", "y", "z"));
    mgr.AddLabelsToNode(ImmutableMap.Of(
        NodeId.NewInstance("h1", 0), ToSet("x"),
        NodeId.NewInstance("h2", 0), ToSet("y"),
        NodeId.NewInstance("h3", 0), ToSet("y"),
        NodeId.NewInstance("h4", 0), ToSet("z"),
        NodeId.NewInstance("h5", 0), RMNodeLabelsManager.EmptyStringSet));
    // inject node label manager
    MockRM rm1 = new _MockRM_557(this, GetComplexConfigurationWithQueueLabels(conf));
    try
    {
        rm1.GetRMContext().SetNodeLabelManager(mgr);
        rm1.Start();
        MockNM nm1 = rm1.RegisterNode("h1:1234", 2048);
        MockNM nm2 = rm1.RegisterNode("h2:1234", 2048);
        MockNM nm3 = rm1.RegisterNode("h3:1234", 2048);
        MockNM nm4 = rm1.RegisterNode("h4:1234", 2048);
        MockNM nm5 = rm1.RegisterNode("h5:1234", 2048);
        ContainerId containerId;

        // launch an app to queue a1; its AM is launched through nm1 (h1, label = x)
        RMApp app1 = rm1.SubmitApp(1024, "app", "user", null, "a1");
        MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);

        // request a container (label = y). can be allocated on nm2
        am1.Allocate("*", 1024, 1, new AList<ContainerId>(), "y");
        containerId = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 2L);
        NUnit.Framework.Assert.IsTrue(rm1.WaitForState(nm2, containerId, RMContainerState.Allocated,
            10 * 1000));
        CheckTaskContainersHost(am1.GetApplicationAttemptId(), containerId, rm1, "h2");

        // launch an app to queue b1 (no label); its AM is launched through nm5 (h5, no label)
        RMApp app2 = rm1.SubmitApp(1024, "app", "user", null, "b1");
        MockAM am2 = MockRM.LaunchAndRegisterAM(app2, rm1, nm5);

        // request a container for AM, will succeed
        // and now b1's queue capacity will be used, cannot allocate more containers
        // (Maximum capacity reached)
        am2.Allocate("*", 1024, 1, new AList<ContainerId>());
        containerId = ContainerId.NewContainerId(am2.GetApplicationAttemptId(), 2);
        NUnit.Framework.Assert.IsFalse(rm1.WaitForState(nm4, containerId, RMContainerState.Allocated,
            10 * 1000));
        NUnit.Framework.Assert.IsFalse(rm1.WaitForState(nm5, containerId, RMContainerState.Allocated,
            10 * 1000));

        // launch an app to queue b2
        RMApp app3 = rm1.SubmitApp(1024, "app", "user", null, "b2");
        MockAM am3 = MockRM.LaunchAndRegisterAM(app3, rm1, nm5);

        // request a container (label = y). try to allocate on nm1 (label = x) and
        // nm3 (label = y). Will successfully allocate on nm3
        am3.Allocate("*", 1024, 1, new AList<ContainerId>(), "y");
        containerId = ContainerId.NewContainerId(am3.GetApplicationAttemptId(), 2);
        NUnit.Framework.Assert.IsFalse(rm1.WaitForState(nm1, containerId, RMContainerState.Allocated,
            10 * 1000));
        NUnit.Framework.Assert.IsTrue(rm1.WaitForState(nm3, containerId, RMContainerState.Allocated,
            10 * 1000));
        CheckTaskContainersHost(am3.GetApplicationAttemptId(), containerId, rm1, "h3");

        // try to allocate container (request label = z) on nm4 (label = z).
        // Will successfully allocate on nm4 only.
        am3.Allocate("*", 1024, 1, new AList<ContainerId>(), "z");
        containerId = ContainerId.NewContainerId(am3.GetApplicationAttemptId(), 3L);
        NUnit.Framework.Assert.IsTrue(rm1.WaitForState(nm4, containerId, RMContainerState.Allocated,
            10 * 1000));
        CheckTaskContainersHost(am3.GetApplicationAttemptId(), containerId, rm1, "h4");
    }
    finally
    {
        // Always release the RM, even when an assertion above fails.
        rm1.Close();
    }
}
/// <summary>
/// Builds an application submission from the given settings and submits it through the
/// client RM service while running as a testing user that belongs to "someGroup".
/// </summary>
/// <remarks>
/// When <paramref name="isAppIdProvided"/> is false a fresh application id is requested
/// from the client service; otherwise <paramref name="applicationId"/> is used. Tokens
/// from <paramref name="ts"/> are attached only when security is enabled.
/// </remarks>
/// <returns>The application looked up in the RM context under the submitted id.</returns>
/// <exception cref="System.Exception"/>
public virtual RMApp SubmitApp(int masterMemory, string name, string user, IDictionary<ApplicationAccessType, string> acls, bool unmanaged, string queue, int maxAppAttempts, Credentials ts, string appType, bool waitForAccepted, bool keepContainers, bool isAppIdProvided, ApplicationId applicationId, long attemptFailuresValidityInterval, LogAggregationContext logAggregationContext, bool cancelTokensWhenComplete)
{
    ApplicationClientProtocol clientService = GetClientRMService();

    // Resolve the application id: caller-supplied, or newly issued by the RM.
    ApplicationId appId;
    if (isAppIdProvided)
    {
        appId = applicationId;
    }
    else
    {
        GetNewApplicationRequest newAppRequest =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<GetNewApplicationRequest>();
        appId = clientService.GetNewApplication(newAppRequest).GetApplicationId();
    }

    SubmitApplicationRequest submitRequest =
        Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<SubmitApplicationRequest>();
    ApplicationSubmissionContext submission =
        Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<ApplicationSubmissionContext>();
    submission.SetKeepContainersAcrossApplicationAttempts(keepContainers);
    submission.SetApplicationId(appId);
    submission.SetApplicationName(name);
    submission.SetMaxAppAttempts(maxAppAttempts);
    if (unmanaged)
    {
        submission.SetUnmanagedAM(true);
    }
    if (queue != null)
    {
        submission.SetQueue(queue);
    }
    submission.SetApplicationType(appType);

    // AM container: memory requirement, ACLs and (optionally) security tokens.
    ContainerLaunchContext amLaunchContext =
        Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<ContainerLaunchContext>();
    Resource amResource = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<Resource>();
    amResource.SetMemory(masterMemory);
    submission.SetResource(amResource);
    amLaunchContext.SetApplicationACLs(acls);
    if (ts != null && UserGroupInformation.IsSecurityEnabled())
    {
        DataOutputBuffer tokenBuffer = new DataOutputBuffer();
        ts.WriteTokenStorageToStream(tokenBuffer);
        amLaunchContext.SetTokens(ByteBuffer.Wrap(tokenBuffer.GetData(), 0, tokenBuffer.GetLength()));
    }
    submission.SetAMContainerSpec(amLaunchContext);
    submission.SetAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    if (logAggregationContext != null)
    {
        submission.SetLogAggregationContext(logAggregationContext);
    }
    submission.SetCancelTokensWhenComplete(cancelTokensWhenComplete);
    submitRequest.SetApplicationSubmissionContext(submission);

    // Submit as the requested (fake) user.
    UserGroupInformation submitter = UserGroupInformation.CreateUserForTesting(user,
        new string[] { "someGroup" });
    PrivilegedAction<SubmitApplicationResponse> submitAction =
        new _PrivilegedAction_415().SetClientReq(clientService, submitRequest);
    submitter.DoAs(submitAction);

    if (!waitForAccepted)
    {
        return GetRMContext().GetRMApps()[appId];
    }

    // make sure app is immediately available after submit
    WaitForState(appId, RMAppState.Accepted);
    RMApp submittedApp = GetRMContext().GetRMApps()[appId];
    // unmanaged AM won't go to RMAppAttemptState.SCHEDULED.
    if (!unmanaged)
    {
        WaitForState(submittedApp.GetCurrentAppAttempt().GetAppAttemptId(),
            RMAppAttemptState.Scheduled);
    }
    return submittedApp;
}
/// <summary>
/// Checks that when a NodeManager re-registers while one of its containers is still in
/// the Allocated state, the container is killed and the AM can obtain a container again
/// on later heartbeats.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestResourceRequestRestoreWhenRMContainerIsAtAllocated()
{
    ConfigureScheduler();
    YarnConfiguration yarnConf = GetConf();
    MockRM rm1 = new MockRM(yarnConf);
    try
    {
        rm1.Start();
        RMApp app1 = rm1.SubmitApp(200, "name", "user", new Dictionary<ApplicationAccessType, string>(),
            false, "default", -1, null, "Test", false, true);

        MockNM nm1 = new MockNM("127.0.0.1:1234", 10240, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        MockNM nm2 = new MockNM("127.0.0.1:2351", 10240, rm1.GetResourceTrackerService());
        nm2.RegisterNode();

        MockAM am1 = MockRM.LaunchAndRegisterAM(app1, rm1, nm1);
        ApplicationAttemptId attemptId;
        int expectedContainers = 1;

        // ask for the containers
        am1.Allocate("127.0.0.1", 1024, expectedContainers, new AList<ContainerId>());
        nm1.NodeHeartbeat(true);

        // poll until the requested number of containers has been handed out
        IList<Container> allocated = am1.Allocate(new AList<ResourceRequest>(),
            new AList<ContainerId>()).GetAllocatedContainers();
        while (allocated.Count != expectedContainers)
        {
            nm1.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(allocated, am1.Allocate(new AList<ResourceRequest>(),
                new AList<ContainerId>()).GetAllocatedContainers());
            Sharpen.Thread.Sleep(200);
        }

        // launch the 2nd container, for testing running container transferred.
        attemptId = am1.GetApplicationAttemptId();
        nm1.NodeHeartbeat(attemptId, 2, ContainerState.Running);
        ContainerId secondContainer = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 2);
        rm1.WaitForState(nm1, secondContainer, RMContainerState.Running);

        // 3rd container is in Allocated state.
        am1.Allocate("127.0.0.1", 1024, expectedContainers, new AList<ContainerId>());
        nm2.NodeHeartbeat(true);
        ContainerId thirdContainer = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 3);
        rm1.WaitForContainerAllocated(nm2, thirdContainer);
        rm1.WaitForState(nm2, thirdContainer, RMContainerState.Allocated);

        // NodeManager restart
        nm2.RegisterNode();

        // NM restart kills all allocated and running containers.
        rm1.WaitForState(nm2, thirdContainer, RMContainerState.Killed);

        // The killed RMContainer request should be restored. In successive
        // nodeHeartBeats AM should be able to get container allocated.
        allocated = am1.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>())
            .GetAllocatedContainers();
        while (allocated.Count != expectedContainers)
        {
            nm2.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(allocated, am1.Allocate(new AList<ResourceRequest>(),
                new AList<ContainerId>()).GetAllocatedContainers());
            Sharpen.Thread.Sleep(200);
        }

        nm2.NodeHeartbeat(am1.GetApplicationAttemptId(), 4, ContainerState.Running);
        ContainerId fourthContainer = ContainerId.NewContainerId(am1.GetApplicationAttemptId(), 4);
        rm1.WaitForState(nm2, fourthContainer, RMContainerState.Running);
    }
    finally
    {
        rm1.Stop();
    }
}
/// <summary>Explicitly resets the queue metrics; intended for use from tests.</summary>
/// <param name="app">Not read by this implementation.</param>
public virtual void ClearQueueMetrics(RMApp app)
{
    QueueMetrics.ClearQueueMetrics();
}
/// <summary>
/// Registers an AM under kerberos authentication, starts a custom AM with the
/// client-to-AM master key returned on registration, and then checks that
/// unauthenticated and tampered clients are rejected while a valid token is accepted.
/// </summary>
public virtual void TestClientToAMTokens()
{
    conf.Set(CommonConfigurationKeysPublic.HadoopSecurityAuthentication, "kerberos");
    UserGroupInformation.SetConfiguration(conf);

    ContainerManagementProtocol containerManager =
        Org.Mockito.Mockito.Mock<ContainerManagementProtocol>();
    StartContainersResponse mockResponse = Org.Mockito.Mockito.Mock<StartContainersResponse>();
    Org.Mockito.Mockito.When(containerManager.StartContainers((StartContainersRequest)Matchers.Any()))
        .ThenReturn(mockResponse);

    DrainDispatcher dispatcher = new DrainDispatcher();
    MockRM rm = new _MockRMWithCustomAMLauncher_192(dispatcher, conf, containerManager);
    TestClientToAMTokens.CustomAM am = null;
    try
    {
        rm.Start();

        // Submit an app
        RMApp app = rm.SubmitApp(1024);

        // Set up a node.
        MockNM nm1 = rm.RegisterNode("localhost:1234", 3072);
        nm1.NodeHeartbeat(true);
        dispatcher.Await();
        nm1.NodeHeartbeat(true);
        dispatcher.Await();

        ApplicationAttemptId appAttempt = app.GetCurrentAppAttempt().GetAppAttemptId();
        MockAM mockAM = new MockAM(rm.GetRMContext(), rm.GetApplicationMasterService(), appAttempt);
        UserGroupInformation appUgi = UserGroupInformation.CreateRemoteUser(appAttempt.ToString());
        RegisterApplicationMasterResponse response = appUgi.DoAs(new _PrivilegedAction_229(mockAM));

        // Get the app-report.
        GetApplicationReportRequest request =
            Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<GetApplicationReportRequest>();
        request.SetApplicationId(app.GetApplicationId());
        GetApplicationReportResponse reportResponse =
            rm.GetClientRMService().GetApplicationReport(request);
        ApplicationReport appReport = reportResponse.GetApplicationReport();
        Org.Apache.Hadoop.Yarn.Api.Records.Token originalClientToAMToken =
            appReport.GetClientToAMToken();

        // ClientToAMToken master key should have been received on register
        // application master response.
        NUnit.Framework.Assert.IsNotNull(response.GetClientToAMTokenMasterKey());
        NUnit.Framework.Assert.IsTrue(((byte[])response.GetClientToAMTokenMasterKey().Array()).Length > 0);

        // Start the AM with the correct shared-secret.
        ApplicationAttemptId appAttemptId = app.GetAppAttempts().Keys.GetEnumerator().Next();
        NUnit.Framework.Assert.IsNotNull(appAttemptId);
        am = new TestClientToAMTokens.CustomAM(appAttemptId,
            ((byte[])response.GetClientToAMTokenMasterKey().Array()));
        am.Init(conf);
        am.Start();

        // Now the real test!
        // Set up clients to be able to pick up correct tokens.
        SecurityUtil.SetSecurityInfoProviders(new TestClientToAMTokens.CustomSecurityInfo());

        // Verify denial for unauthenticated user.
        // The "should fail" check lives outside the try block: an assertion failure raised
        // inside it would be an Exception too and would be caught by the handler below.
        bool unauthenticatedPingSucceeded = false;
        try
        {
            TestClientToAMTokens.CustomProtocol client = (TestClientToAMTokens.CustomProtocol)
                RPC.GetProxy<TestClientToAMTokens.CustomProtocol>(1L, am.address, conf);
            client.Ping();
            unauthenticatedPingSucceeded = true;
        }
        catch (Exception)
        {
            NUnit.Framework.Assert.IsFalse(am.pinged);
        }
        if (unauthenticatedPingSucceeded)
        {
            NUnit.Framework.Assert.Fail("Access by unauthenticated user should fail!!");
        }

        Org.Apache.Hadoop.Security.Token.Token<ClientToAMTokenIdentifier> token =
            ConverterUtils.ConvertFromYarn(originalClientToAMToken, am.address);

        // Verify denial for a malicious user with tampered ID
        VerifyTokenWithTamperedID(conf, am, token);

        // Verify denial for a malicious user with tampered user-name
        VerifyTokenWithTamperedUserName(conf, am, token);

        // Now for an authenticated user
        VerifyValidToken(conf, am, token);

        // Verify for a new version token
        VerifyNewVersionToken(conf, am, token, rm);
    }
    finally
    {
        // Shut down in the original order (AM first, then RM), even on failure.
        if (am != null)
        {
            am.Stop();
        }
        rm.Stop();
    }
}
/// <summary>
/// Moves the state store into the fenced state and then runs every store, update and
/// remove operation against it, asserting after each one that the store still reports
/// itself as fenced.
/// </summary>
public virtual void TestFencedState()
{
    const string fencedMessage = "RMStateStore should have been in fenced state";

    TestZKRMStateStore.TestZKRMStateStoreTester tester =
        new TestZKRMStateStore.TestZKRMStateStoreTester(this);
    RMStateStore stateStore = tester.GetRMStateStore();

    // Move state to FENCED from ACTIVE
    stateStore.UpdateFencedState();
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    long submitTime = Runtime.CurrentTimeMillis();
    long startTime = submitTime + 1000;

    // Add a new app
    RMApp appMock = Org.Mockito.Mockito.Mock<RMApp>();
    ApplicationSubmissionContext submissionContext = new ApplicationSubmissionContextPBImpl();
    Org.Mockito.Mockito.When(appMock.GetSubmitTime()).ThenReturn(submitTime);
    Org.Mockito.Mockito.When(appMock.GetStartTime()).ThenReturn(startTime);
    Org.Mockito.Mockito.When(appMock.GetApplicationSubmissionContext()).ThenReturn(submissionContext);
    Org.Mockito.Mockito.When(appMock.GetUser()).ThenReturn("test");
    stateStore.StoreNewApplication(appMock);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // Add a new attempt
    ClientToAMTokenSecretManagerInRM tokenManager = new ClientToAMTokenSecretManagerInRM();
    ApplicationAttemptId attemptId =
        ConverterUtils.ToApplicationAttemptId("appattempt_1234567894321_0001_000001");
    SecretKey masterKey = tokenManager.CreateMasterKey(attemptId);
    RMAppAttemptMetrics attemptMetricsMock = Org.Mockito.Mockito.Mock<RMAppAttemptMetrics>();
    Container masterContainer = new ContainerPBImpl();
    masterContainer.SetId(ConverterUtils.ToContainerId("container_1234567891234_0001_01_000001"));
    RMAppAttempt attemptMock = Org.Mockito.Mockito.Mock<RMAppAttempt>();
    Org.Mockito.Mockito.When(attemptMock.GetAppAttemptId()).ThenReturn(attemptId);
    Org.Mockito.Mockito.When(attemptMock.GetMasterContainer()).ThenReturn(masterContainer);
    Org.Mockito.Mockito.When(attemptMock.GetClientTokenMasterKey()).ThenReturn(masterKey);
    Org.Mockito.Mockito.When(attemptMock.GetRMAppAttemptMetrics()).ThenReturn(attemptMetricsMock);
    Org.Mockito.Mockito.When(attemptMetricsMock.GetAggregateAppResourceUsage())
        .ThenReturn(new AggregateAppResourceUsage(0, 0));
    stateStore.StoreNewApplicationAttempt(attemptMock);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // Update attempt
    long finishTime = submitTime + 1000;
    ApplicationAttemptStateData updatedAttempt = ApplicationAttemptStateData.NewInstance(
        attemptId, masterContainer, stateStore.GetCredentialsFromAppAttempt(attemptMock), startTime,
        RMAppAttemptState.Finished, "testUrl", "test", FinalApplicationStatus.Succeeded,
        100, finishTime, 0, 0);
    stateStore.UpdateApplicationAttemptState(updatedAttempt);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // Update app
    ApplicationStateData updatedApp = ApplicationStateData.NewInstance(submitTime, startTime,
        submissionContext, "test");
    stateStore.UpdateApplicationState(updatedApp);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // Remove app
    stateStore.RemoveApplication(appMock);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // store RM delegation token;
    RMDelegationTokenIdentifier tokenId = new RMDelegationTokenIdentifier(new Text("owner1"),
        new Text("renewer1"), new Text("realuser1"));
    long renewDate = Runtime.CurrentTimeMillis();
    tokenId.SetSequenceNumber(1111);
    stateStore.StoreRMDelegationToken(tokenId, renewDate);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // update RM delegation token;
    stateStore.UpdateRMDelegationToken(tokenId, renewDate);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // remove delegation key;
    stateStore.RemoveRMDelegationToken(tokenId);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // store delegation master key;
    DelegationKey delegationKey = new DelegationKey(1234, 4321,
        Sharpen.Runtime.GetBytesForString("keyBytes"));
    stateStore.StoreRMDTMasterKey(delegationKey);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // remove delegation master key;
    stateStore.RemoveRMDTMasterKey(delegationKey);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    // store or update AMRMToken;
    stateStore.StoreOrUpdateAMRMTokenSecretManager(null, false);
    NUnit.Framework.Assert.AreEqual(fencedMessage, true, stateStore.IsFencedState());

    stateStore.Close();
}
/// <summary>
/// Releases an allocated container and then reports it as Running in node heartbeats,
/// waiting each time for the cleanup of that container to be handed back to the node.
/// </summary>
public virtual void TestContainerCleanup()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    DrainDispatcher dispatcher = new DrainDispatcher();
    MockRM rm = new _MockRM_167(this, dispatcher);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 5000);
        RMApp app = rm.SubmitApp(2000);

        // kick the scheduling
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt = app.GetCurrentAppAttempt();
        MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
        am.RegisterAppAttempt();

        // request for containers
        int request = 2;
        am.Allocate("127.0.0.1", 1000, request, new AList<ContainerId>());
        dispatcher.Await();

        // kick the scheduler
        nm1.NodeHeartbeat(true);
        IList<Container> conts = am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>())
            .GetAllocatedContainers();
        int contReceived = conts.Count;
        int waitCount = 0;
        // Poll (at most 200 rounds) until all requested containers were received.
        // conts always holds the most recent batch only.
        while (contReceived < request && waitCount++ < 200)
        {
            Log.Info("Got " + contReceived + " containers. Waiting to get " + request);
            Sharpen.Thread.Sleep(100);
            conts = am.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>())
                .GetAllocatedContainers();
            dispatcher.Await();
            contReceived += conts.Count;
            nm1.NodeHeartbeat(true);
        }
        NUnit.Framework.Assert.AreEqual(request, contReceived);

        // Release a container.
        AList<ContainerId> release = new AList<ContainerId>();
        release.AddItem(conts[0].GetId());
        am.Allocate(new AList<ResourceRequest>(), release);
        dispatcher.Await();

        // Send one more heartbeat with a fake running container. This is to
        // simulate the situation that can happen if the NM reports that container
        // is running in the same heartbeat when the RM asks it to clean it up.
        IDictionary<ApplicationId, IList<ContainerStatus>> containerStatuses =
            new Dictionary<ApplicationId, IList<ContainerStatus>>();
        AList<ContainerStatus> containerStatusList = new AList<ContainerStatus>();
        containerStatusList.AddItem(BuilderUtils.NewContainerStatus(conts[0].GetId(),
            ContainerState.Running, "nothing", 0));
        containerStatuses[app.GetApplicationId()] = containerStatusList;
        NodeHeartbeatResponse resp = nm1.NodeHeartbeat(containerStatuses, true);
        WaitForContainerCleanup(dispatcher, nm1, resp);

        // Now to test the case when RM already gave cleanup, and NM suddenly
        // realizes that the container is running.
        Log.Info("Testing container launch much after release and " + "NM getting cleanup");
        containerStatuses.Clear();
        containerStatusList.Clear();
        containerStatusList.AddItem(BuilderUtils.NewContainerStatus(conts[0].GetId(),
            ContainerState.Running, "nothing", 0));
        containerStatuses[app.GetApplicationId()] = containerStatusList;
        resp = nm1.NodeHeartbeat(containerStatuses, true);
        // The cleanup list won't be instantaneous as it is given out by scheduler
        // and not RMNodeImpl.
        WaitForContainerCleanup(dispatcher, nm1, resp);
    }
    finally
    {
        // Always stop the RM, even when an assertion above fails.
        rm.Stop();
    }
}
// To hold list of application for which event was received
/// <summary>
/// Fires NODE_USABLE and NODE_UNUSABLE events at the nodes-list manager while one app is
/// killed, one is finished and one is freshly submitted, and checks that only the
/// freshly submitted app shows up in <c>applist</c>.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestNodeUsableEvent()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    Dispatcher dispatcher = GetDispatcher();
    YarnConfiguration conf = new YarnConfiguration();
    MockRM rm = new _MockRM_62(dispatcher, conf);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("h1:1234", 28000);
        NodesListManager nodesListManager = rm.GetNodesListManager();
        Resource clusterResource = Resource.NewInstance(28000, 8);
        RMNode rmnode = MockNodes.NewNodeInfo(1, clusterResource);

        // Create killing APP
        RMApp killrmApp = rm.SubmitApp(200);
        rm.KillApp(killrmApp.GetApplicationId());
        rm.WaitForState(killrmApp.GetApplicationId(), RMAppState.Killed);

        // Create finish APP
        RMApp finshrmApp = rm.SubmitApp(2000);
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt = finshrmApp.GetCurrentAppAttempt();
        MockAM am = rm.SendAMLaunched(attempt.GetAppAttemptId());
        am.RegisterAppAttempt();
        am.UnregisterAppAttempt();
        nm1.NodeHeartbeat(attempt.GetAppAttemptId(), 1, ContainerState.Complete);
        am.WaitForState(RMAppAttemptState.Finished);

        // Create submitted App
        RMApp subrmApp = rm.SubmitApp(200);

        // Fire Event for NODE_USABLE
        nodesListManager.Handle(new NodesListManagerEvent(NodesListManagerEventType.NodeUsable,
            rmnode));
        if (applist.Count > 0)
        {
            NUnit.Framework.Assert.IsTrue("Event based on running app expected " + subrmApp.GetApplicationId(),
                applist.Contains(subrmApp.GetApplicationId()));
            NUnit.Framework.Assert.IsFalse("Event based on finish app not expected " + finshrmApp.GetApplicationId(),
                applist.Contains(finshrmApp.GetApplicationId()));
            NUnit.Framework.Assert.IsFalse("Event based on killed app not expected " + killrmApp.GetApplicationId(),
                applist.Contains(killrmApp.GetApplicationId()));
        }
        else
        {
            NUnit.Framework.Assert.Fail("Events received should have beeen more than 1");
        }
        // Start the second round from an empty list.
        applist.Clear();

        // Fire Event for NODE_UNUSABLE
        nodesListManager.Handle(new NodesListManagerEvent(NodesListManagerEventType.NodeUnusable,
            rmnode));
        if (applist.Count > 0)
        {
            NUnit.Framework.Assert.IsTrue("Event based on running app expected " + subrmApp.GetApplicationId(),
                applist.Contains(subrmApp.GetApplicationId()));
            NUnit.Framework.Assert.IsFalse("Event based on finish app not expected " + finshrmApp.GetApplicationId(),
                applist.Contains(finshrmApp.GetApplicationId()));
            NUnit.Framework.Assert.IsFalse("Event based on killed app not expected " + killrmApp.GetApplicationId(),
                applist.Contains(killrmApp.GetApplicationId()));
        }
        else
        {
            NUnit.Framework.Assert.Fail("Events received should have beeen more than 1");
        }
    }
    finally
    {
        // The RM was previously started but never stopped; stop it on every exit path.
        rm.Stop();
    }
}
// The test verifies processing of NMContainerStatuses which are sent during
// NM registration.
// 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
// 2. AM sends ResourceRequest for 1 container with memory 2048MB.
// 3. Verify for number of container allocated by RM
// 4. Verify Memory Usage by cluster, it should be 3072. AM memory + requested
// memory. 1024 + 2048=3072
// 5. Re-register NM by sending completed container status
// 6. Verify for Memory Used, it should be 1024
// 7. Send AM heartbeat to RM. Allocated response should contain completed
// container.
/// <exception cref="System.Exception"/>
public virtual void TestProcessingNMContainerStatusesOnNMRestart()
{
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    // 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
    MockRM rm1 = new MockRM(conf, memStore);
    try
    {
        rm1.Start();
        int nmMemory = 8192;
        int amMemory = 1024;
        int containerMemory = 2048;
        MockNM nm1 = new MockNM("127.0.0.1:1234", nmMemory, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        RMApp app0 = rm1.SubmitApp(amMemory);
        MockAM am0 = MockRM.LaunchAndRegisterAM(app0, rm1, nm1);

        // 2. AM sends ResourceRequest for 1 container with memory 2048MB.
        int noOfContainers = 1;
        IList<Container> allocateContainers = am0.AllocateAndWaitForContainers(noOfContainers,
            containerMemory, nm1);

        // 3. Verify for number of container allocated by RM
        NUnit.Framework.Assert.AreEqual(noOfContainers, allocateContainers.Count);
        Container container = allocateContainers[0];
        // Report both the AM container (id 1) and the allocated container as running.
        nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Running);
        nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), container.GetId().GetContainerId(),
            ContainerState.Running);
        rm1.WaitForState(app0.GetApplicationId(), RMAppState.Running);

        // 4. Verify Memory Usage by cluster, it should be 3072. AM memory +
        // requested memory. 1024 + 2048=3072
        ResourceScheduler rs = rm1.GetRMContext().GetScheduler();
        int allocatedMB = rs.GetRootQueueMetrics().GetAllocatedMB();
        NUnit.Framework.Assert.AreEqual(amMemory + containerMemory, allocatedMB);

        // 5. Re-register NM by sending completed container status
        IList<NMContainerStatus> nMContainerStatusForApp = CreateNMContainerStatusForApp(am0);
        nm1.RegisterNode(nMContainerStatusForApp, Arrays.AsList(app0.GetApplicationId()));
        WaitForClusterMemory(nm1, rs, amMemory);

        // 6. Verify for Memory Used, it should be 1024
        NUnit.Framework.Assert.AreEqual(amMemory, rs.GetRootQueueMetrics().GetAllocatedMB());

        // 7. Send AM heartbeat to RM. Allocated response should contain completed
        // container
        AllocateRequest req = AllocateRequest.NewInstance(0, 0F, new AList<ResourceRequest>(),
            new AList<ContainerId>(), null);
        AllocateResponse allocate = am0.Allocate(req);
        IList<ContainerStatus> completedContainersStatuses = allocate.GetCompletedContainersStatuses();
        NUnit.Framework.Assert.AreEqual(noOfContainers, completedContainersStatuses.Count);

        // Application clean up should happen. Cluster memory used is 0
        nm1.NodeHeartbeat(am0.GetApplicationAttemptId(), 1, ContainerState.Complete);
        WaitForClusterMemory(nm1, rs, 0);
    }
    finally
    {
        // Always stop the RM, even when an assertion above fails.
        rm1.Stop();
    }
}
/// <summary>
/// Runs two applications against two nodes (6 GB and 4 GB), requests one extra container
/// per application, and checks the used/available memory reported per node before and
/// after one container completes.
/// </summary>
public virtual void Test()
{
    Logger rootLogger = LogManager.GetRootLogger();
    rootLogger.SetLevel(Level.Debug);
    MockRM rm = new MockRM(conf);
    try
    {
        rm.Start();
        MockNM nm1 = rm.RegisterNode("127.0.0.1:1234", 6 * Gb);
        MockNM nm2 = rm.RegisterNode("127.0.0.2:5678", 4 * Gb);

        RMApp app1 = rm.SubmitApp(2048);
        // kick the scheduling, 2 GB given to AM1, remaining 4GB on nm1
        nm1.NodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.GetCurrentAppAttempt();
        MockAM am1 = rm.SendAMLaunched(attempt1.GetAppAttemptId());
        am1.RegisterAppAttempt();
        SchedulerNodeReport report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm1.GetUsedResource().GetMemory());

        RMApp app2 = rm.SubmitApp(2048);
        // kick the scheduling, 2GB given to AM, remaining 2 GB on nm2
        nm2.NodeHeartbeat(true);
        RMAppAttempt attempt2 = app2.GetCurrentAppAttempt();
        MockAM am2 = rm.SendAMLaunched(attempt2.GetAppAttemptId());
        am2.RegisterAppAttempt();
        SchedulerNodeReport report_nm2 = rm.GetResourceScheduler().GetNodeReport(nm2.GetNodeId());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetUsedResource().GetMemory());

        // add request for containers
        am1.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, Gb, 1, 1);
        AllocateResponse alloc1Response = am1.Schedule(); // send the request
        // add request for containers
        am2.AddRequests(new string[] { "127.0.0.1", "127.0.0.2" }, 3 * Gb, 0, 1);
        AllocateResponse alloc2Response = am2.Schedule(); // send the request

        // kick the scheduler, 1 GB and 3 GB given to AM1 and AM2, remaining 0
        nm1.NodeHeartbeat(true);
        while (alloc1Response.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 1...");
            Sharpen.Thread.Sleep(1000);
            alloc1Response = am1.Schedule();
        }
        while (alloc2Response.GetAllocatedContainers().Count < 1)
        {
            Log.Info("Waiting for containers to be created for app 2...");
            Sharpen.Thread.Sleep(1000);
            alloc2Response = am2.Schedule();
        }

        // kick the scheduler, nothing given remaining 2 GB.
        nm2.NodeHeartbeat(true);

        IList<Container> allocated1 = alloc1Response.GetAllocatedContainers();
        NUnit.Framework.Assert.AreEqual(1, allocated1.Count);
        NUnit.Framework.Assert.AreEqual(1 * Gb, allocated1[0].GetResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated1[0].GetNodeId());

        IList<Container> allocated2 = alloc2Response.GetAllocatedContainers();
        NUnit.Framework.Assert.AreEqual(1, allocated2.Count);
        NUnit.Framework.Assert.AreEqual(3 * Gb, allocated2[0].GetResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(nm1.GetNodeId(), allocated2[0].GetNodeId());

        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        report_nm2 = rm.GetResourceScheduler().GetNodeReport(nm2.GetNodeId());
        NUnit.Framework.Assert.AreEqual(0, report_nm1.GetAvailableResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetAvailableResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(6 * Gb, report_nm1.GetUsedResource().GetMemory());
        NUnit.Framework.Assert.AreEqual(2 * Gb, report_nm2.GetUsedResource().GetMemory());

        // Complete app 1's 1 GB container and wait (up to 20 rounds) for the attempt to see it.
        Container c1 = allocated1[0];
        NUnit.Framework.Assert.AreEqual(Gb, c1.GetResource().GetMemory());
        ContainerStatus containerStatus = BuilderUtils.NewContainerStatus(c1.GetId(),
            ContainerState.Complete, string.Empty, 0);
        nm1.ContainerStatus(containerStatus);
        int waitCount = 0;
        while (attempt1.GetJustFinishedContainers().Count < 1 && waitCount++ != 20)
        {
            Log.Info("Waiting for containers to be finished for app 1... Tried " + waitCount
                + " times already..");
            Sharpen.Thread.Sleep(1000);
        }
        NUnit.Framework.Assert.AreEqual(1, attempt1.GetJustFinishedContainers().Count);
        NUnit.Framework.Assert.AreEqual(1, am1.Schedule().GetCompletedContainersStatuses().Count);
        report_nm1 = rm.GetResourceScheduler().GetNodeReport(nm1.GetNodeId());
        NUnit.Framework.Assert.AreEqual(5 * Gb, report_nm1.GetUsedResource().GetMemory());
    }
    finally
    {
        // Always stop the RM, even when an assertion above fails.
        rm.Stop();
    }
}
/// <summary>
/// Runs an application with several containers, completes them, and checks that the
/// memory-seconds / vcore-seconds reported for the app match the per-container totals
/// and are unchanged after a second RM is started from the same state store.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestUsageWithMultipleContainersAndRMRestart()
{
    // Set max attempts to 1 so that when the first attempt fails, the app
    // won't try to start a new one.
    conf.SetInt(YarnConfiguration.RmAmMaxAttempts, 1);
    conf.SetBoolean(YarnConfiguration.RecoveryEnabled, true);
    conf.SetBoolean(YarnConfiguration.RmWorkPreservingRecoveryEnabled, false);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    MockRM rm0 = new MockRM(conf, memStore);
    MockRM rm1 = null;
    try
    {
        rm0.Start();
        MockNM nm = new MockNM("127.0.0.1:1234", 65536, rm0.GetResourceTrackerService());
        nm.RegisterNode();

        RMApp app0 = rm0.SubmitApp(200);
        rm0.WaitForState(app0.GetApplicationId(), RMAppState.Accepted);
        RMAppAttempt attempt0 = app0.GetCurrentAppAttempt();
        ApplicationAttemptId attemptId0 = attempt0.GetAppAttemptId();
        rm0.WaitForState(attemptId0, RMAppAttemptState.Scheduled);

        nm.NodeHeartbeat(true);
        rm0.WaitForState(attemptId0, RMAppAttemptState.Allocated);
        MockAM am0 = rm0.SendAMLaunched(attempt0.GetAppAttemptId());
        am0.RegisterAppAttempt();

        int NumContainers = 2;
        am0.Allocate("127.0.0.1", 1000, NumContainers, new AList<ContainerId>());
        nm.NodeHeartbeat(true);
        IList<Container> conts = am0.Allocate(new AList<ResourceRequest>(), new AList<ContainerId>())
            .GetAllocatedContainers();
        while (conts.Count != NumContainers)
        {
            nm.NodeHeartbeat(true);
            Sharpen.Collections.AddAll(conts, am0.Allocate(new AList<ResourceRequest>(),
                new AList<ContainerId>()).GetAllocatedContainers());
            Sharpen.Thread.Sleep(500);
        }

        // launch the 2nd and 3rd containers.
        foreach (Container c in conts)
        {
            nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c.GetId().GetContainerId(),
                ContainerState.Running);
            rm0.WaitForState(nm, c.GetId(), RMContainerState.Running);
        }

        // Get the RMContainers for all of the live containers, to be used later
        // for metrics calculations and comparisons.
        ICollection<RMContainer> rmContainers = rm0.scheduler.GetSchedulerAppInfo(
            attempt0.GetAppAttemptId()).GetLiveContainers();

        // Allow metrics to accumulate (give up after 5 seconds).
        int sleepInterval = 1000;
        int cumulativeSleepTime = 0;
        while (app0.GetRMAppMetrics().GetMemorySeconds() <= 0 && cumulativeSleepTime < 5000)
        {
            Sharpen.Thread.Sleep(sleepInterval);
            cumulativeSleepTime += sleepInterval;
        }

        // Stop all non-AM containers
        foreach (Container c_1 in conts)
        {
            if (c_1.GetId().GetContainerId() == 1)
            {
                continue;
            }
            nm.NodeHeartbeat(attempt0.GetAppAttemptId(), c_1.GetId().GetContainerId(),
                ContainerState.Complete);
            rm0.WaitForState(nm, c_1.GetId(), RMContainerState.Completed);
        }

        // After all other containers have completed, manually complete the master
        // container in order to trigger a save to the state store of the resource
        // usage metrics. This will cause the attempt to fail, and, since the max
        // attempt retries is 1, the app will also fail. This is intentional so
        // that all containers will complete prior to saving.
        ContainerId cId = ContainerId.NewContainerId(attempt0.GetAppAttemptId(), 1);
        nm.NodeHeartbeat(attempt0.GetAppAttemptId(), cId.GetContainerId(), ContainerState.Complete);
        rm0.WaitForState(nm, cId, RMContainerState.Completed);

        // Check that the container metrics match those from the app usage report.
        long memorySeconds = 0;
        long vcoreSeconds = 0;
        foreach (RMContainer c_2 in rmContainers)
        {
            AggregateAppResourceUsage ru = CalculateContainerResourceMetrics(c_2);
            memorySeconds += ru.GetMemorySeconds();
            vcoreSeconds += ru.GetVcoreSeconds();
        }
        RMAppMetrics metricsBefore = app0.GetRMAppMetrics();
        NUnit.Framework.Assert.AreEqual("Unexcpected MemorySeconds value", memorySeconds,
            metricsBefore.GetMemorySeconds());
        NUnit.Framework.Assert.AreEqual("Unexpected VcoreSeconds value", vcoreSeconds,
            metricsBefore.GetVcoreSeconds());

        // create new RM to represent RM restart. Load up the state store.
        rm1 = new MockRM(conf, memStore);
        rm1.Start();
        RMApp app0After = rm1.GetRMContext().GetRMApps()[app0.GetApplicationId()];

        // Compare container resource usage metrics from before and after restart.
        RMAppMetrics metricsAfter = app0After.GetRMAppMetrics();
        NUnit.Framework.Assert.AreEqual("Vcore seconds were not the same after RM Restart",
            metricsBefore.GetVcoreSeconds(), metricsAfter.GetVcoreSeconds());
        NUnit.Framework.Assert.AreEqual("Memory seconds were not the same after RM Restart",
            metricsBefore.GetMemorySeconds(), metricsAfter.GetMemorySeconds());
    }
    finally
    {
        // Shut both RMs down in the original order, even when an assertion above fails.
        try
        {
            rm0.Stop();
            rm0.Close();
        }
        finally
        {
            if (rm1 != null)
            {
                rm1.Stop();
                rm1.Close();
            }
        }
    }
}
/// <summary>
/// Verifies that the AMRMClient re-sends its outstanding requests when the
/// RM is restarted underneath it. The test walks through six steps:
/// Step-1: AMRMClient sends an allocate request for 2 container requests.
/// Step-2: 2 containers are allocated by the RM.
/// Step-3: The AM sends 1 container request (cRequest3) and 1 release
///         request to the RM.
/// Step-4: After the RM restart, the AM (which does not know the RM was
///         restarted) sends an additional container request (cRequest4)
///         and blacklisted nodes; the RM in turn sends a resync command.
/// Step-5: Allocate after the resync command, with a new container
///         request (cRequest5).
/// Step-6: The RM allocates containers for cRequest3, cRequest4 and
///         cRequest5.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestAMRMClientResendsRequestsOnRMRestart()
{
    UserGroupInformation.SetLoginUser(null);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.Init(conf);

    // Phase-1: start the 1st RM.
    TestAMRMClientOnRMRestart.MyResourceManager rm1 =
        new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
    // Declared up front so the finally block can stop whatever was created,
    // even when an assertion fails part-way through the scenario.
    TestAMRMClientOnRMRestart.MyResourceManager rm2 = null;
    AMRMClient<AMRMClient.ContainerRequest> amClient = null;
    try
    {
        rm1.Start();
        DrainDispatcher dispatcher = (DrainDispatcher)rm1.GetRMContext().GetDispatcher();

        // Submit the application.
        RMApp app = rm1.SubmitApp(1024);
        dispatcher.Await();

        MockNM nm1 = new MockNM("h1:1234", 15120, rm1.GetResourceTrackerService());
        nm1.RegisterNode();
        nm1.NodeHeartbeat(true); // Node heartbeat
        dispatcher.Await();

        ApplicationAttemptId appAttemptId = app.GetCurrentAppAttempt().GetAppAttemptId();
        rm1.SendAMLaunched(appAttemptId);
        dispatcher.Await();

        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> token =
            rm1.GetRMContext().GetRMApps()[appAttemptId.GetApplicationId()]
                .GetRMAppAttempt(appAttemptId).GetAMRMToken();
        UserGroupInformation ugi = UserGroupInformation.GetCurrentUser();
        ugi.AddTokenIdentifier(token.DecodeIdentifier());

        // Step-1 : AMRMClient sends an allocate request for 2 ContainerRequests
        // cRequest1 = h1 and cRequest2 = h1,h2
        // blacklisted nodes = h2
        amClient = new TestAMRMClientOnRMRestart.MyAMRMClientImpl(rm1);
        amClient.Init(conf);
        amClient.Start();
        amClient.RegisterApplicationMaster("Host", 10000, string.Empty);

        AMRMClient.ContainerRequest cRequest1 = CreateReq(1, 1024, new string[] { "h1" });
        amClient.AddContainerRequest(cRequest1);
        AMRMClient.ContainerRequest cRequest2 = CreateReq(1, 1024, new string[] { "h1", "h2" });
        amClient.AddContainerRequest(cRequest2);

        IList<string> blacklistAdditions = new AList<string>();
        IList<string> blacklistRemoval = new AList<string>();
        blacklistAdditions.AddItem("h2");
        blacklistRemoval.AddItem("h10");
        amClient.UpdateBlacklist(blacklistAdditions, blacklistRemoval);
        blacklistAdditions.Remove("h2"); // remove from local list

        AllocateResponse allocateResponse = amClient.Allocate(0.1f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
            allocateResponse.GetAllocatedContainers().Count);

        // Why 4 asks and not 3, even though h2 is blacklisted?
        // When a host is blacklisted, the application master has to remove
        // the ask from the remote request table itself. This test does not
        // remove it explicitly.
        AssertAsksAndReleases(4, 0, rm1);
        AssertBlacklistAdditionsAndRemovals(1, 1, rm1);

        // Step-2 : NM heartbeat is sent.
        // On the 2nd AM allocate request, the RM allocates 2 containers to the AM.
        nm1.NodeHeartbeat(true); // Node heartbeat
        dispatcher.Await();
        allocateResponse = amClient.Allocate(0.2f);
        dispatcher.Await();

        // 2 containers are allocated, i.e. for cRequest1 and cRequest2.
        NUnit.Framework.Assert.AreEqual("No of assignments must be 2", 2,
            allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(0, 0, rm1);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
        IList<Container> allocatedContainers = allocateResponse.GetAllocatedContainers();

        // Remove the container requests that have been satisfied.
        amClient.RemoveContainerRequest(cRequest1);
        amClient.RemoveContainerRequest(cRequest2);
        allocateResponse = amClient.Allocate(0.2f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
            allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(4, 0, rm1);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm1);

        // Step-3 : Send 1 containerRequest and 1 releaseRequest to the RM.
        AMRMClient.ContainerRequest cRequest3 = CreateReq(1, 1024, new string[] { "h1" });
        amClient.AddContainerRequest(cRequest3);

        int pendingRelease = 0;
        IEnumerator<Container> it = allocatedContainers.GetEnumerator();
        // Release exactly one container; the rest are released after the restart.
        if (it.HasNext())
        {
            amClient.ReleaseAssignedContainer(it.Next().GetId());
            pendingRelease++;
            it.Remove();
        }

        allocateResponse = amClient.Allocate(0.3f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
            allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(3, pendingRelease, rm1);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm1);
        int completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
        pendingRelease -= completedContainer;

        // Phase-2: the 2nd RM comes up, sharing the same state store.
        rm2 = new TestAMRMClientOnRMRestart.MyResourceManager(conf, memStore);
        rm2.Start();
        nm1.SetResourceTrackerService(rm2.GetResourceTrackerService());
        ((TestAMRMClientOnRMRestart.MyAMRMClientImpl)amClient).UpdateRMProxy(rm2);
        dispatcher = (DrainDispatcher)rm2.GetRMContext().GetDispatcher();

        // The NM is expected to be told to resync on its first heartbeat
        // to the new RM.
        NodeHeartbeatResponse hbResponse = nm1.NodeHeartbeat(true);
        NUnit.Framework.Assert.AreEqual(NodeAction.Resync, hbResponse.GetNodeAction());

        // New NM to represent the NM re-registering.
        nm1 = new MockNM("h1:1234", 10240, rm2.GetResourceTrackerService());
        nm1.RegisterNode();
        nm1.NodeHeartbeat(true);
        dispatcher.Await();

        blacklistAdditions.AddItem("h3");
        amClient.UpdateBlacklist(blacklistAdditions, null);
        blacklistAdditions.Remove("h3");

        // Release every container that is still held.
        it = allocatedContainers.GetEnumerator();
        while (it.HasNext())
        {
            amClient.ReleaseAssignedContainer(it.Next().GetId());
            pendingRelease++;
            it.Remove();
        }

        AMRMClient.ContainerRequest cRequest4 = CreateReq(1, 1024, new string[] { "h1", "h2" });
        amClient.AddContainerRequest(cRequest4);

        // Step-4 : On RM restart, the AM (which does not know the RM was
        // restarted) sends an additional containerRequest and blacklisted
        // nodes. The RM in turn sends a resync command and the AMRMClient
        // re-sends the allocate request.
        allocateResponse = amClient.Allocate(0.3f);
        dispatcher.Await();
        completedContainer = allocateResponse.GetCompletedContainersStatuses().Count;
        pendingRelease -= completedContainer;
        AssertAsksAndReleases(4, pendingRelease, rm2);
        AssertBlacklistAdditionsAndRemovals(2, 0, rm2);

        AMRMClient.ContainerRequest cRequest5 =
            CreateReq(1, 1024, new string[] { "h1", "h2", "h3" });
        amClient.AddContainerRequest(cRequest5);

        // Step-5 : Allocate after the resync command.
        allocateResponse = amClient.Allocate(0.5f);
        dispatcher.Await();
        NUnit.Framework.Assert.AreEqual("No of assignments must be 0", 0,
            allocateResponse.GetAllocatedContainers().Count);
        AssertAsksAndReleases(5, 0, rm2);
        AssertBlacklistAdditionsAndRemovals(0, 0, rm2);

        // Heartbeat and allocate up to 5 times, stopping early once all
        // 3 containers have been handed out.
        int noAssignedContainer = 0;
        int count = 5;
        while (count-- > 0)
        {
            nm1.NodeHeartbeat(true);
            dispatcher.Await();
            allocateResponse = amClient.Allocate(0.5f);
            dispatcher.Await();
            noAssignedContainer += allocateResponse.GetAllocatedContainers().Count;
            if (noAssignedContainer == 3)
            {
                break;
            }
            Sharpen.Thread.Sleep(1000);
        }

        // Step-6 : The RM allocates containers, i.e. cRequest3, cRequest4 and cRequest5.
        NUnit.Framework.Assert.AreEqual("Number of container should be 3", 3,
            noAssignedContainer);
    }
    finally
    {
        // Same shutdown order as the success path, but also reached when an
        // assertion fails, so the client and both RMs are not left running.
        if (amClient != null)
        {
            amClient.Stop();
        }
        rm1.Stop();
        if (rm2 != null)
        {
            rm2.Stop();
        }
    }
}