/// <summary>
/// Checks that a FIFO scheduler can handle a node update that arrives after an
/// application has been added but before its first attempt has been registered.
/// </summary>
/// <remarks>
/// The test fails if handling the node update throws
/// <see cref="System.ArgumentNullException"/>. The mock resource manager is stopped
/// in a <c>finally</c> block so it is released even when the test fails part-way.
/// </remarks>
public virtual void TestNodeUpdateBeforeAppAttemptInit()
{
    FifoScheduler scheduler = new FifoScheduler();
    MockRM rm = new MockRM(conf);
    try
    {
        scheduler.SetRMContext(rm.GetRMContext());
        scheduler.Init(conf);
        scheduler.Start();
        scheduler.Reinitialize(conf, rm.GetRMContext());

        RMNode node = MockNodes.NewNodeInfo(1, Resources.CreateResource(1024, 4), 1, "127.0.0.1");
        scheduler.Handle(new NodeAddedSchedulerEvent(node));

        // Register the application only; the attempt is deliberately added later.
        ApplicationId appId = ApplicationId.NewInstance(0, 1);
        scheduler.AddApplication(appId, "queue1", "user1", false);

        NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node);
        try
        {
            scheduler.Handle(updateEvent);
        }
        catch (ArgumentNullException)
        {
            NUnit.Framework.Assert.Fail("Node update before app attempt init must not throw");
        }

        ApplicationAttemptId attId = ApplicationAttemptId.NewInstance(appId, 1);
        scheduler.AddApplicationAttempt(attId, false, false);
    }
    finally
    {
        // Always release the mock RM, including when an assertion above fails.
        rm.Stop();
    }
}
/// <summary>
/// Checks that a single 1024 MB request is allocated after the mock clock is
/// advanced, without an explicit node update being sent for the request.
/// </summary>
/// <remarks>
/// The wait for the allocation is bounded, so a scheduler that never allocates
/// makes the final assertion fail instead of hanging the test run.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestSchedulingDelay()
{
    // Add one node
    string host = "127.0.0.1";
    RMNode node1 = MockNodes.NewNodeInfo(1, Resources.CreateResource(4096, 4), 1, host);
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.Handle(nodeEvent1);
    NodeUpdateSchedulerEvent nodeUpdateEvent = new NodeUpdateSchedulerEvent(node1);
    scheduler.Handle(nodeUpdateEvent);

    // Create one application and submit a single ANY request for 1024 MB
    ApplicationAttemptId appAttemptId = CreateAppAttemptId(this.AppId++, this.AttemptId++);
    CreateMockRMApp(appAttemptId);
    scheduler.AddApplication(appAttemptId.GetApplicationId(), "queue11", "user11", false);
    scheduler.AddApplicationAttempt(appAttemptId, false, false);
    IList<ResourceRequest> ask = new AList<ResourceRequest>();
    ask.AddItem(CreateResourceRequest(1024, 1, ResourceRequest.Any, 1, 1, true));
    scheduler.Allocate(appAttemptId, ask, new AList<ContainerId>(), null, null);
    FSAppAttempt app = scheduler.GetSchedulerApp(appAttemptId);

    // Advance time and let continuous scheduling kick in
    mockClock.Tick(1);

    // Poll for the allocation, but give up after roughly ten seconds so that a
    // missing allocation is reported by the assertion below rather than a hang.
    int pollIntervalMillis = 100;
    int maxPolls = 100;
    for (int polls = 0; polls < maxPolls && 1024 != app.GetCurrentConsumption().GetMemory(); polls++)
    {
        Sharpen.Thread.Sleep(pollIntervalMillis);
    }
    NUnit.Framework.Assert.AreEqual(1024, app.GetCurrentConsumption().GetMemory());
}
/// <summary>
/// Sends a node-update event for the given node manager's node to the resource
/// manager's scheduler.
/// </summary>
/// <param name="nm1">Node manager whose node id is used to look up the RM node.</param>
private void NodeUpdate(NodeManager nm1)
{
    // Resolve the RM-side node object from the resource manager's context.
    var knownNodes = resourceManager.GetRMContext().GetRMNodes();
    RMNode target = knownNodes[nm1.GetNodeId()];

    // Send a heartbeat to kick the tires on the Scheduler
    NodeUpdateSchedulerEvent heartbeat = new NodeUpdateSchedulerEvent(target);
    resourceManager.GetResourceScheduler().Handle(heartbeat);
}
/// <summary>
/// Checks min-share starvation detection for two queues that each declare a
/// minimum of 2048mb: queueA takes resources first, queueB becomes starved for
/// its min share, and a further node update resolves the starvation.
/// </summary>
/// <remarks>
/// The allocation file writer is closed in a <c>finally</c> block so the file
/// handle is released even if writing the file fails.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void Test()
{
    conf.Set(FairSchedulerConfiguration.AllocationFile, AllocFile);
    PrintWriter @out = new PrintWriter(new FileWriter(AllocFile));
    try
    {
        @out.WriteLine("<?xml version=\"1.0\"?>");
        @out.WriteLine("<allocations>");
        @out.WriteLine("<queue name=\"queueA\">");
        @out.WriteLine("<minResources>2048mb,0vcores</minResources>");
        @out.WriteLine("</queue>");
        @out.WriteLine("<queue name=\"queueB\">");
        @out.WriteLine("<minResources>2048mb,0vcores</minResources>");
        @out.WriteLine("</queue>");
        @out.WriteLine("</allocations>");
    }
    finally
    {
        @out.Close();
    }

    resourceManager = new MockRM(conf);
    resourceManager.Start();
    scheduler = (FairScheduler)resourceManager.GetResourceScheduler();

    // Add one big node (only care about aggregate capacity)
    RMNode node1 = MockNodes.NewNodeInfo(1, Resources.CreateResource(4 * 1024, 4), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.Handle(nodeEvent1);
    scheduler.Update();

    // Queue A wants 3 * 1024. Node update gives this all to A
    CreateSchedulingRequest(3 * 1024, "queueA", "user1");
    scheduler.Update();
    NodeUpdateSchedulerEvent nodeEvent2 = new NodeUpdateSchedulerEvent(node1);
    scheduler.Handle(nodeEvent2);

    // Queue B arrives and wants 1 * 1024
    CreateSchedulingRequest(1 * 1024, "queueB", "user1");
    scheduler.Update();
    ICollection<FSLeafQueue> queues = scheduler.GetQueueManager().GetLeafQueues();
    // Three leaf queues are expected: queueA, queueB and one more
    // (presumably the scheduler's default queue; confirm against QueueManager).
    NUnit.Framework.Assert.AreEqual(3, queues.Count);

    // Queue A should be above min share, B below.
    FSLeafQueue queueA = scheduler.GetQueueManager().GetLeafQueue("queueA", false);
    FSLeafQueue queueB = scheduler.GetQueueManager().GetLeafQueue("queueB", false);
    NUnit.Framework.Assert.IsFalse(queueA.IsStarvedForMinShare());
    NUnit.Framework.Assert.IsTrue(queueB.IsStarvedForMinShare());

    // Node checks in again, should allocate for B
    scheduler.Handle(nodeEvent2);

    // Now B should have min share ( = demand here)
    NUnit.Framework.Assert.IsFalse(queueB.IsStarvedForMinShare());
}
/// <summary>
/// Registers a single node with the scheduler, submits one scheduling request to
/// "queueA" for "user1", sends three node updates, and verifies the root queue
/// metrics before and after.
/// </summary>
/// <param name="memory">Memory of the registered node; also the expected available MB before scheduling.</param>
/// <param name="vcores">Virtual cores of the registered node; also the expected available vcores before scheduling.</param>
/// <param name="appContainers">Container count passed to the scheduling request.</param>
/// <param name="appMemory">Memory passed to the scheduling request (per container, as implied by the final assertion).</param>
private void RegisterNodeAndSubmitApp(int memory, int vcores, int appContainers, int appMemory)
{
    RMNode node1 = MockNodes.NewNodeInfo(1, Resources.CreateResource(memory, vcores), 1, "node1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.Handle(nodeEvent1);

    // NUnit's argument order is (expected, actual, message); the message goes last.
    NUnit.Framework.Assert.AreEqual(memory, scheduler.rootMetrics.GetAvailableMB(),
        "Incorrect amount of resources in the cluster");
    NUnit.Framework.Assert.AreEqual(vcores, scheduler.rootMetrics.GetAvailableVirtualCores(),
        "Incorrect amount of resources in the cluster");

    CreateSchedulingRequest(appMemory, "queueA", "user1", appContainers);
    scheduler.Update();

    // Sufficient node check-ins to fully schedule containers
    for (int i = 0; i < 3; i++)
    {
        NodeUpdateSchedulerEvent nodeUpdate1 = new NodeUpdateSchedulerEvent(node1);
        scheduler.Handle(nodeUpdate1);
    }

    NUnit.Framework.Assert.AreEqual(memory - appContainers * appMemory,
        scheduler.rootMetrics.GetAvailableMB(), "app1's request is not met");
}
/// <summary>
/// Routes a scheduler event to the routine that matches its event type.
/// </summary>
/// <remarks>
/// An <c>IOException</c> raised while finishing an application attempt is logged
/// and not rethrown. Event types without a matching case are logged and ignored.
/// </remarks>
/// <param name="event">The scheduler event to process.</param>
public override void Handle(SchedulerEvent @event)
{
    switch (@event.GetType())
    {
        case SchedulerEventType.NodeAdded:
        {
            NodeAddedSchedulerEvent added = (NodeAddedSchedulerEvent)@event;
            AddNode(added.GetAddedRMNode());
            // Recovery runs after the node has been added.
            RecoverContainersOnNode(added.GetContainerReports(), added.GetAddedRMNode());
            break;
        }

        case SchedulerEventType.NodeRemoved:
        {
            NodeRemovedSchedulerEvent removed = (NodeRemovedSchedulerEvent)@event;
            RemoveNode(removed.GetRemovedRMNode());
            break;
        }

        case SchedulerEventType.NodeResourceUpdate:
        {
            NodeResourceUpdateSchedulerEvent resized = (NodeResourceUpdateSchedulerEvent)@event;
            UpdateNodeResource(resized.GetRMNode(), resized.GetResourceOption());
            break;
        }

        case SchedulerEventType.NodeUpdate:
        {
            NodeUpdateSchedulerEvent heartbeat = (NodeUpdateSchedulerEvent)@event;
            NodeUpdate(heartbeat.GetRMNode());
            break;
        }

        case SchedulerEventType.AppAdded:
        {
            AppAddedSchedulerEvent appAdded = (AppAddedSchedulerEvent)@event;
            AddApplication(
                appAdded.GetApplicationId(),
                appAdded.GetQueue(),
                appAdded.GetUser(),
                appAdded.GetIsAppRecovering());
            break;
        }

        case SchedulerEventType.AppRemoved:
        {
            AppRemovedSchedulerEvent appRemoved = (AppRemovedSchedulerEvent)@event;
            DoneApplication(appRemoved.GetApplicationID(), appRemoved.GetFinalState());
            break;
        }

        case SchedulerEventType.AppAttemptAdded:
        {
            AppAttemptAddedSchedulerEvent attemptAdded = (AppAttemptAddedSchedulerEvent)@event;
            AddApplicationAttempt(
                attemptAdded.GetApplicationAttemptId(),
                attemptAdded.GetTransferStateFromPreviousAttempt(),
                attemptAdded.GetIsAttemptRecovering());
            break;
        }

        case SchedulerEventType.AppAttemptRemoved:
        {
            AppAttemptRemovedSchedulerEvent attemptRemoved = (AppAttemptRemovedSchedulerEvent)@event;
            try
            {
                DoneApplicationAttempt(
                    attemptRemoved.GetApplicationAttemptID(),
                    attemptRemoved.GetFinalAttemptState(),
                    attemptRemoved.GetKeepContainersAcrossAppAttempts());
            }
            catch (IOException ie)
            {
                // Failure to finish the attempt is reported but does not propagate.
                Log.Error("Unable to remove application " + attemptRemoved.GetApplicationAttemptID(), ie);
            }
            break;
        }

        case SchedulerEventType.ContainerExpired:
        {
            ContainerExpiredSchedulerEvent expired = (ContainerExpiredSchedulerEvent)@event;
            ContainerId expiredId = expired.GetContainerId();
            var expiredContainer = GetRMContainer(expiredId);
            var abnormalStatus = SchedulerUtils.CreateAbnormalContainerStatus(
                expiredId, SchedulerUtils.ExpiredContainer);
            CompletedContainer(expiredContainer, abnormalStatus, RMContainerEventType.Expire);
            break;
        }

        case SchedulerEventType.ContainerRescheduled:
        {
            ContainerRescheduledEvent rescheduled = (ContainerRescheduledEvent)@event;
            RMContainer toRecover = rescheduled.GetContainer();
            RecoverResourceRequestForContainer(toRecover);
            break;
        }

        default:
        {
            Log.Error("Invalid eventtype " + @event.GetType() + ". Ignoring!");
            break;
        }
    }
}
/// <summary>
/// Checks fair-share starvation detection for two child queues of queueB that
/// use different fair share preemption thresholds (0.4 inherited by queueB1,
/// 0.6 set on queueB2).
/// </summary>
/// <remarks>
/// The allocation file writer is closed in a <c>finally</c> block so the file
/// handle is released even if writing the file fails.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestIsStarvedForFairShare()
{
    conf.Set(FairSchedulerConfiguration.AllocationFile, AllocFile);
    PrintWriter @out = new PrintWriter(new FileWriter(AllocFile));
    try
    {
        @out.WriteLine("<?xml version=\"1.0\"?>");
        @out.WriteLine("<allocations>");
        @out.WriteLine("<queue name=\"queueA\">");
        @out.WriteLine("<weight>.2</weight>");
        @out.WriteLine("</queue>");
        @out.WriteLine("<queue name=\"queueB\">");
        @out.WriteLine("<weight>.8</weight>");
        @out.WriteLine("<fairSharePreemptionThreshold>.4</fairSharePreemptionThreshold>");
        @out.WriteLine("<queue name=\"queueB1\">");
        @out.WriteLine("</queue>");
        @out.WriteLine("<queue name=\"queueB2\">");
        @out.WriteLine("<fairSharePreemptionThreshold>.6</fairSharePreemptionThreshold>");
        @out.WriteLine("</queue>");
        @out.WriteLine("</queue>");
        @out.WriteLine("<defaultFairSharePreemptionThreshold>.5</defaultFairSharePreemptionThreshold>");
        @out.WriteLine("</allocations>");
    }
    finally
    {
        @out.Close();
    }

    resourceManager = new MockRM(conf);
    resourceManager.Start();
    scheduler = (FairScheduler)resourceManager.GetResourceScheduler();

    // Add one big node (only care about aggregate capacity)
    RMNode node1 = MockNodes.NewNodeInfo(1, Resources.CreateResource(10 * 1024, 10), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.Handle(nodeEvent1);
    scheduler.Update();

    // Queue A wants 4 * 1024. Node update gives this all to A
    CreateSchedulingRequest(1 * 1024, "queueA", "user1", 4);
    scheduler.Update();
    NodeUpdateSchedulerEvent nodeEvent2 = new NodeUpdateSchedulerEvent(node1);
    for (int i = 0; i < 4; i++)
    {
        scheduler.Handle(nodeEvent2);
    }
    QueueManager queueMgr = scheduler.GetQueueManager();
    FSLeafQueue queueA = queueMgr.GetLeafQueue("queueA", false);
    NUnit.Framework.Assert.AreEqual(4 * 1024, queueA.GetResourceUsage().GetMemory());

    // Both queue B1 and queue B2 want 3 * 1024
    CreateSchedulingRequest(1 * 1024, "queueB.queueB1", "user1", 3);
    CreateSchedulingRequest(1 * 1024, "queueB.queueB2", "user1", 3);
    scheduler.Update();
    for (int i = 0; i < 4; i++)
    {
        scheduler.Handle(nodeEvent2);
    }
    FSLeafQueue queueB1 = queueMgr.GetLeafQueue("queueB.queueB1", false);
    FSLeafQueue queueB2 = queueMgr.GetLeafQueue("queueB.queueB2", false);
    NUnit.Framework.Assert.AreEqual(2 * 1024, queueB1.GetResourceUsage().GetMemory());
    NUnit.Framework.Assert.AreEqual(2 * 1024, queueB2.GetResourceUsage().GetMemory());

    // For queue B1, the fairSharePreemptionThreshold is 0.4, and the fair share
    // threshold is 1.6 * 1024
    NUnit.Framework.Assert.IsFalse(queueB1.IsStarvedForFairShare());

    // For queue B2, the fairSharePreemptionThreshold is 0.6, and the fair share
    // threshold is 2.4 * 1024
    NUnit.Framework.Assert.IsTrue(queueB2.IsStarvedForFairShare());

    // Node checks in again; both queue B1 and queue B2 usages go to 3 * 1024
    scheduler.Handle(nodeEvent2);
    scheduler.Handle(nodeEvent2);
    NUnit.Framework.Assert.AreEqual(3 * 1024, queueB1.GetResourceUsage().GetMemory());
    NUnit.Framework.Assert.AreEqual(3 * 1024, queueB2.GetResourceUsage().GetMemory());

    // With their full demand met, neither queue is starved any more
    NUnit.Framework.Assert.IsFalse(queueB1.IsStarvedForFairShare());
    NUnit.Framework.Assert.IsFalse(queueB2.IsStarvedForFairShare());
}
/// <summary>
/// Checks that a node resource update event changes the scheduler node's total
/// and available memory, and that a subsequent 1024 MB request fills the queue.
/// </summary>
/// <remarks>
/// Assertions pass the expected value first and the actual value second, so
/// NUnit failure messages report them the right way round.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestUpdateResourceOnNode()
{
    AsyncDispatcher dispatcher = new InlineDispatcher();
    Configuration conf = new Configuration();
    RMContainerTokenSecretManager containerTokenSecretManager = new RMContainerTokenSecretManager(conf);
    containerTokenSecretManager.RollMasterKey();
    NMTokenSecretManagerInRM nmTokenSecretManager = new NMTokenSecretManagerInRM(conf);
    nmTokenSecretManager.RollMasterKey();

    FifoScheduler scheduler = new _FifoScheduler_275(this);
    RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null, null,
        containerTokenSecretManager, nmTokenSecretManager, null, scheduler);
    rmContext.SetSystemMetricsPublisher(Org.Mockito.Mockito.Mock<SystemMetricsPublisher>());
    rmContext.SetRMApplicationHistoryWriter(Org.Mockito.Mockito.Mock<RMApplicationHistoryWriter>());
    ((RMContextImpl)rmContext).SetYarnConfiguration(new YarnConfiguration());

    scheduler.SetRMContext(rmContext);
    scheduler.Init(conf);
    scheduler.Start();
    scheduler.Reinitialize(new Configuration(), rmContext);

    RMNode node0 = MockNodes.NewNodeInfo(1, Resources.CreateResource(2048, 4), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
    scheduler.Handle(nodeEvent1);

    // The scheduler's node map is obtained through reflection.
    MethodInfo method = Sharpen.Runtime.GetDeclaredMethod(scheduler.GetType(), "getNodes");
    IDictionary<NodeId, FiCaSchedulerNode> schedulerNodes =
        (IDictionary<NodeId, FiCaSchedulerNode>)method.Invoke(scheduler);
    NUnit.Framework.Assert.AreEqual(1, schedulerNodes.Values.Count);

    // Shrink the node from 2048 MB to 1024 MB.
    Org.Apache.Hadoop.Yarn.Api.Records.Resource newResource = Resources.CreateResource(1024, 4);
    NodeResourceUpdateSchedulerEvent node0ResourceUpdate = new NodeResourceUpdateSchedulerEvent(
        node0, ResourceOption.NewInstance(newResource, RMNode.OverCommitTimeoutMillisDefault));
    scheduler.Handle(node0ResourceUpdate);

    // SchedulerNode's total resource and available resource are changed.
    NUnit.Framework.Assert.AreEqual(1024,
        schedulerNodes[node0.GetNodeID()].GetTotalResource().GetMemory());
    NUnit.Framework.Assert.AreEqual(1024,
        schedulerNodes[node0.GetNodeID()].GetAvailableResource().GetMemory());
    QueueInfo queueInfo = scheduler.GetQueueInfo(null, false, false);
    NUnit.Framework.Assert.AreEqual(0.0f, queueInfo.GetCurrentCapacity(), 0.0f);

    int _appId = 1;
    int _appAttemptId = 1;
    ApplicationAttemptId appAttemptId = CreateAppAttemptId(_appId, _appAttemptId);
    CreateMockRMApp(appAttemptId, rmContext);
    AppAddedSchedulerEvent appEvent = new AppAddedSchedulerEvent(
        appAttemptId.GetApplicationId(), "queue1", "user1");
    scheduler.Handle(appEvent);
    AppAttemptAddedSchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId, false);
    scheduler.Handle(attemptEvent);

    int memory = 1024;
    int priority = 1;
    IList<ResourceRequest> ask = new AList<ResourceRequest>();
    ResourceRequest nodeLocal = CreateResourceRequest(memory, node0.GetHostName(), priority, 1);
    ResourceRequest rackLocal = CreateResourceRequest(memory, node0.GetRackName(), priority, 1);
    ResourceRequest any = CreateResourceRequest(memory, ResourceRequest.Any, priority, 1);
    ask.AddItem(nodeLocal);
    ask.AddItem(rackLocal);
    ask.AddItem(any);
    scheduler.Allocate(appAttemptId, ask, new AList<ContainerId>(), null, null);

    // Before the node update event, there is one local request
    NUnit.Framework.Assert.AreEqual(1, nodeLocal.GetNumContainers());

    NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
    // Now schedule.
    scheduler.Handle(node0Update);

    // After the node update event, check no local request
    NUnit.Framework.Assert.AreEqual(0, nodeLocal.GetNumContainers());

    // Also check that one container was scheduled
    SchedulerAppReport info = scheduler.GetSchedulerAppInfo(appAttemptId);
    NUnit.Framework.Assert.AreEqual(1, info.GetLiveContainers().Count);

    // And check the default Queue now is full.
    queueInfo = scheduler.GetQueueInfo(null, false, false);
    NUnit.Framework.Assert.AreEqual(1.0f, queueInfo.GetCurrentCapacity(), 0.0f);
}
/// <summary>
/// Checks that a single node update lets the FIFO scheduler satisfy all three
/// outstanding node-local requests and leaves three live containers.
/// </summary>
/// <remarks>
/// The scheduler is stopped in a <c>finally</c> block so it is shut down even
/// when an assertion fails.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestNodeLocalAssignment()
{
    AsyncDispatcher dispatcher = new InlineDispatcher();
    Configuration conf = new Configuration();
    RMContainerTokenSecretManager containerTokenSecretManager = new RMContainerTokenSecretManager(conf);
    containerTokenSecretManager.RollMasterKey();
    NMTokenSecretManagerInRM nmTokenSecretManager = new NMTokenSecretManagerInRM(conf);
    nmTokenSecretManager.RollMasterKey();

    FifoScheduler scheduler = new FifoScheduler();
    RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null, null,
        containerTokenSecretManager, nmTokenSecretManager, null, scheduler);
    rmContext.SetSystemMetricsPublisher(Org.Mockito.Mockito.Mock<SystemMetricsPublisher>());
    rmContext.SetRMApplicationHistoryWriter(Org.Mockito.Mockito.Mock<RMApplicationHistoryWriter>());
    ((RMContextImpl)rmContext).SetYarnConfiguration(new YarnConfiguration());

    scheduler.SetRMContext(rmContext);
    scheduler.Init(conf);
    scheduler.Start();
    try
    {
        scheduler.Reinitialize(new Configuration(), rmContext);

        RMNode node0 = MockNodes.NewNodeInfo(1, Resources.CreateResource(1024 * 64), 1, "127.0.0.1");
        NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
        scheduler.Handle(nodeEvent1);

        int _appId = 1;
        int _appAttemptId = 1;
        ApplicationAttemptId appAttemptId = CreateAppAttemptId(_appId, _appAttemptId);
        CreateMockRMApp(appAttemptId, rmContext);
        AppAddedSchedulerEvent appEvent = new AppAddedSchedulerEvent(
            appAttemptId.GetApplicationId(), "queue1", "user1");
        scheduler.Handle(appEvent);
        AppAttemptAddedSchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId, false);
        scheduler.Handle(attemptEvent);

        int memory = 64;
        int nConts = 3;
        int priority = 20;
        IList<ResourceRequest> ask = new AList<ResourceRequest>();
        ResourceRequest nodeLocal = CreateResourceRequest(memory, node0.GetHostName(), priority, nConts);
        ResourceRequest rackLocal = CreateResourceRequest(memory, node0.GetRackName(), priority, nConts);
        ResourceRequest any = CreateResourceRequest(memory, ResourceRequest.Any, priority, nConts);
        ask.AddItem(nodeLocal);
        ask.AddItem(rackLocal);
        ask.AddItem(any);
        scheduler.Allocate(appAttemptId, ask, new AList<ContainerId>(), null, null);

        NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);

        // Before the node update event, there are 3 local requests outstanding
        NUnit.Framework.Assert.AreEqual(3, nodeLocal.GetNumContainers());

        scheduler.Handle(node0Update);

        // After the node update event, check that there are no more local requests
        // outstanding
        NUnit.Framework.Assert.AreEqual(0, nodeLocal.GetNumContainers());

        // Also check that the containers were scheduled
        SchedulerAppReport info = scheduler.GetSchedulerAppInfo(appAttemptId);
        NUnit.Framework.Assert.AreEqual(3, info.GetLiveContainers().Count);
    }
    finally
    {
        // Shut the started scheduler down regardless of the test outcome.
        scheduler.Stop();
    }
}