/// <exception cref="System.Exception"/>
protected override void ServiceInit(Configuration conf)
{
    conf.SetBoolean(Dispatcher.DispatcherExitOnErrorKey, true);
    rmWorkPreservingRestartEnabled = conf.GetBoolean(
        YarnConfiguration.RmWorkPreservingRecoveryEnabled,
        YarnConfiguration.DefaultRmWorkPreservingRecoveryEnabled);
    InitAndStartRecoveryStore(conf);
    NMContainerTokenSecretManager containerTokenSecretManager =
        new NMContainerTokenSecretManager(conf, nmStore);
    NMTokenSecretManagerInNM nmTokenSecretManager = new NMTokenSecretManagerInNM(nmStore);
    RecoverTokens(nmTokenSecretManager, containerTokenSecretManager);
    this.aclsManager = new ApplicationACLsManager(conf);
    ContainerExecutor exec = ReflectionUtils.NewInstance(
        conf.GetClass<ContainerExecutor>(YarnConfiguration.NmContainerExecutor,
            typeof(DefaultContainerExecutor)), conf);
    try
    {
        exec.Init();
    }
    catch (IOException e)
    {
        throw new YarnRuntimeException("Failed to initialize container executor", e);
    }
    DeletionService del = CreateDeletionService(exec);
    AddService(del);
    // NodeManager level dispatcher
    this.dispatcher = new AsyncDispatcher();
    nodeHealthChecker = new NodeHealthCheckerService();
    AddService(nodeHealthChecker);
    dirsHandler = nodeHealthChecker.GetDiskHandler();
    this.context = CreateNMContext(containerTokenSecretManager, nmTokenSecretManager, nmStore);
    nodeStatusUpdater = CreateNodeStatusUpdater(context, dispatcher, nodeHealthChecker);
    NodeResourceMonitor nodeResourceMonitor = CreateNodeResourceMonitor();
    AddService(nodeResourceMonitor);
    containerManager = CreateContainerManager(context, exec, del, nodeStatusUpdater,
        this.aclsManager, dirsHandler);
    AddService(containerManager);
    ((NodeManager.NMContext)context).SetContainerManager(containerManager);
    WebServer webServer = CreateWebServer(context, containerManager.GetContainersMonitor(),
        this.aclsManager, dirsHandler);
    AddService(webServer);
    ((NodeManager.NMContext)context).SetWebServer(webServer);
    dispatcher.Register(typeof(ContainerManagerEventType), containerManager);
    dispatcher.Register(typeof(NodeManagerEventType), this);
    AddService(dispatcher);
    DefaultMetricsSystem.Initialize("NodeManager");
    // The StatusUpdater should be added last so that it gets started last;
    // that way everything is up before we register with the RM.
    AddService(nodeStatusUpdater);
    base.ServiceInit(conf);
}
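// Hedged usage sketch (not from the original file): ServiceInit above is not
// called directly; it runs inside the standard service lifecycle. Assuming the
// Sharpen-style NodeManager surface shown above, a minimal driver looks like:
private static void RunNodeManagerSketch()
{
    NodeManager nodeManager = new NodeManager();
    Configuration conf = new YarnConfiguration();
    // Init() invokes ServiceInit(conf), which wires up and inits every child
    // added via AddService() (deletion service, health checker, dispatcher, ...).
    nodeManager.Init(conf);
    // Start() starts children in registration order, so the NodeStatusUpdater,
    // added last above, starts last and registers with the RM only after
    // everything else is up.
    nodeManager.Start();
    // Stop() shuts the composite service down in reverse order.
    nodeManager.Stop();
}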
public virtual void TestSchedulerEventDispatcherForPreemptionEvents()
{
    AsyncDispatcher rmDispatcher = new AsyncDispatcher();
    CapacityScheduler sched = Org.Mockito.Mockito.Spy(new CapacityScheduler());
    YarnConfiguration conf = new YarnConfiguration();
    ResourceManager.SchedulerEventDispatcher schedulerDispatcher =
        new ResourceManager.SchedulerEventDispatcher(sched);
    rmDispatcher.Register(typeof(SchedulerEventType), schedulerDispatcher);
    rmDispatcher.Init(conf);
    rmDispatcher.Start();
    schedulerDispatcher.Init(conf);
    schedulerDispatcher.Start();
    try
    {
        ApplicationAttemptId appAttemptId = Org.Mockito.Mockito.Mock<ApplicationAttemptId>();
        RMContainer container = Org.Mockito.Mockito.Mock<RMContainer>();
        ContainerPreemptEvent event1 = new ContainerPreemptEvent(appAttemptId, container,
            SchedulerEventType.DropReservation);
        rmDispatcher.GetEventHandler().Handle(event1);
        ContainerPreemptEvent event2 = new ContainerPreemptEvent(appAttemptId, container,
            SchedulerEventType.KillContainer);
        rmDispatcher.GetEventHandler().Handle(event2);
        ContainerPreemptEvent event3 = new ContainerPreemptEvent(appAttemptId, container,
            SchedulerEventType.PreemptContainer);
        rmDispatcher.GetEventHandler().Handle(event3);
        // Wait for the events to be processed by the scheduler dispatcher.
        Sharpen.Thread.Sleep(1000);
        Org.Mockito.Mockito.Verify(sched, Org.Mockito.Mockito.Times(3)).Handle(
            Matchers.Any<SchedulerEvent>());
        Org.Mockito.Mockito.Verify(sched).DropContainerReservation(container);
        Org.Mockito.Mockito.Verify(sched).PreemptContainer(appAttemptId, container);
        Org.Mockito.Mockito.Verify(sched).KillContainer(container);
    }
    catch (Exception)
    {
        NUnit.Framework.Assert.Fail();
    }
    finally
    {
        schedulerDispatcher.Stop();
        rmDispatcher.Stop();
    }
}
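// Hedged note (an assumption, not part of the original test): the fixed
// Sharpen.Thread.Sleep(1000) above can be flaky on slow machines. A bounded
// polling helper built only from calls already used in this file
// (Runtime.CurrentTimeMillis, Sharpen.Thread.Sleep) is one alternative sketch:
private static bool WaitFor(System.Func<bool> condition, long timeoutMs)
{
    long deadline = Runtime.CurrentTimeMillis() + timeoutMs;
    while (Runtime.CurrentTimeMillis() < deadline)
    {
        if (condition())
        {
            return true;   // condition satisfied before the deadline
        }
        Sharpen.Thread.Sleep(10);   // brief pause between checks
    }
    return condition();   // one final check at the deadline
}
// The test could then poll, e.g., a counter incremented by the spy instead of
// sleeping a fixed second, keeping the same three-event expectation while
// returning as soon as the dispatcher has drained.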
// this has to be at least as much as the map slot requirement
// This is a huge kluge. The real implementations have a decent approach.
private void CoreTestEstimator(TaskRuntimeEstimator testedEstimator, int expectedSpeculations)
{
    estimator = testedEstimator;
    clock = new TestRuntimeEstimators.MockClock();
    dispatcher = new AsyncDispatcher();
    myJob = null;
    slotsInUse.Set(0);
    completedMaps.Set(0);
    completedReduces.Set(0);
    successfulSpeculations.Set(0);
    taskTimeSavedBySpeculation.Set(0);
    clock.AdvanceTime(1000);
    Configuration conf = new Configuration();
    myAppContext = new TestRuntimeEstimators.MyAppContext(this, MapTasks, ReduceTasks);
    myJob = myAppContext.GetAllJobs().Values.GetEnumerator().Next();
    estimator.Contextualize(conf, myAppContext);
    conf.SetLong(MRJobConfig.SpeculativeRetryAfterNoSpeculate, 500L);
    conf.SetLong(MRJobConfig.SpeculativeRetryAfterSpeculate, 5000L);
    conf.SetDouble(MRJobConfig.SpeculativecapRunningTasks, 0.1);
    conf.SetDouble(MRJobConfig.SpeculativecapTotalTasks, 0.001);
    conf.SetInt(MRJobConfig.SpeculativeMinimumAllowedTasks, 5);
    speculator = new DefaultSpeculator(conf, myAppContext, estimator, clock);
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_NO_SPECULATE value",
        500L, speculator.GetSoonestRetryAfterNoSpeculate());
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_SPECULATE value",
        5000L, speculator.GetSoonestRetryAfterSpeculate());
    NUnit.Framework.Assert.AreEqual(speculator.GetProportionRunningTasksSpeculatable(),
        0.1, 0.00001);
    NUnit.Framework.Assert.AreEqual(speculator.GetProportionTotalTasksSpeculatable(),
        0.001, 0.00001);
    NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_MINIMUM_ALLOWED_TASKS value",
        5, speculator.GetMinimumAllowedSpeculativeTasks());
    dispatcher.Register(typeof(Speculator.EventType), speculator);
    dispatcher.Register(typeof(TaskEventType),
        new TestRuntimeEstimators.SpeculationRequestEventHandler(this));
    dispatcher.Init(conf);
    dispatcher.Start();
    speculator.Init(conf);
    speculator.Start();
    // Now that the plumbing is hooked up, we do the following until all tasks
    // are finished:
    // 1: If we have spare capacity, assign as many map tasks as we can, then
    //    assign as many reduce tasks as we can. Note that an odd reduce
    //    task might be started while there are still map tasks, because
    //    map tasks take 3 slots and reduce tasks 2 slots.
    // 2: Send a speculation event for every task attempt that's running;
    //    note that new attempts might get started by the speculator.
    // Discover undone tasks.
    int undoneMaps = MapTasks;
    int undoneReduces = ReduceTasks;
    // Build a task sequence where all the maps precede any of the reduces.
    IList<Task> allTasksSequence = new List<Task>();
    Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Map).Values);
    Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Reduce).Values);
    while (undoneMaps + undoneReduces > 0)
    {
        undoneMaps = 0;
        undoneReduces = 0;
        // Start all attempts which are new but for which there are enough slots.
        foreach (Task task in allTasksSequence)
        {
            if (!task.IsFinished())
            {
                if (task.GetType() == TaskType.Map)
                {
                    ++undoneMaps;
                }
                else
                {
                    ++undoneReduces;
                }
            }
            foreach (TaskAttempt attempt in task.GetAttempts().Values)
            {
                if (attempt.GetState() == TaskAttemptState.New
                    && InitialNumberFreeSlots - slotsInUse.Get() >= TaskTypeSlots(task.GetType()))
                {
                    TestRuntimeEstimators.MyTaskAttemptImpl attemptImpl =
                        (TestRuntimeEstimators.MyTaskAttemptImpl)attempt;
                    SpeculatorEvent @event = new SpeculatorEvent(attempt.GetID(), false, clock.GetTime());
                    speculator.Handle(@event);
                    attemptImpl.StartUp();
                }
                else
                {
                    // If a task attempt is in progress we should send the news to
                    // the Speculator.
                    TaskAttemptStatusUpdateEvent.TaskAttemptStatus status =
                        new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();
                    status.id = attempt.GetID();
                    status.progress = attempt.GetProgress();
                    status.stateString = attempt.GetState().ToString();
                    status.taskState = attempt.GetState();
                    SpeculatorEvent @event = new SpeculatorEvent(status, clock.GetTime());
                    speculator.Handle(@event);
                }
            }
        }
        long startTime = Runtime.CurrentTimeMillis();
        // Drain the speculator event queue, bailing out after 130 seconds.
        while (!speculator.EventQueueEmpty())
        {
            Sharpen.Thread.Yield();
            if (Runtime.CurrentTimeMillis() > startTime + 130000)
            {
                return;
            }
        }
        clock.AdvanceTime(1000L);
        if (clock.GetTime() % 10000L == 0L)
        {
            speculator.ScanForSpeculations();
        }
    }
    NUnit.Framework.Assert.AreEqual("We got the wrong number of successful speculations.",
        expectedSpeculations, successfulSpeculations.Get());
}
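// Hedged sketch (inferred from the comment above that map tasks take 3 slots
// and reduce tasks take 2; not copied from the original file): the
// TaskTypeSlots helper referenced in the loop presumably reduces to this.
private static int TaskTypeSlots(TaskType type)
{
    // Map attempts consume 3 slots, reduce attempts 2, per the test's kluge.
    return type == TaskType.Map ? 3 : 2;
}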
public virtual void TestCommitWindow()
{
    Configuration conf = new Configuration();
    conf.Set(MRJobConfig.MrAmStagingDir, stagingDir);
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    dispatcher.Init(conf);
    dispatcher.Start();
    TestCommitterEventHandler.TestingJobEventHandler jeh =
        new TestCommitterEventHandler.TestingJobEventHandler();
    dispatcher.Register(typeof(JobEventType), jeh);
    SystemClock clock = new SystemClock();
    AppContext appContext = Org.Mockito.Mockito.Mock<AppContext>();
    ApplicationAttemptId attemptid =
        ConverterUtils.ToApplicationAttemptId("appattempt_1234567890000_0001_0");
    Org.Mockito.Mockito.When(appContext.GetApplicationID()).ThenReturn(attemptid.GetApplicationId());
    Org.Mockito.Mockito.When(appContext.GetApplicationAttemptId()).ThenReturn(attemptid);
    Org.Mockito.Mockito.When(appContext.GetEventHandler()).ThenReturn(dispatcher.GetEventHandler());
    Org.Mockito.Mockito.When(appContext.GetClock()).ThenReturn(clock);
    OutputCommitter committer = Org.Mockito.Mockito.Mock<OutputCommitter>();
    TestCommitterEventHandler.TestingRMHeartbeatHandler rmhh =
        new TestCommitterEventHandler.TestingRMHeartbeatHandler();
    CommitterEventHandler ceh = new CommitterEventHandler(appContext, committer, rmhh);
    ceh.Init(conf);
    ceh.Start();
    // Verify that trying to commit while RM heartbeats are stale does not commit.
    ceh.Handle(new CommitterJobCommitEvent(null, null));
    long timeToWaitMs = 5000;
    while (rmhh.GetNumCallbacks() != 1 && timeToWaitMs > 0)
    {
        Sharpen.Thread.Sleep(10);
        timeToWaitMs -= 10;
    }
    NUnit.Framework.Assert.AreEqual("committer did not register a heartbeat callback",
        1, rmhh.GetNumCallbacks());
    Org.Mockito.Mockito.Verify(committer, Org.Mockito.Mockito.Never()).CommitJob(
        Matchers.Any<JobContext>());
    NUnit.Framework.Assert.AreEqual("committer should not have committed", 0,
        jeh.numCommitCompletedEvents);
    // Set a fresh heartbeat and verify the commit completes.
    rmhh.SetLastHeartbeatTime(clock.GetTime());
    timeToWaitMs = 5000;
    while (jeh.numCommitCompletedEvents != 1 && timeToWaitMs > 0)
    {
        Sharpen.Thread.Sleep(10);
        timeToWaitMs -= 10;
    }
    NUnit.Framework.Assert.AreEqual("committer did not complete commit after RM heartbeat",
        1, jeh.numCommitCompletedEvents);
    Org.Mockito.Mockito.Verify(committer, Org.Mockito.Mockito.Times(1)).CommitJob(
        Matchers.Any<JobContext>());
    // Clean up so we can try to commit again. (Don't do this at home.)
    Cleanup();
    // Try to commit again and verify it goes through, since the heartbeat
    // is still fresh.
    ceh.Handle(new CommitterJobCommitEvent(null, null));
    timeToWaitMs = 5000;
    while (jeh.numCommitCompletedEvents != 2 && timeToWaitMs > 0)
    {
        Sharpen.Thread.Sleep(10);
        timeToWaitMs -= 10;
    }
    NUnit.Framework.Assert.AreEqual("committer did not commit", 2,
        jeh.numCommitCompletedEvents);
    Org.Mockito.Mockito.Verify(committer, Org.Mockito.Mockito.Times(2)).CommitJob(
        Matchers.Any<JobContext>());
    ceh.Stop();
    dispatcher.Stop();
}
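// Hedged reconstruction (not the shipped CommitterEventHandler source): the
// behavior this test exercises is a freshness gate on the last RM heartbeat.
// commitWindowMs below is an assumed name for the configured commit window.
private static bool IsCommitWindowOpen(long lastHeartbeatTimeMs, long nowMs, long commitWindowMs)
{
    // A commit proceeds only if the RM has heartbeated recently enough;
    // otherwise the handler registers a heartbeat callback (the
    // GetNumCallbacks() increment asserted above) and retries once a fresh
    // heartbeat arrives.
    return nowMs - lastHeartbeatTimeMs <= commitWindowMs;
}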