Example #1
0
        /// <summary>
        /// Creates the child services of this service, registers them in the order
        /// they must start, and then delegates to the base class initialisation.
        /// </summary>
        /// <param name="conf">Configuration used to build and initialise the child services.</param>
        /// <exception cref="System.Exception"/>
        protected override void ServiceInit(Configuration conf)
        {
            // Force the dispatcher exit-on-error flag on before anything reads it.
            conf.SetBoolean(Dispatcher.DispatcherExitOnErrorKey, true);
            rmWorkPreservingRestartEnabled = conf.GetBoolean(
                YarnConfiguration.RmWorkPreservingRecoveryEnabled,
                YarnConfiguration.DefaultRmWorkPreservingRecoveryEnabled);

            // The recovery store must be running before the secret managers are built on it.
            InitAndStartRecoveryStore(conf);
            NMContainerTokenSecretManager containerTokens = new NMContainerTokenSecretManager(conf, nmStore);
            NMTokenSecretManagerInNM nmTokens = new NMTokenSecretManagerInNM(nmStore);
            RecoverTokens(nmTokens, containerTokens);

            aclsManager = new ApplicationACLsManager(conf);

            // Instantiate the configured container executor, defaulting to DefaultContainerExecutor.
            ContainerExecutor executorImpl = ReflectionUtils.NewInstance(
                conf.GetClass<ContainerExecutor>(YarnConfiguration.NmContainerExecutor, typeof(DefaultContainerExecutor)),
                conf);
            try
            {
                executorImpl.Init();
            }
            catch (IOException ioe)
            {
                throw new YarnRuntimeException("Failed to initialize container executor", ioe);
            }

            DeletionService deletionSvc = CreateDeletionService(executorImpl);
            AddService(deletionSvc);

            // NodeManager level dispatcher
            dispatcher = new AsyncDispatcher();

            nodeHealthChecker = new NodeHealthCheckerService();
            AddService(nodeHealthChecker);
            dirsHandler = nodeHealthChecker.GetDiskHandler();

            context = CreateNMContext(containerTokens, nmTokens, nmStore);
            nodeStatusUpdater = CreateNodeStatusUpdater(context, dispatcher, nodeHealthChecker);

            NodeResourceMonitor resourceMonitor = CreateNodeResourceMonitor();
            AddService(resourceMonitor);

            containerManager = CreateContainerManager(
                context, executorImpl, deletionSvc, nodeStatusUpdater, aclsManager, dirsHandler);
            AddService(containerManager);
            ((NodeManager.NMContext)context).SetContainerManager(containerManager);

            WebServer httpServer = CreateWebServer(
                context, containerManager.GetContainersMonitor(), aclsManager, dirsHandler);
            AddService(httpServer);
            ((NodeManager.NMContext)context).SetWebServer(httpServer);

            // Route container-manager events to the container manager and
            // node-manager events to this service.
            dispatcher.Register(typeof(ContainerManagerEventType), containerManager);
            dispatcher.Register(typeof(NodeManagerEventType), this);
            AddService(dispatcher);

            DefaultMetricsSystem.Initialize("NodeManager");

            // The status updater is registered last so that it is started last:
            // everything else must be up before registering with the RM.
            AddService(nodeStatusUpdater);
            base.ServiceInit(conf);
        }
Example #2
0
        /// <summary>
        /// Sends one drop-reservation, one kill-container and one preempt-container
        /// event through the RM dispatcher and verifies that the spied scheduler
        /// handled three events and invoked the matching scheduler call for each.
        /// </summary>
        public virtual void TestSchedulerEventDispatcherForPreemptionEvents()
        {
            AsyncDispatcher   rmDispatcher = new AsyncDispatcher();
            CapacityScheduler sched        = Org.Mockito.Mockito.Spy(new CapacityScheduler());
            YarnConfiguration conf         = new YarnConfiguration();

            ResourceManager.SchedulerEventDispatcher schedulerDispatcher =
                new ResourceManager.SchedulerEventDispatcher(sched);

            // Scheduler events posted to the RM dispatcher are forwarded to the scheduler dispatcher.
            rmDispatcher.Register(typeof(SchedulerEventType), schedulerDispatcher);
            rmDispatcher.Init(conf);
            rmDispatcher.Start();
            schedulerDispatcher.Init(conf);
            schedulerDispatcher.Start();
            try
            {
                ApplicationAttemptId appAttemptId = Org.Mockito.Mockito.Mock<ApplicationAttemptId>();
                RMContainer          container    = Org.Mockito.Mockito.Mock<RMContainer>();

                ContainerPreemptEvent event1 = new ContainerPreemptEvent(appAttemptId, container,
                                                                         SchedulerEventType.DropReservation);
                rmDispatcher.GetEventHandler().Handle(event1);
                ContainerPreemptEvent event2 = new ContainerPreemptEvent(appAttemptId, container,
                                                                         SchedulerEventType.KillContainer);
                rmDispatcher.GetEventHandler().Handle(event2);
                ContainerPreemptEvent event3 = new ContainerPreemptEvent(appAttemptId, container,
                                                                         SchedulerEventType.PreemptContainer);
                rmDispatcher.GetEventHandler().Handle(event3);

                // Wait for events to be processed by scheduler dispatcher.
                Sharpen.Thread.Sleep(1000);

                Org.Mockito.Mockito.Verify(sched, Org.Mockito.Mockito.Times(3)).Handle(
                    Matchers.Any<SchedulerEvent>());
                Org.Mockito.Mockito.Verify(sched).DropContainerReservation(container);
                Org.Mockito.Mockito.Verify(sched).PreemptContainer(appAttemptId, container);
                Org.Mockito.Mockito.Verify(sched).KillContainer(container);
            }
            catch (Exception e)
            {
                // Report the cause: a bare Fail() would hide which step threw and why.
                NUnit.Framework.Assert.Fail("Unexpected exception while dispatching preemption events: " + e);
            }
            finally
            {
                // Stop both dispatchers whether or not the verification succeeded.
                schedulerDispatcher.Stop();
                rmDispatcher.Stop();
            }
        }
Example #3
0
        // this has to be at least as much as map slot requirement
        // This is a huge kluge.  The real implementations have a decent approach
        /// <summary>
        /// Drives a job to completion against the given estimator, using a mock clock
        /// and a <c>DefaultSpeculator</c>, and asserts that the number of successful
        /// speculations equals <paramref name="expectedSpeculations"/>.
        /// </summary>
        /// <param name="testedEstimator">Estimator under test; stored in the <c>estimator</c> field.</param>
        /// <param name="expectedSpeculations">Expected final value of <c>successfulSpeculations</c>.</param>
        private void CoreTestEstimator(TaskRuntimeEstimator testedEstimator, int expectedSpeculations
                                       )
        {
            // Reset the fixture fields and counters, then move the mock clock forward by 1000.
            estimator  = testedEstimator;
            clock      = new TestRuntimeEstimators.MockClock();
            dispatcher = new AsyncDispatcher();
            myJob      = null;
            slotsInUse.Set(0);
            completedMaps.Set(0);
            completedReduces.Set(0);
            successfulSpeculations.Set(0);
            taskTimeSavedBySpeculation.Set(0);
            clock.AdvanceTime(1000);
            Configuration conf = new Configuration();

            myAppContext = new TestRuntimeEstimators.MyAppContext(this, MapTasks, ReduceTasks
                                                                  );
            // Use the first job the app context returns.
            myJob = myAppContext.GetAllJobs().Values.GetEnumerator().Next();
            // NOTE(review): the estimator is contextualized before the speculation
            // settings below are written into conf — confirm it does not need them.
            estimator.Contextualize(conf, myAppContext);
            conf.SetLong(MRJobConfig.SpeculativeRetryAfterNoSpeculate, 500L);
            conf.SetLong(MRJobConfig.SpeculativeRetryAfterSpeculate, 5000L);
            conf.SetDouble(MRJobConfig.SpeculativecapRunningTasks, 0.1);
            conf.SetDouble(MRJobConfig.SpeculativecapTotalTasks, 0.001);
            conf.SetInt(MRJobConfig.SpeculativeMinimumAllowedTasks, 5);
            speculator = new DefaultSpeculator(conf, myAppContext, estimator, clock);
            // Check that the speculator reports the values just written into conf.
            NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_NO_SPECULATE value"
                                            , 500L, speculator.GetSoonestRetryAfterNoSpeculate());
            NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_RETRY_AFTER_SPECULATE value",
                                            5000L, speculator.GetSoonestRetryAfterSpeculate());
            NUnit.Framework.Assert.AreEqual(speculator.GetProportionRunningTasksSpeculatable(
                                                ), 0.1, 0.00001);
            NUnit.Framework.Assert.AreEqual(speculator.GetProportionTotalTasksSpeculatable(),
                                            0.001, 0.00001);
            NUnit.Framework.Assert.AreEqual("wrong SPECULATIVE_MINIMUM_ALLOWED_TASKS value",
                                            5, speculator.GetMinimumAllowedSpeculativeTasks());
            // Register the speculator and a speculation-request handler with the
            // dispatcher, then start the dispatcher and the speculator.
            dispatcher.Register(typeof(Speculator.EventType), speculator);
            dispatcher.Register(typeof(TaskEventType), new TestRuntimeEstimators.SpeculationRequestEventHandler
                                    (this));
            dispatcher.Init(conf);
            dispatcher.Start();
            speculator.Init(conf);
            speculator.Start();
            // Now that the plumbing is hooked up, we do the following:
            //  do until all tasks are finished, ...
            //  1: If we have spare capacity, assign as many map tasks as we can, then
            //     assign as many reduce tasks as we can.  Note that an odd reduce
            //     task might be started while there are still map tasks, because
            //     map tasks take 3 slots and reduce tasks 2 slots.
            //  2: Send a speculation event for every task attempt that's running
            //  note that new attempts might get started by the speculator
            // discover undone tasks
            int undoneMaps    = MapTasks;
            int undoneReduces = ReduceTasks;
            // build a task sequence where all the maps precede any of the reduces
            IList <Task> allTasksSequence = new List <Task>();

            Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Map).Values);
            Sharpen.Collections.AddAll(allTasksSequence, myJob.GetTasks(TaskType.Reduce).Values
                                       );
            // Each pass recounts the unfinished tasks; the loop ends when none remain.
            while (undoneMaps + undoneReduces > 0)
            {
                undoneMaps    = 0;
                undoneReduces = 0;
                // start all attempts which are new but for which there is enough slots
                foreach (Task task in allTasksSequence)
                {
                    if (!task.IsFinished())
                    {
                        // NOTE(review): GetType() is presumably the task's TaskType
                        // accessor rather than Object.GetType() — confirm.
                        if (task.GetType() == TaskType.Map)
                        {
                            ++undoneMaps;
                        }
                        else
                        {
                            ++undoneReduces;
                        }
                    }
                    foreach (TaskAttempt attempt in task.GetAttempts().Values)
                    {
                        // A New attempt is started only when the free slots
                        // (InitialNumberFreeSlots minus slotsInUse) cover its task type's needs.
                        if (attempt.GetState() == TaskAttemptState.New && InitialNumberFreeSlots - slotsInUse
                            .Get() >= TaskTypeSlots(task.GetType()))
                        {
                            TestRuntimeEstimators.MyTaskAttemptImpl attemptImpl = (TestRuntimeEstimators.MyTaskAttemptImpl
                                                                                   )attempt;
                            // Notify the speculator first, then start the attempt.
                            SpeculatorEvent @event = new SpeculatorEvent(attempt.GetID(), false, clock.GetTime
                                                                             ());
                            speculator.Handle(@event);
                            attemptImpl.StartUp();
                        }
                        else
                        {
                            // If a task attempt is in progress we should send the news to
                            // the Speculator.
                            // This branch runs for every attempt that did not take the
                            // start branch above, whatever its state.
                            TaskAttemptStatusUpdateEvent.TaskAttemptStatus status = new TaskAttemptStatusUpdateEvent.TaskAttemptStatus
                                                                                        ();
                            status.id          = attempt.GetID();
                            status.progress    = attempt.GetProgress();
                            status.stateString = attempt.GetState().ToString();
                            status.taskState   = attempt.GetState();
                            SpeculatorEvent @event = new SpeculatorEvent(status, clock.GetTime());
                            speculator.Handle(@event);
                        }
                    }
                }
                long startTime = Runtime.CurrentTimeMillis();
                // drain the speculator event queue
                while (!speculator.EventQueueEmpty())
                {
                    Sharpen.Thread.Yield();
                    // NOTE(review): if the queue has not drained within 130000 ms the
                    // method returns here, so the final assertion below is never reached.
                    if (Runtime.CurrentTimeMillis() > startTime + 130000)
                    {
                        return;
                    }
                }
                // Advance the mock clock by 1000 per pass; scan for speculations
                // whenever the clock time is a multiple of 10000.
                clock.AdvanceTime(1000L);
                if (clock.GetTime() % 10000L == 0L)
                {
                    speculator.ScanForSpeculations();
                }
            }
            NUnit.Framework.Assert.AreEqual("We got the wrong number of successful speculations."
                                            , expectedSpeculations, successfulSpeculations.Get());
        }
Example #4
0
        /// <summary>
        /// Checks that the committer event handler does not commit before a fresh
        /// RM heartbeat is recorded, commits once the heartbeat time is set, and
        /// commits again on a second request.
        /// </summary>
        /// <remarks>
        /// The started dispatcher and committer event handler are stopped in
        /// <c>finally</c> blocks so that a failed assertion does not leave them running.
        /// </remarks>
        public virtual void TestCommitWindow()
        {
            Configuration conf = new Configuration();

            conf.Set(MRJobConfig.MrAmStagingDir, stagingDir);
            AsyncDispatcher dispatcher = new AsyncDispatcher();

            dispatcher.Init(conf);
            dispatcher.Start();
            try
            {
                TestCommitterEventHandler.TestingJobEventHandler jeh =
                    new TestCommitterEventHandler.TestingJobEventHandler();
                dispatcher.Register(typeof(JobEventType), jeh);
                SystemClock          clock      = new SystemClock();
                AppContext           appContext = Org.Mockito.Mockito.Mock<AppContext>();
                ApplicationAttemptId attemptid  =
                    ConverterUtils.ToApplicationAttemptId("appattempt_1234567890000_0001_0");

                Org.Mockito.Mockito.When(appContext.GetApplicationID()).ThenReturn(attemptid.GetApplicationId());
                Org.Mockito.Mockito.When(appContext.GetApplicationAttemptId()).ThenReturn(attemptid);
                Org.Mockito.Mockito.When(appContext.GetEventHandler()).ThenReturn(dispatcher.GetEventHandler());
                Org.Mockito.Mockito.When(appContext.GetClock()).ThenReturn(clock);
                OutputCommitter committer = Org.Mockito.Mockito.Mock<OutputCommitter>();

                TestCommitterEventHandler.TestingRMHeartbeatHandler rmhh =
                    new TestCommitterEventHandler.TestingRMHeartbeatHandler();
                CommitterEventHandler ceh = new CommitterEventHandler(appContext, committer, rmhh);

                ceh.Init(conf);
                ceh.Start();
                try
                {
                    // verify trying to commit when RM heartbeats are stale does not commit
                    ceh.Handle(new CommitterJobCommitEvent(null, null));
                    long timeToWaitMs = 5000;

                    // Poll in 10 ms steps, for at most 5000 ms, until the callback is registered.
                    while (rmhh.GetNumCallbacks() != 1 && timeToWaitMs > 0)
                    {
                        Sharpen.Thread.Sleep(10);
                        timeToWaitMs -= 10;
                    }
                    NUnit.Framework.Assert.AreEqual("committer did not register a heartbeat callback"
                                                    , 1, rmhh.GetNumCallbacks());
                    Org.Mockito.Mockito.Verify(committer, Org.Mockito.Mockito.Never()).CommitJob(
                        Matchers.Any<JobContext>());
                    NUnit.Framework.Assert.AreEqual("committer should not have committed", 0,
                                                    jeh.numCommitCompletedEvents);

                    // set a fresh heartbeat and verify commit completes
                    rmhh.SetLastHeartbeatTime(clock.GetTime());
                    timeToWaitMs = 5000;
                    while (jeh.numCommitCompletedEvents != 1 && timeToWaitMs > 0)
                    {
                        Sharpen.Thread.Sleep(10);
                        timeToWaitMs -= 10;
                    }
                    NUnit.Framework.Assert.AreEqual("committer did not complete commit after RM hearbeat"
                                                    , 1, jeh.numCommitCompletedEvents);
                    Org.Mockito.Mockito.Verify(committer, Org.Mockito.Mockito.Times(1)).CommitJob(
                        Matchers.Any<JobContext>());

                    //Clean up so we can try to commit again (Don't do this at home)
                    Cleanup();

                    // try to commit again and verify it goes through since the heartbeat
                    // is still fresh
                    ceh.Handle(new CommitterJobCommitEvent(null, null));
                    timeToWaitMs = 5000;
                    while (jeh.numCommitCompletedEvents != 2 && timeToWaitMs > 0)
                    {
                        Sharpen.Thread.Sleep(10);
                        timeToWaitMs -= 10;
                    }
                    NUnit.Framework.Assert.AreEqual("committer did not commit", 2,
                                                    jeh.numCommitCompletedEvents);
                    Org.Mockito.Mockito.Verify(committer, Org.Mockito.Mockito.Times(2)).CommitJob(
                        Matchers.Any<JobContext>());
                }
                finally
                {
                    // Stop the handler even when an assertion or verification above fails.
                    ceh.Stop();
                }
            }
            finally
            {
                // Stopped after the handler, matching the original shutdown order.
                dispatcher.Stop();
            }
        }