示例#1
0
 /// <summary>
 /// Creates the status updater, forwarding the node manager collaborators to the
 /// base constructor and remembering the enclosing <c>TestNodeManagerResync</c>.
 /// </summary>
 /// <param name="_enclosing">Enclosing test instance stored on this object.</param>
 /// <param name="context">Passed through to the base constructor.</param>
 /// <param name="dispatcher">Passed through to the base constructor.</param>
 /// <param name="healthChecker">Passed through to the base constructor.</param>
 /// <param name="metrics">Passed through to the base constructor.</param>
 public TestNodeStatusUpdaterResync(
     TestNodeManagerResync _enclosing,
     Context context,
     Dispatcher dispatcher,
     NodeHealthCheckerService healthChecker,
     NodeManagerMetrics metrics)
     : base(context, dispatcher, healthChecker, metrics)
 {
     // Keep a handle on the outer test instance.
     this._enclosing = _enclosing;
 }
示例#2
0
        /// <summary>
        /// Builds the child services of this node manager, registers them with
        /// <c>AddService</c> in the order shown below, and then delegates to the
        /// base class initialization.
        /// </summary>
        /// <param name="conf">
        /// Configuration read for the recovery flag and the container executor class;
        /// its dispatcher exit-on-error key is set to <c>true</c> here.
        /// </param>
        /// <exception cref="System.Exception"/>
        protected override void ServiceInit(Configuration conf)
        {
            conf.SetBoolean(Dispatcher.DispatcherExitOnErrorKey, true);
            rmWorkPreservingRestartEnabled = conf.GetBoolean(
                YarnConfiguration.RmWorkPreservingRecoveryEnabled,
                YarnConfiguration.DefaultRmWorkPreservingRecoveryEnabled);
            InitAndStartRecoveryStore(conf);

            // Both secret managers are created on top of nmStore and handed to RecoverTokens.
            NMContainerTokenSecretManager containerSecrets = new NMContainerTokenSecretManager(conf, nmStore);
            NMTokenSecretManagerInNM nmSecrets = new NMTokenSecretManagerInNM(nmStore);
            RecoverTokens(nmSecrets, containerSecrets);

            this.aclsManager = new ApplicationACLsManager(conf);

            // The executor class comes from configuration, defaulting to DefaultContainerExecutor.
            ContainerExecutor executor = ReflectionUtils.NewInstance(
                conf.GetClass <ContainerExecutor>(YarnConfiguration.NmContainerExecutor, typeof(DefaultContainerExecutor)),
                conf);
            try
            {
                executor.Init();
            }
            catch (IOException initFailure)
            {
                throw new YarnRuntimeException("Failed to initialize container executor", initFailure);
            }

            DeletionService deletion = CreateDeletionService(executor);
            AddService(deletion);

            // NodeManager level dispatcher
            this.dispatcher = new AsyncDispatcher();

            nodeHealthChecker = new NodeHealthCheckerService();
            AddService(nodeHealthChecker);
            dirsHandler = nodeHealthChecker.GetDiskHandler();

            this.context = CreateNMContext(containerSecrets, nmSecrets, nmStore);
            nodeStatusUpdater = CreateNodeStatusUpdater(context, dispatcher, nodeHealthChecker);

            NodeResourceMonitor resourceMonitor = CreateNodeResourceMonitor();
            AddService(resourceMonitor);

            containerManager = CreateContainerManager(
                context, executor, deletion, nodeStatusUpdater, this.aclsManager, dirsHandler);
            AddService(containerManager);
            ((NodeManager.NMContext)context).SetContainerManager(containerManager);

            WebServer web = CreateWebServer(
                context, containerManager.GetContainersMonitor(), this.aclsManager, dirsHandler);
            AddService(web);
            ((NodeManager.NMContext)context).SetWebServer(web);

            // Route container-manager and node-manager events through the dispatcher.
            dispatcher.Register(typeof(ContainerManagerEventType), containerManager);
            dispatcher.Register(typeof(NodeManagerEventType), this);
            AddService(dispatcher);

            DefaultMetricsSystem.Initialize("NodeManager");

            // StatusUpdater should be added last so that it get started last
            // so that we make sure everything is up before registering with RM.
            AddService(nodeStatusUpdater);

            base.ServiceInit(conf);
        }
示例#3
0
 /// <summary>
 /// Creates the status updater, naming the service after this type's full name,
 /// storing the supplied collaborators and creating the two empty container
 /// bookkeeping maps.
 /// </summary>
 /// <param name="context">Stored in <c>this.context</c>.</param>
 /// <param name="dispatcher">Stored in <c>this.dispatcher</c>.</param>
 /// <param name="healthChecker">Stored in <c>this.healthChecker</c>.</param>
 /// <param name="metrics">Stored in <c>this.metrics</c>.</param>
 public NodeStatusUpdaterImpl(
     Context context,
     Dispatcher dispatcher,
     NodeHealthCheckerService healthChecker,
     NodeManagerMetrics metrics)
     : base(typeof(Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl).FullName)
 {
     // Collaborators handed in by the caller.
     this.context       = context;
     this.dispatcher    = dispatcher;
     this.healthChecker = healthChecker;
     this.metrics       = metrics;

     // It will be used to track recently stopped containers on node manager, this
     // is to avoid the misleading no-such-container exception messages on NM, when
     // the AM finishes it informs the RM to stop the may-be-already-completed
     // containers.
     // (The duration for which a recently stopped container is tracked is not set here.)
     this.recentlyStoppedContainers = new LinkedHashMap <ContainerId, long>();

     // Save the reported completed containers in case of lost heartbeat responses.
     // These completed containers will be sent again till a successful response.
     this.pendingCompletedContainers = new Dictionary <ContainerId, ContainerStatus>();
 }
示例#4
0
 /// <summary>
 /// Factory hook that builds the node status updater; subclasses may override it
 /// to supply a different implementation.
 /// </summary>
 /// <param name="context">Forwarded to the <c>NodeStatusUpdaterImpl</c> constructor.</param>
 /// <param name="dispatcher">Forwarded to the <c>NodeStatusUpdaterImpl</c> constructor.</param>
 /// <param name="healthChecker">Forwarded to the <c>NodeStatusUpdaterImpl</c> constructor.</param>
 /// <returns>A new <c>NodeStatusUpdaterImpl</c> built with this instance's <c>metrics</c>.</returns>
 protected internal virtual NodeStatusUpdater CreateNodeStatusUpdater(
     Context context,
     Dispatcher dispatcher,
     NodeHealthCheckerService healthChecker)
 {
     NodeStatusUpdater updater = new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics);

     return updater;
 }
示例#5
0
 /// <summary>
 /// Overrides the factory hook to return a <c>TestNodeStatusUpdaterImpl3</c>
 /// bound to this node manager instance.
 /// </summary>
 /// <param name="context">Forwarded to the updater constructor.</param>
 /// <param name="dispatcher">Forwarded to the updater constructor.</param>
 /// <param name="healthChecker">Forwarded to the updater constructor.</param>
 /// <returns>The newly created test status updater.</returns>
 protected internal override NodeStatusUpdater CreateNodeStatusUpdater(
     Context context,
     Dispatcher dispatcher,
     NodeHealthCheckerService healthChecker)
 {
     NodeStatusUpdater updater = new TestNodeManagerResync.TestNodeManager3.TestNodeStatusUpdaterImpl3(
         this, context, dispatcher, healthChecker, this.metrics);

     return updater;
 }
示例#6
0
 /// <summary>
 /// Creates the status updater, passing the enclosing instance and the four base
 /// arguments to the base constructor and storing the test container and its status.
 /// </summary>
 /// <param name="_enclosing">Enclosing <c>_NodeManager_225</c>; stored and also passed to the base constructor.</param>
 /// <param name="testCompleteContainer">Container status stored on this instance.</param>
 /// <param name="container">Container stored on this instance.</param>
 /// <param name="baseArg1">Context forwarded to the base constructor.</param>
 /// <param name="baseArg2">Dispatcher forwarded to the base constructor.</param>
 /// <param name="baseArg3">Health checker forwarded to the base constructor.</param>
 /// <param name="baseArg4">Metrics forwarded to the base constructor.</param>
 public _TestNodeStatusUpdaterResync_232(
     _NodeManager_225 _enclosing,
     ContainerStatus testCompleteContainer,
     Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container container,
     Context baseArg1,
     Dispatcher baseArg2,
     NodeHealthCheckerService baseArg3,
     NodeManagerMetrics baseArg4)
     : base(_enclosing, baseArg1, baseArg2, baseArg3, baseArg4)
 {
     // Values captured from the enclosing scope.
     this.container             = container;
     this.testCompleteContainer = testCompleteContainer;
     this._enclosing            = _enclosing;
 }
示例#7
0
 /// <summary>
 /// Overrides the factory hook to return a <c>_TestNodeStatusUpdaterResync_232</c>
 /// that receives this instance together with <c>testCompleteContainer</c> and
 /// <c>container</c>.
 /// </summary>
 /// <param name="context">Forwarded to the updater constructor.</param>
 /// <param name="dispatcher">Forwarded to the updater constructor.</param>
 /// <param name="healthChecker">Forwarded to the updater constructor.</param>
 /// <returns>The newly created test status updater.</returns>
 protected internal override NodeStatusUpdater CreateNodeStatusUpdater(
     Context context,
     Dispatcher dispatcher,
     NodeHealthCheckerService healthChecker)
 {
     NodeStatusUpdater updater = new _TestNodeStatusUpdaterResync_232(
         this, testCompleteContainer, container, context, dispatcher, healthChecker, this.metrics);

     return updater;
 }
 /// <summary>
 /// Creates the mock status updater: forwards all arguments to the base
 /// constructor, then assigns <c>resourceTracker</c> from
 /// <c>CreateResourceTracker()</c>.
 /// </summary>
 /// <param name="context">Forwarded to the base constructor.</param>
 /// <param name="dispatcher">Forwarded to the base constructor.</param>
 /// <param name="healthChecker">Forwarded to the base constructor.</param>
 /// <param name="metrics">Forwarded to the base constructor.</param>
 public MockNodeStatusUpdater(
     Context context,
     Dispatcher dispatcher,
     NodeHealthCheckerService healthChecker,
     NodeManagerMetrics metrics)
     : base(context, dispatcher, healthChecker, metrics)
 {
     // The tracker is built once, at construction time.
     resourceTracker = CreateResourceTracker();
 }
示例#9
0
            /// <summary>
            /// Overrides the factory hook to return a <c>MockNodeStatusUpdater</c>.
            /// </summary>
            /// <param name="context">Forwarded to the mock updater constructor.</param>
            /// <param name="dispatcher">Forwarded to the mock updater constructor.</param>
            /// <param name="healthChecker">Forwarded to the mock updater constructor.</param>
            /// <returns>A new <c>MockNodeStatusUpdater</c> built with this instance's <c>metrics</c>.</returns>
            protected internal override NodeStatusUpdater CreateNodeStatusUpdater(
                Context context,
                Dispatcher dispatcher,
                NodeHealthCheckerService healthChecker)
            {
                return new MockNodeStatusUpdater(context, dispatcher, healthChecker, this.metrics);
            }
        /// <summary>
        /// Drives the node health script runner through four script variants
        /// (healthy, error, healthy again, timing out) and checks after each run that
        /// the health flag and report copied into a <c>NodeHealthStatus</c> record
        /// match what the <c>NodeHealthCheckerService</c> reports.
        /// </summary>
        public virtual void TestNodeHealthScript()
        {
            RecordFactory    factory       = RecordFactoryProvider.GetRecordFactory(null);
            NodeHealthStatus healthStatus  = factory.NewRecordInstance <NodeHealthStatus>();
            string           errorScript   = "echo ERROR\n echo \"Tracker not healthy\"";
            string           normalScript  = "echo \"I am all fine\"";
            string           timeOutScript = Shell.Windows ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\""
                                 : "sleep 4\necho \"I am fine\"";
            Configuration conf = GetConfForNodeHealthScript();

            // Persist the configuration to disk. The stream is closed explicitly so the
            // file handle is released even when WriteXml throws.
            FileOutputStream configOut = new FileOutputStream(nodeHealthConfigFile);

            try
            {
                conf.WriteXml(configOut);
            }
            finally
            {
                configOut.Close();
            }
            conf.AddResource(nodeHealthConfigFile.GetName());
            WriteNodeHealthScriptFile(normalScript, true);
            NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService();

            nodeHealthChecker.Init(conf);
            NodeHealthScriptRunner nodeHealthScriptRunner = nodeHealthChecker.GetNodeHealthScriptRunner
                                                                ();
            // The timer task is invoked directly so each check runs synchronously in this test.
            TimerTask timerTask = nodeHealthScriptRunner.GetTimerTask();

            timerTask.Run();
            SetHealthStatus(healthStatus, nodeHealthChecker.IsHealthy(), nodeHealthChecker.GetHealthReport
                                (), nodeHealthChecker.GetLastHealthReportTime());
            Log.Info("Checking initial healthy condition");
            // Check proper report conditions.
            NUnit.Framework.Assert.IsTrue("Node health status reported unhealthy", healthStatus
                                          .GetIsNodeHealthy());
            NUnit.Framework.Assert.IsTrue("Node health status reported unhealthy", healthStatus
                                          .GetHealthReport().Equals(nodeHealthChecker.GetHealthReport()));
            // write out error file.
            // Healthy to unhealthy transition
            WriteNodeHealthScriptFile(errorScript, true);
            // Run timer
            timerTask.Run();
            // update health status
            SetHealthStatus(healthStatus, nodeHealthChecker.IsHealthy(), nodeHealthChecker.GetHealthReport
                                (), nodeHealthChecker.GetLastHealthReportTime());
            Log.Info("Checking Healthy--->Unhealthy");
            NUnit.Framework.Assert.IsFalse("Node health status reported healthy", healthStatus
                                           .GetIsNodeHealthy());
            NUnit.Framework.Assert.IsTrue("Node health status reported healthy", healthStatus
                                          .GetHealthReport().Equals(nodeHealthChecker.GetHealthReport()));
            // Check unhealthy to healthy transitions.
            WriteNodeHealthScriptFile(normalScript, true);
            timerTask.Run();
            SetHealthStatus(healthStatus, nodeHealthChecker.IsHealthy(), nodeHealthChecker.GetHealthReport
                                (), nodeHealthChecker.GetLastHealthReportTime());
            Log.Info("Checking UnHealthy--->healthy");
            // Check proper report conditions.
            NUnit.Framework.Assert.IsTrue("Node health status reported unhealthy", healthStatus
                                          .GetIsNodeHealthy());
            NUnit.Framework.Assert.IsTrue("Node health status reported unhealthy", healthStatus
                                          .GetHealthReport().Equals(nodeHealthChecker.GetHealthReport()));
            // Healthy to timeout transition.
            WriteNodeHealthScriptFile(timeOutScript, true);
            timerTask.Run();
            SetHealthStatus(healthStatus, nodeHealthChecker.IsHealthy(), nodeHealthChecker.GetHealthReport
                                (), nodeHealthChecker.GetLastHealthReportTime());
            Log.Info("Checking Healthy--->timeout");
            NUnit.Framework.Assert.IsFalse("Node health status reported healthy even after timeout"
                                           , healthStatus.GetIsNodeHealthy());
            // On timeout the expected report is the timed-out message, the separator and
            // the disk handler's health report, concatenated in that order.
            NUnit.Framework.Assert.IsTrue("Node script time out message not propogated", healthStatus
                                          .GetHealthReport().Equals(NodeHealthScriptRunner.NodeHealthScriptTimedOutMsg + NodeHealthCheckerService
                                                                    .Separator + nodeHealthChecker.GetDiskHandler().GetDisksHealthReport(false)));
        }
示例#11
0
        /// <summary>
        /// Starts a single container through a <c>DummyContainerManager</c>, waits for
        /// it to reach <c>ContainerState.Running</c>, stops it, and waits for
        /// <c>ContainerState.Complete</c> before stopping the container manager.
        /// </summary>
        public virtual void TestSuccessfulContainerLaunch()
        {
            // Recreate the three working directories from scratch.
            FileContext fs = FileContext.GetLocalFSFileContext();

            fs.Delete(new Path(localDir.GetAbsolutePath()), true);
            fs.Delete(new Path(localLogDir.GetAbsolutePath()), true);
            fs.Delete(new Path(remoteLogDir.GetAbsolutePath()), true);
            localDir.Mkdir();
            localLogDir.Mkdir();
            remoteLogDir.Mkdir();

            YarnConfiguration yarnConf = new YarnConfiguration();
            Context nmContext = new _NMContext_84(
                new NMContainerTokenSecretManager(yarnConf),
                new NMTokenSecretManagerInNM(),
                null,
                null,
                new NMNullStateStoreService());

            // Point the node manager at the directories created above.
            yarnConf.Set(YarnConfiguration.NmLocalDirs, localDir.GetAbsolutePath());
            yarnConf.Set(YarnConfiguration.NmLogDirs, localLogDir.GetAbsolutePath());
            yarnConf.Set(YarnConfiguration.NmRemoteAppLogDir, remoteLogDir.GetAbsolutePath());

            ContainerExecutor executor = new DefaultContainerExecutor();
            executor.SetConf(yarnConf);

            DeletionService deletion = new DeletionService(executor);
            Dispatcher eventDispatcher = new AsyncDispatcher();
            NodeHealthCheckerService healthService = new NodeHealthCheckerService();
            healthService.Init(yarnConf);

            LocalDirsHandlerService localDirsHandler = healthService.GetDiskHandler();
            NodeManagerMetrics nmMetrics = NodeManagerMetrics.Create();
            NodeStatusUpdater statusUpdater = new _NodeStatusUpdaterImpl_106(
                nmContext, eventDispatcher, healthService, nmMetrics);
            // Don't start any updating thread.
            DummyContainerManager manager = new DummyContainerManager(
                nmContext,
                executor,
                deletion,
                statusUpdater,
                nmMetrics,
                new ApplicationACLsManager(yarnConf),
                localDirsHandler);

            // The status updater is initialized and started before the container manager.
            statusUpdater.Init(yarnConf);
            ((NodeManager.NMContext)nmContext).SetContainerManager(manager);
            statusUpdater.Start();
            manager.Init(yarnConf);
            manager.Start();

            // Build a start request for container 0 of attempt 0 of application (0, 0).
            ContainerLaunchContext clc = recordFactory.NewRecordInstance <ContainerLaunchContext>();
            ApplicationId appId = ApplicationId.NewInstance(0, 0);
            ApplicationAttemptId attemptId = ApplicationAttemptId.NewInstance(appId, 0);
            ContainerId targetId = ContainerId.NewContainerId(attemptId, 0);
            string user = "******";
            StartContainerRequest startRequest = StartContainerRequest.NewInstance(
                clc,
                TestContainerManager.CreateContainerToken(
                    targetId,
                    SimulatedRmIdentifier,
                    nmContext.GetNodeId(),
                    user,
                    nmContext.GetContainerTokenSecretManager()));
            IList <StartContainerRequest> startRequests = new AList <StartContainerRequest>();
            startRequests.AddItem(startRequest);
            StartContainersRequest startBatch = StartContainersRequest.NewInstance(startRequests);

            manager.StartContainers(startBatch);
            BaseContainerManagerTest.WaitForContainerState(manager, targetId, ContainerState.Running);

            // Ask for the same container to be stopped and wait until it completes.
            IList <ContainerId> idsToStop = new AList <ContainerId>();
            idsToStop.AddItem(targetId);
            StopContainersRequest stopBatch = StopContainersRequest.NewInstance(idsToStop);

            manager.StopContainers(stopBatch);
            BaseContainerManagerTest.WaitForContainerState(manager, targetId, ContainerState.Complete);

            manager.Stop();
        }
示例#12
0
 /// <summary>
 /// Creates the status updater by forwarding every argument unchanged to the
 /// base constructor; no additional state is set up here.
 /// </summary>
 /// <param name="baseArg1">Context forwarded to the base constructor.</param>
 /// <param name="baseArg2">Dispatcher forwarded to the base constructor.</param>
 /// <param name="baseArg3">Health checker forwarded to the base constructor.</param>
 /// <param name="baseArg4">Metrics forwarded to the base constructor.</param>
 public _NodeStatusUpdaterImpl_106(
     Context baseArg1,
     Dispatcher baseArg2,
     NodeHealthCheckerService baseArg3,
     NodeManagerMetrics baseArg4)
     : base(baseArg1, baseArg2, baseArg3, baseArg4)
 {
 }