/// <summary>
/// Creates the status updater used by the resync tests. All collaborators are
/// forwarded to the base constructor; the enclosing test instance is retained.
/// </summary>
/// <param name="_enclosing">The enclosing <c>TestNodeManagerResync</c> instance.</param>
/// <param name="context">Context passed through to the base class.</param>
/// <param name="dispatcher">Dispatcher passed through to the base class.</param>
/// <param name="healthChecker">Health checker service passed through to the base class.</param>
/// <param name="metrics">Metrics object passed through to the base class.</param>
public TestNodeStatusUpdaterResync(
    TestNodeManagerResync _enclosing,
    Context context,
    Dispatcher dispatcher,
    NodeHealthCheckerService healthChecker,
    NodeManagerMetrics metrics)
    : base(context, dispatcher, healthChecker, metrics)
{
    this._enclosing = _enclosing;
}
/// <summary>
/// Creates the NodeManager's collaborators and registers them as child services,
/// then delegates to the base initializer. The registration order below is
/// significant: the node status updater is deliberately registered last.
/// </summary>
/// <param name="conf">Configuration used to build every child component.</param>
/// <exception cref="System.Exception"/>
protected override void ServiceInit(Configuration conf)
{
    conf.SetBoolean(Dispatcher.DispatcherExitOnErrorKey, true);
    rmWorkPreservingRestartEnabled = conf.GetBoolean(
        YarnConfiguration.RmWorkPreservingRecoveryEnabled,
        YarnConfiguration.DefaultRmWorkPreservingRecoveryEnabled);

    InitAndStartRecoveryStore(conf);

    // Secret managers are built on top of the recovery store and then recovered.
    NMContainerTokenSecretManager containerTokens =
        new NMContainerTokenSecretManager(conf, nmStore);
    NMTokenSecretManagerInNM nodeManagerTokens = new NMTokenSecretManagerInNM(nmStore);
    RecoverTokens(nodeManagerTokens, containerTokens);

    this.aclsManager = new ApplicationACLsManager(conf);

    // The executor implementation is configurable; DefaultContainerExecutor is the fallback.
    ContainerExecutor executor = ReflectionUtils.NewInstance(
        conf.GetClass<ContainerExecutor>(
            YarnConfiguration.NmContainerExecutor,
            typeof(DefaultContainerExecutor)),
        conf);
    try
    {
        executor.Init();
    }
    catch (IOException initFailure)
    {
        throw new YarnRuntimeException("Failed to initialize container executor", initFailure);
    }

    DeletionService deletionService = CreateDeletionService(executor);
    AddService(deletionService);

    // NodeManager level dispatcher
    this.dispatcher = new AsyncDispatcher();

    nodeHealthChecker = new NodeHealthCheckerService();
    AddService(nodeHealthChecker);
    dirsHandler = nodeHealthChecker.GetDiskHandler();

    this.context = CreateNMContext(containerTokens, nodeManagerTokens, nmStore);
    nodeStatusUpdater = CreateNodeStatusUpdater(context, dispatcher, nodeHealthChecker);

    NodeResourceMonitor resourceMonitor = CreateNodeResourceMonitor();
    AddService(resourceMonitor);

    containerManager = CreateContainerManager(
        context, executor, deletionService, nodeStatusUpdater, this.aclsManager, dirsHandler);
    AddService(containerManager);
    ((NodeManager.NMContext)context).SetContainerManager(containerManager);

    WebServer httpServer = CreateWebServer(
        context, containerManager.GetContainersMonitor(), this.aclsManager, dirsHandler);
    AddService(httpServer);
    ((NodeManager.NMContext)context).SetWebServer(httpServer);

    dispatcher.Register(typeof(ContainerManagerEventType), containerManager);
    dispatcher.Register(typeof(NodeManagerEventType), this);
    AddService(dispatcher);

    DefaultMetricsSystem.Initialize("NodeManager");

    // The status updater is added last so that it is started last: everything
    // else must be up before the node registers with the RM.
    AddService(nodeStatusUpdater);

    base.ServiceInit(conf);
}
/// <summary>
/// Creates a node status updater, naming the service after this class's full
/// type name, storing the supplied collaborators and creating the two empty
/// container-tracking maps.
/// </summary>
/// <param name="context">Context stored on this instance.</param>
/// <param name="dispatcher">Dispatcher stored on this instance.</param>
/// <param name="healthChecker">Health checker service stored on this instance.</param>
/// <param name="metrics">Metrics object stored on this instance.</param>
public NodeStatusUpdaterImpl(
    Context context,
    Dispatcher dispatcher,
    NodeHealthCheckerService healthChecker,
    NodeManagerMetrics metrics)
    : base(typeof(Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl).FullName)
{
    this.context = context;
    this.dispatcher = dispatcher;
    this.healthChecker = healthChecker;
    this.metrics = metrics;

    // Tracks recently stopped containers on the node manager. This avoids
    // misleading no-such-container exception messages on the NM when a finishing
    // AM asks the RM to stop containers that may already have completed.
    this.recentlyStoppedContainers = new LinkedHashMap<ContainerId, long>();

    // Holds the reported completed containers in case a heartbeat response is
    // lost; they are sent again until a successful response is received.
    this.pendingCompletedContainers = new Dictionary<ContainerId, ContainerStatus>();
}
/// <summary>
/// Factory method for the node status updater; subclasses may override it to
/// supply a different implementation.
/// </summary>
/// <param name="context">Context handed to the updater.</param>
/// <param name="dispatcher">Dispatcher handed to the updater.</param>
/// <param name="healthChecker">Health checker service handed to the updater.</param>
/// <returns>A new <c>NodeStatusUpdaterImpl</c> wired with this instance's metrics.</returns>
protected internal virtual NodeStatusUpdater CreateNodeStatusUpdater(
    Context context,
    Dispatcher dispatcher,
    NodeHealthCheckerService healthChecker)
{
    NodeStatusUpdater updater =
        new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics);
    return updater;
}
/// <summary>
/// Replaces the default status updater with the <c>TestNodeStatusUpdaterImpl3</c>
/// test double, passing this instance as its enclosing object.
/// </summary>
/// <param name="context">Context handed to the updater.</param>
/// <param name="dispatcher">Dispatcher handed to the updater.</param>
/// <param name="healthChecker">Health checker service handed to the updater.</param>
/// <returns>The newly created test status updater.</returns>
protected internal override NodeStatusUpdater CreateNodeStatusUpdater(
    Context context,
    Dispatcher dispatcher,
    NodeHealthCheckerService healthChecker)
{
    NodeStatusUpdater updater =
        new TestNodeManagerResync.TestNodeManager3.TestNodeStatusUpdaterImpl3(
            this, context, dispatcher, healthChecker, this.metrics);
    return updater;
}
/// <summary>
/// Constructor of a converter-generated nested class. It stores the enclosing
/// instance plus the two captured values and forwards everything else to the
/// base constructor.
/// </summary>
/// <param name="_enclosing">The enclosing <c>_NodeManager_225</c> instance.</param>
/// <param name="testCompleteContainer">Captured container status stored on this instance.</param>
/// <param name="container">Captured container stored on this instance.</param>
/// <param name="baseArg1">Context forwarded to the base constructor.</param>
/// <param name="baseArg2">Dispatcher forwarded to the base constructor.</param>
/// <param name="baseArg3">Health checker service forwarded to the base constructor.</param>
/// <param name="baseArg4">Metrics object forwarded to the base constructor.</param>
public _TestNodeStatusUpdaterResync_232(
    _NodeManager_225 _enclosing,
    ContainerStatus testCompleteContainer,
    Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container container,
    Context baseArg1,
    Dispatcher baseArg2,
    NodeHealthCheckerService baseArg3,
    NodeManagerMetrics baseArg4)
    : base(_enclosing, baseArg1, baseArg2, baseArg3, baseArg4)
{
    this.container = container;
    this.testCompleteContainer = testCompleteContainer;
    this._enclosing = _enclosing;
}
/// <summary>
/// Supplies the <c>_TestNodeStatusUpdaterResync_232</c> status updater, handing
/// it this instance together with the captured test container status and container.
/// </summary>
/// <param name="context">Context handed to the updater.</param>
/// <param name="dispatcher">Dispatcher handed to the updater.</param>
/// <param name="healthChecker">Health checker service handed to the updater.</param>
/// <returns>The newly created status updater.</returns>
protected internal override NodeStatusUpdater CreateNodeStatusUpdater(
    Context context,
    Dispatcher dispatcher,
    NodeHealthCheckerService healthChecker)
{
    NodeStatusUpdater updater = new _TestNodeStatusUpdaterResync_232(
        this,
        testCompleteContainer,
        container,
        context,
        dispatcher,
        healthChecker,
        this.metrics);
    return updater;
}
/// <summary>
/// Creates the mock status updater: forwards all collaborators to the base
/// constructor and then obtains the resource tracker from
/// <c>CreateResourceTracker</c>.
/// </summary>
/// <param name="context">Context forwarded to the base constructor.</param>
/// <param name="dispatcher">Dispatcher forwarded to the base constructor.</param>
/// <param name="healthChecker">Health checker service forwarded to the base constructor.</param>
/// <param name="metrics">Metrics object forwarded to the base constructor.</param>
public MockNodeStatusUpdater(
    Context context,
    Dispatcher dispatcher,
    NodeHealthCheckerService healthChecker,
    NodeManagerMetrics metrics)
    : base(context, dispatcher, healthChecker, metrics)
{
    resourceTracker = CreateResourceTracker();
}
/// <summary>
/// Replaces the default status updater with a <c>MockNodeStatusUpdater</c>
/// that shares this instance's metrics.
/// </summary>
/// <param name="context">Context handed to the updater.</param>
/// <param name="dispatcher">Dispatcher handed to the updater.</param>
/// <param name="healthChecker">Health checker service handed to the updater.</param>
/// <returns>The newly created mock status updater.</returns>
protected internal override NodeStatusUpdater CreateNodeStatusUpdater(
    Context context,
    Dispatcher dispatcher,
    NodeHealthCheckerService healthChecker)
{
    return new MockNodeStatusUpdater(context, dispatcher, healthChecker, this.metrics);
}
/// <summary>
/// Drives the node health script runner through four script variants
/// (healthy, error, healthy again, timing out) by running its timer task
/// directly, and checks after each run that the copied health status matches
/// what the health checker service reports.
/// </summary>
/// <remarks>
/// Fixes relative to the previous version: the stream used to write the
/// configuration file is now closed, and NUnit assertions receive the
/// condition first and the message second.
/// </remarks>
public virtual void TestNodeHealthScript()
{
    RecordFactory factory = RecordFactoryProvider.GetRecordFactory(null);
    NodeHealthStatus healthStatus = factory.NewRecordInstance<NodeHealthStatus>();

    string errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
    string normalScript = "echo \"I am all fine\"";
    string timeOutScript = Shell.Windows
        ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\""
        : "sleep 4\necho \"I am fine\"";

    Configuration conf = GetConfForNodeHealthScript();

    // Close the stream even if WriteXml throws, so the file handle is not
    // leaked before the configuration file is added as a resource below.
    FileOutputStream confStream = new FileOutputStream(nodeHealthConfigFile);
    try
    {
        conf.WriteXml(confStream);
    }
    finally
    {
        confStream.Close();
    }
    conf.AddResource(nodeHealthConfigFile.GetName());

    WriteNodeHealthScriptFile(normalScript, true);
    NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService();
    nodeHealthChecker.Init(conf);
    NodeHealthScriptRunner nodeHealthScriptRunner = nodeHealthChecker.GetNodeHealthScriptRunner();
    TimerTask timerTask = nodeHealthScriptRunner.GetTimerTask();

    // Initial state: the normal script is in place.
    timerTask.Run();
    SetHealthStatus(healthStatus, nodeHealthChecker.IsHealthy(),
        nodeHealthChecker.GetHealthReport(), nodeHealthChecker.GetLastHealthReportTime());
    Log.Info("Checking initial healthy condition");
    // Check proper report conditions.
    NUnit.Framework.Assert.IsTrue(healthStatus.GetIsNodeHealthy(),
        "Node health status reported unhealthy");
    NUnit.Framework.Assert.IsTrue(
        healthStatus.GetHealthReport().Equals(nodeHealthChecker.GetHealthReport()),
        "Node health status reported unhealthy");

    // Healthy to unhealthy transition: write out the error script and rerun.
    WriteNodeHealthScriptFile(errorScript, true);
    timerTask.Run();
    SetHealthStatus(healthStatus, nodeHealthChecker.IsHealthy(),
        nodeHealthChecker.GetHealthReport(), nodeHealthChecker.GetLastHealthReportTime());
    Log.Info("Checking Healthy--->Unhealthy");
    NUnit.Framework.Assert.IsFalse(healthStatus.GetIsNodeHealthy(),
        "Node health status reported healthy");
    NUnit.Framework.Assert.IsTrue(
        healthStatus.GetHealthReport().Equals(nodeHealthChecker.GetHealthReport()),
        "Node health status reported healthy");

    // Unhealthy to healthy transition.
    WriteNodeHealthScriptFile(normalScript, true);
    timerTask.Run();
    SetHealthStatus(healthStatus, nodeHealthChecker.IsHealthy(),
        nodeHealthChecker.GetHealthReport(), nodeHealthChecker.GetLastHealthReportTime());
    Log.Info("Checking UnHealthy--->healthy");
    // Check proper report conditions.
    NUnit.Framework.Assert.IsTrue(healthStatus.GetIsNodeHealthy(),
        "Node health status reported unhealthy");
    NUnit.Framework.Assert.IsTrue(
        healthStatus.GetHealthReport().Equals(nodeHealthChecker.GetHealthReport()),
        "Node health status reported unhealthy");

    // Healthy to timeout transition.
    WriteNodeHealthScriptFile(timeOutScript, true);
    timerTask.Run();
    SetHealthStatus(healthStatus, nodeHealthChecker.IsHealthy(),
        nodeHealthChecker.GetHealthReport(), nodeHealthChecker.GetLastHealthReportTime());
    Log.Info("Checking Healthy--->timeout");
    NUnit.Framework.Assert.IsFalse(healthStatus.GetIsNodeHealthy(),
        "Node health status reported healthy even after timeout");
    // The expected report is the timeout message, the separator and the
    // disk handler's health report.
    NUnit.Framework.Assert.IsTrue(
        healthStatus.GetHealthReport().Equals(
            NodeHealthScriptRunner.NodeHealthScriptTimedOutMsg
            + NodeHealthCheckerService.Separator
            + nodeHealthChecker.GetDiskHandler().GetDisksHealthReport(false)),
        "Node script time out message not propogated");
}
/// <summary>
/// Starts a <c>DummyContainerManager</c> against freshly recreated local, log
/// and remote-log directories, launches one container, waits for it to reach
/// <c>Running</c>, stops it, and waits for it to reach <c>Complete</c>.
/// </summary>
/// <remarks>
/// The container manager is stopped in a <c>finally</c> block so that a failed
/// request or state wait does not leave the started service running.
/// </remarks>
public virtual void TestSuccessfulContainerLaunch()
{
    // Start from clean directories.
    FileContext localFS = FileContext.GetLocalFSFileContext();
    localFS.Delete(new Path(localDir.GetAbsolutePath()), true);
    localFS.Delete(new Path(localLogDir.GetAbsolutePath()), true);
    localFS.Delete(new Path(remoteLogDir.GetAbsolutePath()), true);
    localDir.Mkdir();
    localLogDir.Mkdir();
    remoteLogDir.Mkdir();

    YarnConfiguration conf = new YarnConfiguration();
    Context context = new _NMContext_84(
        new NMContainerTokenSecretManager(conf),
        new NMTokenSecretManagerInNM(),
        null,
        null,
        new NMNullStateStoreService());
    conf.Set(YarnConfiguration.NmLocalDirs, localDir.GetAbsolutePath());
    conf.Set(YarnConfiguration.NmLogDirs, localLogDir.GetAbsolutePath());
    conf.Set(YarnConfiguration.NmRemoteAppLogDir, remoteLogDir.GetAbsolutePath());

    ContainerExecutor exec = new DefaultContainerExecutor();
    exec.SetConf(conf);
    DeletionService del = new DeletionService(exec);
    Dispatcher dispatcher = new AsyncDispatcher();
    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
    healthChecker.Init(conf);
    LocalDirsHandlerService dirsHandler = healthChecker.GetDiskHandler();
    NodeManagerMetrics metrics = NodeManagerMetrics.Create();

    // Don't start any updating thread.
    NodeStatusUpdater nodeStatusUpdater =
        new _NodeStatusUpdaterImpl_106(context, dispatcher, healthChecker, metrics);

    DummyContainerManager containerManager = new DummyContainerManager(
        context, exec, del, nodeStatusUpdater, metrics,
        new ApplicationACLsManager(conf), dirsHandler);
    nodeStatusUpdater.Init(conf);
    ((NodeManager.NMContext)context).SetContainerManager(containerManager);
    nodeStatusUpdater.Start();
    containerManager.Init(conf);
    containerManager.Start();

    try
    {
        ContainerLaunchContext launchContext =
            recordFactory.NewRecordInstance<ContainerLaunchContext>();
        ApplicationId applicationId = ApplicationId.NewInstance(0, 0);
        ApplicationAttemptId applicationAttemptId =
            ApplicationAttemptId.NewInstance(applicationId, 0);
        ContainerId cID = ContainerId.NewContainerId(applicationAttemptId, 0);
        string user = "******";

        StartContainerRequest scRequest = StartContainerRequest.NewInstance(
            launchContext,
            TestContainerManager.CreateContainerToken(
                cID, SimulatedRmIdentifier, context.GetNodeId(), user,
                context.GetContainerTokenSecretManager()));
        IList<StartContainerRequest> list = new AList<StartContainerRequest>();
        list.AddItem(scRequest);
        StartContainersRequest allRequests = StartContainersRequest.NewInstance(list);
        containerManager.StartContainers(allRequests);
        BaseContainerManagerTest.WaitForContainerState(
            containerManager, cID, ContainerState.Running);

        IList<ContainerId> containerIds = new AList<ContainerId>();
        containerIds.AddItem(cID);
        StopContainersRequest stopRequest = StopContainersRequest.NewInstance(containerIds);
        containerManager.StopContainers(stopRequest);
        BaseContainerManagerTest.WaitForContainerState(
            containerManager, cID, ContainerState.Complete);
    }
    finally
    {
        // Always stop the started container manager, including when a request
        // or a state wait above throws.
        containerManager.Stop();
    }
}
/// <summary>
/// Constructor of a converter-generated subclass; it adds no state of its own
/// and forwards every argument to the base constructor.
/// </summary>
/// <param name="baseArg1">Context forwarded to the base constructor.</param>
/// <param name="baseArg2">Dispatcher forwarded to the base constructor.</param>
/// <param name="baseArg3">Health checker service forwarded to the base constructor.</param>
/// <param name="baseArg4">Metrics object forwarded to the base constructor.</param>
public _NodeStatusUpdaterImpl_106(
    Context baseArg1,
    Dispatcher baseArg2,
    NodeHealthCheckerService baseArg3,
    NodeManagerMetrics baseArg4)
    : base(baseArg1, baseArg2, baseArg3, baseArg4)
{
}