/// <summary>
/// Registers a node manager through <c>resourceTracker</c>, asserts that
/// <c>WaitForNodeManagerToConnect</c> reports success for it, then starts a new
/// failover thread and sends a node heartbeat.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestResourceTrackerOnHA()
{
    NodeId localNode = NodeId.NewInstance("localhost", 0);
    Resource capability = Resource.NewInstance(2048, 4);

    // Registration has to go through even when a failover happens.
    RegisterNodeManagerRequest registration = RegisterNodeManagerRequest.NewInstance(
        localNode, 0, capability, YarnVersionInfo.GetVersion(), null, null);
    resourceTracker.RegisterNodeManager(registration);
    NUnit.Framework.Assert.IsTrue(WaitForNodeManagerToConnect(10000, localNode));

    // Restart the failover thread, then make sure a heartbeat still works.
    failoverThread = CreateAndStartFailoverThread();
    NodeStatus heartbeatStatus = NodeStatus.NewInstance(
        NodeId.NewInstance("localhost", 0), 0, null, null, null);
    NodeHeartbeatRequest heartbeat = NodeHeartbeatRequest.NewInstance(heartbeatStatus, null, null);
    resourceTracker.NodeHeartbeat(heartbeat);
}
/// <summary>
/// Heartbeat loop. Until the enclosing updater is flagged as stopped, sends the current
/// node status to the resource tracker and acts on the response: refreshes master keys,
/// handles Shutdown/Resync actions, dispatches container and application cleanup events,
/// and stores system credentials.
/// </summary>
public void Run()
{
    // Local alias for the enclosing updater; every field is still read through it on each use.
    Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl updater = this._enclosing;
    int previousResponseId = 0;

    while (!updater.isStopped)
    {
        try
        {
            NodeStatus status = updater.GetNodeStatus(previousResponseId);
            NodeHeartbeatRequest heartbeat = NodeHeartbeatRequest.NewInstance(
                status,
                updater.context.GetContainerTokenSecretManager().GetCurrentKey(),
                updater.context.GetNMTokenSecretManager().GetCurrentKey());
            NodeHeartbeatResponse reply = updater.resourceTracker.NodeHeartbeat(heartbeat);

            // Remember the interval reported by the response before processing the rest of it.
            updater.nextHeartBeatInterval = reply.GetNextHeartBeatInterval();
            this.UpdateMasterKeys(reply);

            if (reply.GetNodeAction() == NodeAction.Shutdown)
            {
                Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                    "Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat," + " hence shutting down.");
                Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                    "Message from ResourceManager: " + reply.GetDiagnosticsMessage());
                // Mark the context decommissioned, dispatch the Shutdown event, leave the loop.
                updater.context.SetDecommissioned(true);
                updater.dispatcher.GetEventHandler().Handle(
                    new NodeManagerEvent(NodeManagerEventType.Shutdown));
                break;
            }

            if (reply.GetNodeAction() == NodeAction.Resync)
            {
                Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                    "Node is out of sync with ResourceManager," + " hence resyncing.");
                Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                    "Message from ResourceManager: " + reply.GetDiagnosticsMessage());
                // Reset the RM identifier to the invalid marker, dispatch the Resync event,
                // drop the pending completed containers, then leave the loop.
                updater.rmIdentifier = ResourceManagerConstants.RmInvalidIdentifier;
                updater.dispatcher.GetEventHandler().Handle(
                    new NodeManagerEvent(NodeManagerEventType.Resync));
                updater.pendingCompletedContainers.Clear();
                break;
            }

            updater.RemoveOrTrackCompletedContainersFromContext(reply.GetContainersToBeRemovedFromNM());
            previousResponseId = reply.GetResponseId();

            IList<ContainerId> finishedContainers = reply.GetContainersToCleanup();
            if (!finishedContainers.IsEmpty())
            {
                updater.dispatcher.GetEventHandler().Handle(
                    new CMgrCompletedContainersEvent(
                        finishedContainers, CMgrCompletedContainersEvent.Reason.ByResourcemanager));
            }

            IList<ApplicationId> finishedApps = reply.GetApplicationsToCleanup();
            // Tracking is done for every response, even when the list is empty.
            updater.TrackAppsForKeepAlive(finishedApps);
            if (!finishedApps.IsEmpty())
            {
                updater.dispatcher.GetEventHandler().Handle(
                    new CMgrCompletedAppsEvent(
                        finishedApps, CMgrCompletedAppsEvent.Reason.ByResourcemanager));
            }

            IDictionary<ApplicationId, ByteBuffer> credentialsByApp = reply.GetSystemCredentialsForApps();
            if (credentialsByApp != null && !credentialsByApp.IsEmpty())
            {
                ((NodeManager.NMContext)updater.context).SetSystemCrendentialsForApps(
                    Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.ParseCredentials(credentialsByApp));
            }
        }
        catch (ConnectException connectFailure)
        {
            // A connection failure ends the loop: dispatch Shutdown and rethrow wrapped.
            updater.dispatcher.GetEventHandler().Handle(
                new NodeManagerEvent(NodeManagerEventType.Shutdown));
            throw new YarnRuntimeException(connectFailure);
        }
        catch (Exception failure)
        {
            // Anything else is logged and the loop carries on with the next heartbeat.
            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Error(
                "Caught exception in status-updater", failure);
        }
        finally
        {
            // Runs on every exit from the try block, including the break statements and
            // the rethrow above, so one wait happens before the loop is actually left.
            lock (updater.heartbeatMonitor)
            {
                // Fall back to the configured default when no positive interval is set.
                if (updater.nextHeartBeatInterval <= 0)
                {
                    updater.nextHeartBeatInterval = YarnConfiguration.DefaultRmNmHeartbeatIntervalMs;
                }

                try
                {
                    Sharpen.Runtime.Wait(updater.heartbeatMonitor, updater.nextHeartBeatInterval);
                }
                catch (Exception)
                {
                    // Deliberately ignored: the loop condition is re-evaluated next.
                }
            }
        }
    }
}