/// <summary>
/// Heartbeat override used while testing failover. It calls
/// <c>ResetStartFailoverFlag(true)</c> on the enclosing test, asserts that
/// <c>WaittingForFailOver()</c> reports true, and then forwards the request
/// to the base implementation.
/// </summary>
/// <param name="request">The heartbeat request to forward.</param>
/// <returns>Whatever the base implementation returns for <paramref name="request"/>.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public override NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    this._enclosing.ResetStartFailoverFlag(true);

    // make sure failover has been triggered
    var failoverObserved = this._enclosing.WaittingForFailOver();
    NUnit.Framework.Assert.IsTrue(failoverObserved);

    return base.NodeHeartbeat(request);
}
/// <summary>
/// Heartbeat loop: until the enclosing test sets <c>stopT</c>, builds a healthy
/// node status for the node of <c>request3</c>, sends it to the resource tracker
/// service, remembers the returned response id, and sleeps for 1000 ms.
/// Any exception inside one iteration is logged and the loop continues.
/// </summary>
public override void Run()
{
    int previousResponseId = 0;
    while (!this._enclosing.stopT)
    {
        try
        {
            NodeStatus status = TestNMExpiry.recordFactory.NewRecordInstance<NodeStatus>();
            status.SetNodeId(this._enclosing.request3.GetNodeId());
            status.SetResponseId(previousResponseId);
            status.SetNodeHealthStatus(
                TestNMExpiry.recordFactory.NewRecordInstance<NodeHealthStatus>());
            // Mark the health record healthy through the status object, as the record was just attached.
            status.GetNodeHealthStatus().SetIsNodeHealthy(true);

            NodeHeartbeatRequest heartbeat =
                TestNMExpiry.recordFactory.NewRecordInstance<NodeHeartbeatRequest>();
            heartbeat.SetNodeStatus(status);

            NodeHeartbeatResponse reply =
                this._enclosing.resourceTrackerService.NodeHeartbeat(heartbeat);
            previousResponseId = reply.GetResponseId();

            Sharpen.Thread.Sleep(1000);
        }
        catch (Exception e)
        {
            // Best effort: a failed heartbeat is only logged so the loop keeps running.
            TestNMExpiry.Log.Info("failed to heartbeat ", e);
        }
    }
}
/// <summary>
/// Stub heartbeat handler: ignores the request and returns a freshly created,
/// unpopulated <c>NodeHeartbeatResponse</c> record.
/// </summary>
/// <param name="request">The heartbeat request (not inspected).</param>
/// <returns>A new response record from <c>recordFactory</c>.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    return recordFactory.NewRecordInstance<NodeHeartbeatResponse>();
}
/// <summary>
/// Test heartbeat handler whose outcome is controlled by the <c>exception</c> flag:
/// when the flag is set it throws a <c>YarnException</c> with the message
/// "testMessage"; otherwise it returns a new, empty response record.
/// </summary>
/// <param name="request">The heartbeat request (not inspected).</param>
/// <returns>A new response record from <c>recordFactory</c> when the flag is clear.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException">Thrown when <c>exception</c> is true.</exception>
/// <exception cref="System.IO.IOException"/>
public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    if (!exception)
    {
        return recordFactory.NewRecordInstance<NodeHeartbeatResponse>();
    }

    throw new YarnException("testMessage");
}
/// <summary>
/// Counter-driven heartbeat stub. Logs the current <c>heartBeatID</c>, writes it
/// into the request's node status as the response id, increments the counter,
/// and returns a response built with the incremented id.
/// </summary>
/// <param name="request">The heartbeat request; its node status is mutated.</param>
/// <returns>
/// A response created by <c>YarnServerBuilderUtils.NewNodeHeartbeatResponse</c> with the
/// incremented id, five null arguments and 1000L as the last argument
/// (presumably the next heartbeat interval in ms - confirm against the builder).
/// </returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    NodeStatus reportedStatus = request.GetNodeStatus();
    Log.Info("Got heartbeat number " + heartBeatID);

    // Capture the id and bump the counter before handing the old value to the status.
    var idBeforeIncrement = heartBeatID++;
    reportedStatus.SetResponseId(idBeforeIncrement);

    return YarnServerBuilderUtils.NewNodeHeartbeatResponse(
        heartBeatID, null, null, null, null, null, 1000L);
}
/// <summary>
/// Client-side heartbeat: extracts the protobuf message from the request
/// (which must be a <c>NodeHeartbeatRequestPBImpl</c>), sends it through
/// <c>proxy</c>, and wraps the reply in a <c>NodeHeartbeatResponsePBImpl</c>.
/// </summary>
/// <param name="request">The heartbeat request; cast to <c>NodeHeartbeatRequestPBImpl</c>.</param>
/// <returns>The wrapped reply from the proxy.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    NodeHeartbeatRequestPBImpl pbRequest = (NodeHeartbeatRequestPBImpl)request;
    YarnServerCommonServiceProtos.NodeHeartbeatRequestProto wireRequest = pbRequest.GetProto();

    try
    {
        var wireResponse = proxy.NodeHeartbeat(null, wireRequest);
        return new NodeHeartbeatResponsePBImpl(wireResponse);
    }
    catch (ServiceException e)
    {
        // The helper is expected to rethrow the unwrapped cause; the return only satisfies the compiler.
        RPCUtil.UnwrapAndThrowException(e);
        return null;
    }
}
/// <summary>
/// Sends one heartbeat for this node: builds a node status from <c>nodeId</c> and the
/// current container statuses, stamps it with the stored <c>responseID</c>, submits it
/// to the resource tracker service, and stores the response id from the reply.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
public virtual void Heartbeat()
{
    NodeStatus status = Org.Apache.Hadoop.Yarn.Server.Resourcemanager.NodeManager.CreateNodeStatus(
        nodeId, GetContainerStatuses(containers));
    status.SetResponseId(responseID);

    NodeHeartbeatRequest heartbeat = recordFactory.NewRecordInstance<NodeHeartbeatRequest>();
    heartbeat.SetNodeStatus(status);

    NodeHeartbeatResponse reply = resourceTrackerService.NodeHeartbeat(heartbeat);
    responseID = reply.GetResponseId();
}
/// <summary>
/// Forwards the heartbeat to the wrapped tracker <c>rt</c>. If the tracker throws a
/// <c>YarnException</c>, the failure is logged together with the sending node's id
/// and the same exception is rethrown.
/// </summary>
/// <param name="request">The heartbeat request to forward.</param>
/// <returns>The response produced by <c>rt</c>.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    try
    {
        return rt.NodeHeartbeat(request);
    }
    catch (YarnException e)
    {
        MiniYARNCluster.Log.Info(
            "Exception in heartbeat from node " + request.GetNodeStatus().GetNodeId(), e);
        // Rethrow with the original stack trace intact.
        throw;
    }
}
/// <summary>
/// Verifies the response-id handshake of the resource tracker service:
/// a registered node heartbeating with id 0 gets id 1, heartbeating with id 1 gets id 2,
/// replaying the id-1 heartbeat gets id 2 again, and falling back to id 0 yields a
/// <c>NodeAction.Resync</c> response with a "Too far behind" diagnostics message.
/// </summary>
public virtual void TestRPCResponseId()
{
    string node = "localhost";
    Resource capability = BuilderUtils.NewResource(1024, 1);
    nodeId = NodeId.NewInstance(node, 1234);

    // Register the node so that heartbeats are accepted.
    RegisterNodeManagerRequest registration =
        recordFactory.NewRecordInstance<RegisterNodeManagerRequest>();
    registration.SetNodeId(nodeId);
    registration.SetHttpPort(0);
    registration.SetResource(capability);
    resourceTrackerService.RegisterNodeManager(registration);

    // Build a healthy status for the node and wrap it in a heartbeat request.
    NodeStatus nodeStatus = recordFactory.NewRecordInstance<NodeStatus>();
    nodeStatus.SetNodeId(nodeId);
    NodeHealthStatus nodeHealthStatus = recordFactory.NewRecordInstance<NodeHealthStatus>();
    nodeHealthStatus.SetIsNodeHealthy(true);
    nodeStatus.SetNodeHealthStatus(nodeHealthStatus);
    NodeHeartbeatRequest nodeHeartBeatRequest =
        recordFactory.NewRecordInstance<NodeHeartbeatRequest>();
    nodeHeartBeatRequest.SetNodeStatus(nodeStatus);

    // First heartbeat with id 0.
    nodeStatus.SetResponseId(0);
    NodeHeartbeatResponse response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
    // AreEqual (rather than IsTrue(a == b)) reports expected and actual values on failure.
    NUnit.Framework.Assert.AreEqual(1, response.GetResponseId());

    // Second heartbeat echoes the id that was just received.
    nodeStatus.SetResponseId(response.GetResponseId());
    response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
    NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());

    // Replay the same heartbeat (the status still carries id 1): the id must stay at 2.
    response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
    NUnit.Framework.Assert.AreEqual(2, response.GetResponseId());

    // Fall back to id 0: the node is asked to resync.
    nodeStatus.SetResponseId(0);
    response = resourceTrackerService.NodeHeartbeat(nodeHeartBeatRequest);
    NUnit.Framework.Assert.AreEqual(NodeAction.Resync, response.GetNodeAction());
    NUnit.Framework.Assert.AreEqual(
        "Too far behind rm response id:2 nm response id:0",
        response.GetDiagnosticsMessage());
}
/// <summary>
/// Checks that the protobuf record factory creates a <c>NodeHeartbeatRequestPBImpl</c>
/// when asked for a <c>NodeHeartbeatRequest</c>. A <c>YarnRuntimeException</c> during
/// creation prints its stack trace and fails the test.
/// </summary>
public virtual void TestPbRecordFactory()
{
    RecordFactory factory = RecordFactoryPBImpl.Get();
    try
    {
        NodeHeartbeatRequest created = factory.NewRecordInstance<NodeHeartbeatRequest>();
        var actualType = created.GetType();
        NUnit.Framework.Assert.AreEqual(typeof(NodeHeartbeatRequestPBImpl), actualType);
    }
    catch (YarnRuntimeException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
        NUnit.Framework.Assert.Fail("Failed to crete record");
    }
}
/// <summary>
/// Heartbeat override that inspects the reported container statuses: exactly one status
/// is expected and its container id must match <c>testCompleteContainer</c>. Because the
/// check runs off the test thread, any failure is printed and recorded in the outer test's
/// <c>assertionFailedInThread</c> flag instead of propagating.
/// </summary>
/// <param name="request">The heartbeat request whose container statuses are checked.</param>
/// <returns>Always a response with id 1 and <c>NodeAction.Resync</c>.</returns>
public override NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    IList<ContainerStatus> reported = request.GetNodeStatus().GetContainersStatuses();
    try
    {
        NUnit.Framework.Assert.AreEqual(1, reported.Count);
        NUnit.Framework.Assert.AreEqual(
            testCompleteContainer.GetContainerId(), reported[0].GetContainerId());
    }
    catch (Exception error)
    {
        // Surface the failure to the test through the shared flag.
        Sharpen.Runtime.PrintStackTrace(error);
        this._enclosing._enclosing._enclosing.assertionFailedInThread.Set(true);
    }

    NodeHeartbeatResponse resyncReply = YarnServerBuilderUtils.NewNodeHeartbeatResponse(
        1, NodeAction.Resync, null, null, null, null, 1000L);
    return resyncReply;
}
/// <summary>
/// Exercises the resource tracker across failover: registers a node manager, waits up to
/// 10000 for it to connect, restarts the failover thread, and then sends a heartbeat.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestResourceTrackerOnHA()
{
    NodeId nodeId = NodeId.NewInstance("localhost", 0);
    Resource resource = Resource.NewInstance(2048, 4);

    // make sure registerNodeManager works when failover happens
    RegisterNodeManagerRequest registration = RegisterNodeManagerRequest.NewInstance(
        nodeId, 0, resource, YarnVersionInfo.GetVersion(), null, null);
    resourceTracker.RegisterNodeManager(registration);
    NUnit.Framework.Assert.IsTrue(WaitForNodeManagerToConnect(10000, nodeId));

    // restart the failover thread, and make sure nodeHeartbeat works
    failoverThread = CreateAndStartFailoverThread();

    NodeId heartbeatNodeId = NodeId.NewInstance("localhost", 0);
    NodeStatus heartbeatStatus = NodeStatus.NewInstance(heartbeatNodeId, 0, null, null, null);
    NodeHeartbeatRequest heartbeat = NodeHeartbeatRequest.NewInstance(heartbeatStatus, null, null);
    resourceTracker.NodeHeartbeat(heartbeat);
}
/// <summary>
/// Sends a heartbeat for this mock node and adopts any new master keys from the reply.
/// </summary>
/// <param name="conts">
/// Container statuses grouped by application. Each entry's list replaces the statuses set
/// by the previous entry, so only the last enumerated entry ends up in the node status.
/// </param>
/// <param name="isHealthy">Health flag to report for the node.</param>
/// <param name="resId">Response id to place in the node status.</param>
/// <returns>The response returned by <c>resourceTracker</c>.</returns>
/// <exception cref="System.Exception"/>
public virtual NodeHeartbeatResponse NodeHeartbeat(
    IDictionary<ApplicationId, IList<Org.Apache.Hadoop.Yarn.Api.Records.ContainerStatus>> conts,
    bool isHealthy,
    int resId)
{
    NodeHeartbeatRequest heartbeat =
        Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NodeHeartbeatRequest>();
    NodeStatus nodeStatus = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NodeStatus>();
    nodeStatus.SetResponseId(resId);
    nodeStatus.SetNodeId(nodeId);

    foreach (KeyValuePair<ApplicationId, IList<Org.Apache.Hadoop.Yarn.Api.Records.ContainerStatus>> appEntry in conts)
    {
        IList<Org.Apache.Hadoop.Yarn.Api.Records.ContainerStatus> appStatuses = appEntry.Value;
        Org.Mortbay.Log.Log.Info("entry.getValue() " + appStatuses);
        nodeStatus.SetContainersStatuses(appStatuses);
    }

    NodeHealthStatus health = Org.Apache.Hadoop.Yarn.Util.Records.NewRecord<NodeHealthStatus>();
    health.SetHealthReport(string.Empty);
    health.SetIsNodeHealthy(isHealthy);
    health.SetLastHealthReportTime(1);
    nodeStatus.SetNodeHealthStatus(health);

    heartbeat.SetNodeStatus(nodeStatus);
    heartbeat.SetLastKnownContainerTokenMasterKey(this.currentContainerTokenMasterKey);
    heartbeat.SetLastKnownNMTokenMasterKey(this.currentNMTokenMasterKey);

    NodeHeartbeatResponse reply = resourceTracker.NodeHeartbeat(heartbeat);

    // Adopt the container-token master key when the reply carries one with a different id.
    MasterKey containerKey = reply.GetContainerTokenMasterKey();
    bool containerKeyChanged = containerKey != null
        && containerKey.GetKeyId() != this.currentContainerTokenMasterKey.GetKeyId();
    if (containerKeyChanged)
    {
        this.currentContainerTokenMasterKey = containerKey;
    }

    // Same for the NM-token master key.
    MasterKey nmTokenKey = reply.GetNMTokenMasterKey();
    bool nmTokenKeyChanged = nmTokenKey != null
        && nmTokenKey.GetKeyId() != this.currentNMTokenMasterKey.GetKeyId();
    if (nmTokenKeyChanged)
    {
        this.currentNMTokenMasterKey = nmTokenKey;
    }

    return reply;
}
/// <summary>
/// Checks both outcomes of the client heartbeat call: a non-null response in the normal
/// case, and a <c>YarnException</c> whose message starts with "testMessage" once the test
/// server's <c>exception</c> flag is switched on. The flag is always reset afterwards.
/// </summary>
public virtual void TestNodeHeartbeat()
{
    NodeHeartbeatRequest heartbeat = recordFactory.NewRecordInstance<NodeHeartbeatRequest>();

    var normalReply = client.NodeHeartbeat(heartbeat);
    NUnit.Framework.Assert.IsNotNull(normalReply);

    TestResourceTrackerPBClientImpl.ResourceTrackerTestImpl.exception = true;
    try
    {
        client.NodeHeartbeat(heartbeat);
        NUnit.Framework.Assert.Fail("there should be YarnException");
    }
    catch (YarnException e)
    {
        bool hasExpectedPrefix = e.Message.StartsWith("testMessage");
        NUnit.Framework.Assert.IsTrue(hasExpectedPrefix);
    }
    finally
    {
        // Restore the shared flag so it cannot leak into other tests.
        TestResourceTrackerPBClientImpl.ResourceTrackerTestImpl.exception = false;
    }
}
/// <summary>
/// Copies the next container-token and NM-token master keys into the heartbeat response
/// when the node needs them. A key is sent when the corresponding secret manager has a
/// next key and the node's last known key is either absent or has a different key id.
/// </summary>
/// <param name="request">The heartbeat request carrying the node's last known master keys (either may be null).</param>
/// <param name="nodeHeartBeatResponse">The response to populate.</param>
private void PopulateKeys(NodeHeartbeatRequest request, NodeHeartbeatResponse nodeHeartBeatResponse)
{
    // Check if node's masterKey needs to be updated and if the currentKey has
    // rolled over, send it across

    // ContainerTokenMasterKey
    MasterKey nextContainerTokenKey = this.containerTokenSecretManager.GetNextKey();
    if (nextContainerTokenKey != null)
    {
        // A request may carry no last-known key; treat that as "needs the next key"
        // instead of dereferencing null.
        MasterKey knownContainerTokenKey = request.GetLastKnownContainerTokenMasterKey();
        if (knownContainerTokenKey == null
            || knownContainerTokenKey.GetKeyId() != nextContainerTokenKey.GetKeyId())
        {
            nodeHeartBeatResponse.SetContainerTokenMasterKey(nextContainerTokenKey);
        }
    }

    // NMTokenMasterKey
    MasterKey nextNMTokenKey = this.nmTokenSecretManager.GetNextKey();
    if (nextNMTokenKey != null)
    {
        MasterKey knownNMTokenKey = request.GetLastKnownNMTokenMasterKey();
        if (knownNMTokenKey == null
            || knownNMTokenKey.GetKeyId() != nextNMTokenKey.GetKeyId())
        {
            nodeHeartBeatResponse.SetNMTokenMasterKey(nextNMTokenKey);
        }
    }
}
/// <summary>
/// Placeholder heartbeat handler: performs no work and always returns null.
/// </summary>
/// <param name="request">The heartbeat request (ignored).</param>
/// <returns>Always null.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    // TODO Auto-generated method stub
    NodeHeartbeatResponse noResponse = null;
    return noResponse;
}
/// <summary>
/// Status-updater loop. Until the enclosing updater is stopped, each iteration sends the
/// node status with the current master keys to the resource tracker and acts on the reply:
/// <list type="bullet">
/// <item>stores the next heartbeat interval and updates the master keys;</item>
/// <item>on <c>Shutdown</c>: marks the context decommissioned, raises a shutdown event, leaves the loop;</item>
/// <item>on <c>Resync</c>: invalidates the RM identifier, raises a resync event, clears pending completed containers, leaves the loop;</item>
/// <item>otherwise: processes containers to remove/clean up, applications to clean up, and system credentials.</item>
/// </list>
/// After every iteration (including the ones that leave the loop) the thread waits on
/// <c>heartbeatMonitor</c> for the heartbeat interval.
/// </summary>
public void Run()
{
    int lastHeartBeatID = 0;
    while (!this._enclosing.isStopped)
    {
        try
        {
            NodeStatus currentStatus = this._enclosing.GetNodeStatus(lastHeartBeatID);
            NodeHeartbeatRequest heartbeat = NodeHeartbeatRequest.NewInstance(
                currentStatus,
                this._enclosing.context.GetContainerTokenSecretManager().GetCurrentKey(),
                this._enclosing.context.GetNMTokenSecretManager().GetCurrentKey());
            NodeHeartbeatResponse response = this._enclosing.resourceTracker.NodeHeartbeat(heartbeat);

            this._enclosing.nextHeartBeatInterval = response.GetNextHeartBeatInterval();
            this.UpdateMasterKeys(response);

            if (response.GetNodeAction() == NodeAction.Shutdown)
            {
                Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                    "Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat," + " hence shutting down.");
                Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                    "Message from ResourceManager: " + response.GetDiagnosticsMessage());
                this._enclosing.context.SetDecommissioned(true);
                this._enclosing.dispatcher.GetEventHandler().Handle(
                    new NodeManagerEvent(NodeManagerEventType.Shutdown));
                break;
            }

            if (response.GetNodeAction() == NodeAction.Resync)
            {
                Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                    "Node is out of sync with ResourceManager," + " hence resyncing.");
                Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                    "Message from ResourceManager: " + response.GetDiagnosticsMessage());
                this._enclosing.rmIdentifier = ResourceManagerConstants.RmInvalidIdentifier;
                this._enclosing.dispatcher.GetEventHandler().Handle(
                    new NodeManagerEvent(NodeManagerEventType.Resync));
                this._enclosing.pendingCompletedContainers.Clear();
                break;
            }

            this._enclosing.RemoveOrTrackCompletedContainersFromContext(
                response.GetContainersToBeRemovedFromNM());
            lastHeartBeatID = response.GetResponseId();

            // Containers the resource manager wants cleaned up.
            IList<ContainerId> finishedContainers = response.GetContainersToCleanup();
            if (!finishedContainers.IsEmpty())
            {
                this._enclosing.dispatcher.GetEventHandler().Handle(
                    new CMgrCompletedContainersEvent(
                        finishedContainers, CMgrCompletedContainersEvent.Reason.ByResourcemanager));
            }

            // Applications the resource manager wants cleaned up; keep-alive tracking happens
            // even when the list is empty.
            IList<ApplicationId> finishedApps = response.GetApplicationsToCleanup();
            this._enclosing.TrackAppsForKeepAlive(finishedApps);
            if (!finishedApps.IsEmpty())
            {
                this._enclosing.dispatcher.GetEventHandler().Handle(
                    new CMgrCompletedAppsEvent(
                        finishedApps, CMgrCompletedAppsEvent.Reason.ByResourcemanager));
            }

            IDictionary<ApplicationId, ByteBuffer> credentialsByApp = response.GetSystemCredentialsForApps();
            if (credentialsByApp != null && !credentialsByApp.IsEmpty())
            {
                NodeManager.NMContext nmContext = (NodeManager.NMContext)this._enclosing.context;
                nmContext.SetSystemCrendentialsForApps(
                    Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.ParseCredentials(credentialsByApp));
            }
        }
        catch (ConnectException e)
        {
            // A connection failure requests a node manager shutdown and ends the updater.
            this._enclosing.dispatcher.GetEventHandler().Handle(
                new NodeManagerEvent(NodeManagerEventType.Shutdown));
            throw new YarnRuntimeException(e);
        }
        catch (Exception e)
        {
            // Any other failure is logged and the next heartbeat is attempted.
            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Error(
                "Caught exception in status-updater", e);
        }
        finally
        {
            lock (this._enclosing.heartbeatMonitor)
            {
                // Fall back to the configured default when no positive interval is known.
                if (this._enclosing.nextHeartBeatInterval <= 0)
                {
                    this._enclosing.nextHeartBeatInterval = YarnConfiguration.DefaultRmNmHeartbeatIntervalMs;
                }

                try
                {
                    Sharpen.Runtime.Wait(
                        this._enclosing.heartbeatMonitor, this._enclosing.nextHeartBeatInterval);
                }
                catch (Exception)
                {
                    // Deliberately ignored: a failed or interrupted wait just starts the next iteration early.
                }
            }
        }
    }
}
/// <summary>
/// Handles a node manager heartbeat on the resource manager side.
/// <list type="number">
/// <item>Nodes whose host is not valid per <c>nodesListManager</c> get the shared <c>shutDown</c> response.</item>
/// <item>Nodes that are not registered in the RM context get the shared <c>resync</c> response.</item>
/// <item>A heartbeat whose response id is exactly one behind the last response is a duplicate and
/// gets the last response again; one that is further behind triggers a <c>Rebooting</c> node event
/// and the <c>resync</c> response.</item>
/// <item>Otherwise a new response with the next id is built, populated (cleanup info, master keys,
/// system credentials), handed to the RM node via an <c>RMNodeStatusEvent</c>, and returned.</item>
/// </list>
/// </summary>
/// <param name="request">The heartbeat request from the node manager.</param>
/// <returns>The shutdown, resync, duplicate, or freshly built heartbeat response.</returns>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
{
    NodeStatus remoteNodeStatus = request.GetNodeStatus();
    NodeId nodeId = remoteNodeStatus.GetNodeId();

    // 1. Check if it's a valid (i.e. not excluded) node
    if (!this.nodesListManager.IsValidNode(nodeId.GetHost()))
    {
        string message = "Disallowed NodeManager nodeId: " + nodeId + " hostname: " + nodeId.GetHost();
        Log.Info(message);
        shutDown.SetDiagnosticsMessage(message);
        return shutDown;
    }

    // 2. Check if it's a registered node
    // TryGetValue instead of the indexer: a dictionary indexer that throws
    // KeyNotFoundException for a missing key would make the resync branch unreachable.
    RMNode rmNode;
    bool isRegistered = this.rmContext.GetRMNodes().TryGetValue(nodeId, out rmNode);
    if (!isRegistered || rmNode == null)
    {
        /* node does not exist */
        string message = "Node not found resyncing " + remoteNodeStatus.GetNodeId();
        Log.Info(message);
        resync.SetDiagnosticsMessage(message);
        return resync;
    }

    // Send ping
    this.nmLivelinessMonitor.ReceivedPing(nodeId);

    // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
    NodeHeartbeatResponse lastNodeHeartbeatResponse = rmNode.GetLastNodeHeartBeatResponse();
    if (remoteNodeStatus.GetResponseId() + 1 == lastNodeHeartbeatResponse.GetResponseId())
    {
        Log.Info("Received duplicate heartbeat from node " + rmNode.GetNodeAddress()
            + " responseId=" + remoteNodeStatus.GetResponseId());
        return lastNodeHeartbeatResponse;
    }
    else if (remoteNodeStatus.GetResponseId() + 1 < lastNodeHeartbeatResponse.GetResponseId())
    {
        string message = "Too far behind rm response id:" + lastNodeHeartbeatResponse.GetResponseId()
            + " nm response id:" + remoteNodeStatus.GetResponseId();
        Log.Info(message);
        resync.SetDiagnosticsMessage(message);
        // TODO: Just sending reboot is not enough. Think more.
        this.rmContext.GetDispatcher().GetEventHandler().Handle(
            new RMNodeEvent(nodeId, RMNodeEventType.Rebooting));
        return resync;
    }

    // Heartbeat response
    NodeHeartbeatResponse nodeHeartBeatResponse = YarnServerBuilderUtils.NewNodeHeartbeatResponse(
        lastNodeHeartbeatResponse.GetResponseId() + 1, NodeAction.Normal, null, null, null, null,
        nextHeartBeatInterval);
    rmNode.UpdateNodeHeartbeatResponseForCleanup(nodeHeartBeatResponse);
    PopulateKeys(request, nodeHeartBeatResponse);

    ConcurrentMap<ApplicationId, ByteBuffer> systemCredentials = rmContext.GetSystemCredentialsForApps();
    if (!systemCredentials.IsEmpty())
    {
        nodeHeartBeatResponse.SetSystemCredentialsForApps(systemCredentials);
    }

    // 4. Send status to RMNode, saving the latest response.
    this.rmContext.GetDispatcher().GetEventHandler().Handle(
        new RMNodeStatusEvent(
            nodeId,
            remoteNodeStatus.GetNodeHealthStatus(),
            remoteNodeStatus.GetContainersStatuses(),
            remoteNodeStatus.GetKeepAliveApplications(),
            nodeHeartBeatResponse));
    return nodeHeartBeatResponse;
}