Exemple #1
0
                /// <summary>
                /// Verifies the state of the node manager's containers, delegates to the base
                /// implementation, and then waits on the enclosing test's sync barrier.
                /// </summary>
                /// <remarks>
                /// When <c>containersShouldBePreserved</c> is set, the container map must be
                /// non-empty, contain <c>existingCid</c>, and report that container as
                /// <c>ContainerState.Running</c>. Otherwise, a non-empty map must report the
                /// <c>existingCid</c> container as <c>ContainerState.Complete</c>.
                /// Any failure is printed and recorded in <c>assertionFailedInThread</c> instead
                /// of being propagated to the caller.
                /// </remarks>
                protected internal override void RebootNodeStatusUpdaterAndRegisterWithRM()
                {
                    ConcurrentMap <ContainerId, Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container
                                   > containers = this._enclosing.GetNMContext().GetContainers();

                    try
                    {
                        try
                        {
                            if (this._enclosing.containersShouldBePreserved)
                            {
                                NUnit.Framework.Assert.IsFalse(containers.IsEmpty());
                                NUnit.Framework.Assert.IsTrue(containers.Contains(this._enclosing.existingCid));
                                NUnit.Framework.Assert.AreEqual(ContainerState.Running, containers[this._enclosing
                                                                                                   .existingCid].CloneAndGetContainerStatus().GetState());
                            }
                            else
                            {
                                // ensure that containers are empty or are completed before
                                // restart nodeStatusUpdater
                                if (!containers.IsEmpty())
                                {
                                    NUnit.Framework.Assert.AreEqual(ContainerState.Complete, containers[this._enclosing
                                                                                                        .existingCid].CloneAndGetContainerStatus().GetState());
                                }
                            }
                            base.RebootNodeStatusUpdaterAndRegisterWithRM();
                        }
                        catch (Exception ae)
                        {
                            // Record the failure so the test can observe it through the flag.
                            Sharpen.Runtime.PrintStackTrace(ae);
                            this._enclosing._enclosing.assertionFailedInThread.Set(true);
                        }
                        finally
                        {
                            // Always reach the barrier, even after a failure above.
                            this._enclosing._enclosing.syncBarrier.Await();
                        }
                    }
                    // Catch clauses must run from most specific to most general: a leading
                    // catch (Exception) makes every later clause unreachable (compiler error CS0160).
                    catch (BrokenBarrierException)
                    {
                        // Deliberately ignored: a broken barrier is not treated as a test failure here.
                    }
                    catch (System.Threading.ThreadInterruptedException)
                    {
                        // Deliberately ignored.
                        // NOTE(review): the original untyped catch appears to stand in for a
                        // thread-interruption handler; confirm that Await() surfaces interruption
                        // as ThreadInterruptedException.
                    }
                    catch (Exception ae)
                    {
                        // Anything else escaping the barrier wait is recorded as a failure.
                        Sharpen.Runtime.PrintStackTrace(ae);
                        this._enclosing._enclosing.assertionFailedInThread.Set(true);
                    }
                }
Exemple #2
0
                /// <summary>
                /// Verifies that every known container is complete, delegates to the base
                /// implementation, and then waits on the enclosing test's sync barrier.
                /// </summary>
                /// <remarks>
                /// Each container in the node manager's container map must report
                /// <c>ContainerState.Complete</c>. A failed check or any other unexpected exception
                /// is printed and recorded in <c>assertionFailedInThread</c> instead of being
                /// propagated; in that case the barrier wait below is not reached.
                /// </remarks>
                protected internal override void RebootNodeStatusUpdaterAndRegisterWithRM()
                {
                    ConcurrentMap <ContainerId, Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container
                                   > containers = this._enclosing.GetNMContext().GetContainers();

                    try
                    {
                        // ensure that containers are empty before restart nodeStatusUpdater
                        if (!containers.IsEmpty())
                        {
                            foreach (Org.Apache.Hadoop.Yarn.Server.Nodemanager.Containermanager.Container.Container
                                     container in containers.Values)
                            {
                                NUnit.Framework.Assert.AreEqual(ContainerState.Complete, container.CloneAndGetContainerStatus
                                                                    ().GetState());
                            }
                        }
                        base.RebootNodeStatusUpdaterAndRegisterWithRM();
                        // After this point new containers are free to be launched, except
                        // containers from previous RM
                        // Wait here so as to sync with the main test thread.
                        this._enclosing._enclosing.syncBarrier.Await();
                    }
                    // Catch clauses must run from most specific to most general: a leading
                    // catch (Exception) silently swallowed assertion failures and made the
                    // failure-recording handler unreachable (compiler error CS0160).
                    catch (BrokenBarrierException)
                    {
                        // Deliberately ignored: a broken barrier is not treated as a test failure here.
                    }
                    catch (System.Threading.ThreadInterruptedException)
                    {
                        // Deliberately ignored.
                        // NOTE(review): the original untyped catch appears to stand in for a
                        // thread-interruption handler; confirm that Await() surfaces interruption
                        // as ThreadInterruptedException.
                    }
                    catch (Exception ae)
                    {
                        // Assertion failures and other unexpected errors are recorded for the test.
                        Sharpen.Runtime.PrintStackTrace(ae);
                        this._enclosing._enclosing.assertionFailedInThread.Set(true);
                    }
                }
        /// <summary>
        /// Processes a single node heartbeat and produces the response for that node.
        /// </summary>
        /// <remarks>
        /// Outcomes, in the order they are checked:
        /// the <c>shutDown</c> response when the nodes list manager rejects the host;
        /// the <c>resync</c> response when the node is not in the RM node map;
        /// the previously stored response when the heartbeat is a duplicate;
        /// the <c>resync</c> response (after dispatching a <c>Rebooting</c> event) when the
        /// node's response id lags behind;
        /// otherwise a new <c>NodeAction.Normal</c> response whose id is the last response id plus one.
        /// </remarks>
        /// <param name="request">Heartbeat request carrying the node's reported status.</param>
        /// <returns>The heartbeat response selected by the checks described above.</returns>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        public virtual NodeHeartbeatResponse NodeHeartbeat(NodeHeartbeatRequest request)
        {
            NodeStatus reportedStatus = request.GetNodeStatus();
            NodeId     nodeId         = reportedStatus.GetNodeId();

            // Step 1: reject hosts that the nodes list manager does not consider valid.
            bool hostAllowed = this.nodesListManager.IsValidNode(nodeId.GetHost());
            if (!hostAllowed)
            {
                string disallowedMessage = "Disallowed NodeManager nodeId: " + nodeId + " hostname: " + nodeId
                                           .GetHost();
                Log.Info(disallowedMessage);
                shutDown.SetDiagnosticsMessage(disallowedMessage);
                return shutDown;
            }

            // Step 2: the node must already be present in the RM node map.
            RMNode knownNode = this.rmContext.GetRMNodes()[nodeId];
            if (knownNode == null)
            {
                string unknownNodeMessage = "Node not found resyncing " + reportedStatus.GetNodeId();
                Log.Info(unknownNodeMessage);
                resync.SetDiagnosticsMessage(unknownNodeMessage);
                return resync;
            }

            // Tell the liveliness monitor that this node pinged.
            this.nmLivelinessMonitor.ReceivedPing(nodeId);

            // Step 3: compare response ids to detect duplicate or stale heartbeats.
            NodeHeartbeatResponse previousResponse = knownNode.GetLastNodeHeartBeatResponse();

            if (reportedStatus.GetResponseId() + 1 == previousResponse.GetResponseId())
            {
                // Duplicate heartbeat: hand back the response that was already produced.
                Log.Info("Received duplicate heartbeat from node " + knownNode.GetNodeAddress() + " responseId="
                         + reportedStatus.GetResponseId());
                return previousResponse;
            }

            if (reportedStatus.GetResponseId() + 1 < previousResponse.GetResponseId())
            {
                string laggingMessage = "Too far behind rm response id:" + previousResponse.GetResponseId()
                                        + " nm response id:" + reportedStatus.GetResponseId();
                Log.Info(laggingMessage);
                resync.SetDiagnosticsMessage(laggingMessage);
                // TODO: Just sending reboot is not enough. Think more.
                this.rmContext.GetDispatcher().GetEventHandler().Handle(
                    new RMNodeEvent(nodeId, RMNodeEventType.Rebooting));
                return resync;
            }

            // Regular path: build the next response in sequence.
            NodeHeartbeatResponse freshResponse = YarnServerBuilderUtils.NewNodeHeartbeatResponse(
                previousResponse.GetResponseId() + 1,
                NodeAction.Normal,
                null,
                null,
                null,
                null,
                nextHeartBeatInterval);

            knownNode.UpdateNodeHeartbeatResponseForCleanup(freshResponse);
            PopulateKeys(request, freshResponse);

            // Attach system credentials only when there are any.
            ConcurrentMap <ApplicationId, ByteBuffer> appCredentials = rmContext.GetSystemCredentialsForApps();
            if (!appCredentials.IsEmpty())
            {
                freshResponse.SetSystemCredentialsForApps(appCredentials);
            }

            // Step 4: forward the reported status to the RMNode, together with the latest response.
            this.rmContext.GetDispatcher().GetEventHandler().Handle(
                new RMNodeStatusEvent(
                    nodeId,
                    reportedStatus.GetNodeHealthStatus(),
                    reportedStatus.GetContainersStatuses(),
                    reportedStatus.GetKeepAliveApplications(),
                    freshResponse));
            return freshResponse;
        }