Exemplo n.º 1
0
        /// <summary>
        /// Registers a node manager through <c>resourceTracker</c>, asserts that it connects,
        /// then starts a new failover thread and sends a node heartbeat.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestResourceTrackerOnHA()
        {
            // Make sure registerNodeManager works when failover happens.
            NodeId   registeredNode = NodeId.NewInstance("localhost", 0);
            Resource capability     = Resource.NewInstance(2048, 4);
            RegisterNodeManagerRequest registration = RegisterNodeManagerRequest.NewInstance(
                registeredNode, 0, capability, YarnVersionInfo.GetVersion(), null, null);
            resourceTracker.RegisterNodeManager(registration);
            NUnit.Framework.Assert.IsTrue(WaitForNodeManagerToConnect(10000, registeredNode));

            // Restart the failover thread, and make sure nodeHeartbeat works.
            failoverThread = CreateAndStartFailoverThread();
            NodeId     heartbeatNode   = NodeId.NewInstance("localhost", 0);
            NodeStatus heartbeatStatus = NodeStatus.NewInstance(heartbeatNode, 0, null, null, null);
            NodeHeartbeatRequest heartbeat = NodeHeartbeatRequest.NewInstance(heartbeatStatus, null, null);
            resourceTracker.NodeHeartbeat(heartbeat);
        }
Exemplo n.º 2
0
            /// <summary>
            /// Heartbeat loop: while the enclosing updater is not stopped, sends a node heartbeat
            /// through the resource tracker and acts on the response.
            /// </summary>
            /// <remarks>
            /// A Shutdown or Resync node action dispatches the matching NodeManagerEvent and leaves
            /// the loop. A ConnectException dispatches a Shutdown event and is rethrown wrapped in a
            /// YarnRuntimeException; any other exception is logged and the loop carries on.
            /// The finally block waits on the heartbeat monitor on every one of these paths.
            /// </remarks>
            public void Run()
            {
                // Response id of the last fully processed heartbeat; fed into the next node status.
                int previousResponseId = 0;

                while (!this._enclosing.isStopped)
                {
                    try
                    {
                        NodeStatus currentStatus = this._enclosing.GetNodeStatus(previousResponseId);
                        var containerTokenKey = this._enclosing.context.GetContainerTokenSecretManager().GetCurrentKey();
                        var nmTokenKey = this._enclosing.context.GetNMTokenSecretManager().GetCurrentKey();
                        NodeHeartbeatRequest heartbeat = NodeHeartbeatRequest.NewInstance(
                            currentStatus, containerTokenKey, nmTokenKey);
                        NodeHeartbeatResponse reply = this._enclosing.resourceTracker.NodeHeartbeat(heartbeat);

                        this._enclosing.nextHeartBeatInterval = reply.GetNextHeartBeatInterval();
                        this.UpdateMasterKeys(reply);

                        if (reply.GetNodeAction() == NodeAction.Shutdown)
                        {
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                                "Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat, hence shutting down.");
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                                "Message from ResourceManager: " + reply.GetDiagnosticsMessage());
                            this._enclosing.context.SetDecommissioned(true);
                            this._enclosing.dispatcher.GetEventHandler().Handle(
                                new NodeManagerEvent(NodeManagerEventType.Shutdown));
                            break;
                        }

                        if (reply.GetNodeAction() == NodeAction.Resync)
                        {
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                                "Node is out of sync with ResourceManager, hence resyncing.");
                            Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Warn(
                                "Message from ResourceManager: " + reply.GetDiagnosticsMessage());
                            // Invalidate the RM identifier before the resync event is dispatched.
                            this._enclosing.rmIdentifier = ResourceManagerConstants.RmInvalidIdentifier;
                            this._enclosing.dispatcher.GetEventHandler().Handle(
                                new NodeManagerEvent(NodeManagerEventType.Resync));
                            this._enclosing.pendingCompletedContainers.Clear();
                            break;
                        }

                        this._enclosing.RemoveOrTrackCompletedContainersFromContext(
                            reply.GetContainersToBeRemovedFromNM());
                        previousResponseId = reply.GetResponseId();

                        // Containers the response lists for cleanup.
                        IList <ContainerId> finishedContainers = reply.GetContainersToCleanup();
                        if (!finishedContainers.IsEmpty())
                        {
                            this._enclosing.dispatcher.GetEventHandler().Handle(
                                new CMgrCompletedContainersEvent(
                                    finishedContainers, CMgrCompletedContainersEvent.Reason.ByResourcemanager));
                        }

                        // Applications the response lists for cleanup; tracked for keep-alive first.
                        IList <ApplicationId> finishedApps = reply.GetApplicationsToCleanup();
                        this._enclosing.TrackAppsForKeepAlive(finishedApps);
                        if (!finishedApps.IsEmpty())
                        {
                            this._enclosing.dispatcher.GetEventHandler().Handle(
                                new CMgrCompletedAppsEvent(
                                    finishedApps, CMgrCompletedAppsEvent.Reason.ByResourcemanager));
                        }

                        IDictionary <ApplicationId, ByteBuffer> appCredentials = reply.GetSystemCredentialsForApps();
                        if (appCredentials != null && !appCredentials.IsEmpty())
                        {
                            ((NodeManager.NMContext) this._enclosing.context).SetSystemCrendentialsForApps(
                                Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.ParseCredentials(appCredentials));
                        }
                    }
                    catch (ConnectException connectFailure)
                    {
                        // Dispatch a shutdown event, then surface the failure to the caller.
                        this._enclosing.dispatcher.GetEventHandler().Handle(
                            new NodeManagerEvent(NodeManagerEventType.Shutdown));
                        throw new YarnRuntimeException(connectFailure);
                    }
                    catch (Exception unexpected)
                    {
                        // Logged only: the loop goes on to the next heartbeat.
                        Org.Apache.Hadoop.Yarn.Server.Nodemanager.NodeStatusUpdaterImpl.Log.Error(
                            "Caught exception in status-updater", unexpected);
                    }
                    finally
                    {
                        lock (this._enclosing.heartbeatMonitor)
                        {
                            // Fall back to the configured default when the interval is not positive.
                            if (this._enclosing.nextHeartBeatInterval <= 0)
                            {
                                this._enclosing.nextHeartBeatInterval = YarnConfiguration.DefaultRmNmHeartbeatIntervalMs;
                            }

                            try
                            {
                                Sharpen.Runtime.Wait(
                                    this._enclosing.heartbeatMonitor, this._enclosing.nextHeartBeatInterval);
                            }
                            catch (Exception)
                            {
                                // Deliberately ignored.
                                // NOTE(review): presumably an interrupt that cuts the wait short - confirm.
                            }
                        }
                    }
                }
            }