示例#1
0
        /// <summary>
        /// Checks NameNode tracking of a particular DataNode for correct reporting of
        /// failed volumes.
        /// </summary>
        /// <remarks>
        /// The DataNode's descriptor is looked up through the test's <c>cluster</c>
        /// field; the <paramref name="dm"/> argument is not read by this method.
        /// When no failures are expected, the descriptor must carry no failure summary
        /// at all. Otherwise the summary must exist, list exactly the expected
        /// locations, carry a positive last-failure date, and report the capacity
        /// computed by <c>GetExpectedCapacityLost</c>.
        /// </remarks>
        /// <param name="dm">DatanodeManager to check (currently unused by this method)</param>
        /// <param name="dn">DataNode to check</param>
        /// <param name="expectCapacityKnown">
        /// if true, then expect that the capacities of the
        /// volumes were known before the failures, and therefore the lost capacity
        /// can be reported
        /// </param>
        /// <param name="expectedFailedVolumes">expected locations of failed volumes</param>
        /// <exception cref="System.Exception">if there is any failure</exception>
        private void CheckFailuresAtNameNode(DatanodeManager dm, DataNode dn, bool expectCapacityKnown
                                             , params string[] expectedFailedVolumes)
        {
            DatanodeDescriptor dd = cluster.GetNamesystem().GetBlockManager().GetDatanodeManager
                                        ().GetDatanode(dn.GetDatanodeId());

            NUnit.Framework.Assert.AreEqual(expectedFailedVolumes.Length, dd.GetVolumeFailures
                                                ());
            VolumeFailureSummary volumeFailureSummary = dd.GetVolumeFailureSummary();

            if (expectedFailedVolumes.Length == 0)
            {
                // No failures expected: the NameNode must not hold a summary.
                NUnit.Framework.Assert.IsNull(volumeFailureSummary);
                return;
            }

            // Failures are expected, so a summary must be present. Asserting this
            // explicitly yields a readable test failure instead of a
            // NullReferenceException from the dereferences below.
            NUnit.Framework.Assert.IsNotNull(volumeFailureSummary);

            Assert.AssertArrayEquals(expectedFailedVolumes, volumeFailureSummary.GetFailedStorageLocations
                                         ());
            NUnit.Framework.Assert.IsTrue(volumeFailureSummary.GetLastVolumeFailureDate() > 0
                                          );
            long expectedCapacityLost = GetExpectedCapacityLost(expectCapacityKnown, expectedFailedVolumes
                                                                .Length);
            NUnit.Framework.Assert.AreEqual(expectedCapacityLost, volumeFailureSummary.GetEstimatedCapacityLostTotal
                                                ());
        }
        /// <summary>
        /// Handles a heartbeat RPC: converts the protobuf request into the arguments
        /// of <c>impl.SendHeartbeat</c>, delegates to it, and converts the returned
        /// <c>HeartbeatResponse</c> into a response message.
        /// </summary>
        /// <param name="controller">RPC controller; not referenced by this method</param>
        /// <param name="request">the heartbeat request message</param>
        /// <returns>the built heartbeat response message</returns>
        /// <exception cref="Com.Google.Protobuf.ServiceException">
        /// wraps any IOException raised while converting the request or delegating
        /// </exception>
        public virtual DatanodeProtocolProtos.HeartbeatResponseProto SendHeartbeat(RpcController
                                                                                   controller, DatanodeProtocolProtos.HeartbeatRequestProto request)
        {
            HeartbeatResponse reply;

            try
            {
                StorageReport[] storageReports = PBHelper.ConvertStorageReports(request.GetReportsList());

                // The failure summary is optional in the request; pass null when absent.
                VolumeFailureSummary failureSummary = null;
                if (request.HasVolumeFailureSummary())
                {
                    failureSummary = PBHelper.ConvertVolumeFailureSummary(request.GetVolumeFailureSummary());
                }

                reply = impl.SendHeartbeat(
                    PBHelper.Convert(request.GetRegistration()),
                    storageReports,
                    request.GetCacheCapacity(),
                    request.GetCacheUsed(),
                    request.GetXmitsInProgress(),
                    request.GetXceiverCount(),
                    request.GetFailedVolumes(),
                    failureSummary);
            }
            catch (IOException ioe)
            {
                throw new ServiceException(ioe);
            }

            DatanodeProtocolProtos.HeartbeatResponseProto.Builder responseBuilder =
                DatanodeProtocolProtos.HeartbeatResponseProto.NewBuilder();

            // Copy over every non-null command; a null array or null entries are skipped.
            DatanodeCommand[] commands = reply.GetCommands();
            if (commands != null)
            {
                foreach (DatanodeCommand command in commands)
                {
                    if (command == null)
                    {
                        continue;
                    }
                    responseBuilder.AddCmds(PBHelper.Convert(command));
                }
            }

            responseBuilder.SetHaStatus(PBHelper.Convert(reply.GetNameNodeHaState()));

            RollingUpgradeStatus upgradeStatus = reply.GetRollingUpdateStatus();
            if (upgradeStatus != null)
            {
                // The V2 field is always populated (for newer datanodes). The V1
                // field is left unset once the rolling upgrade is finalized, for
                // compatibility with older datanodes.
                HdfsProtos.RollingUpgradeStatusProto statusProto =
                    PBHelper.ConvertRollingUpgradeStatus(upgradeStatus);
                responseBuilder.SetRollingUpgradeStatusV2(statusProto);
                if (!upgradeStatus.IsFinalized())
                {
                    responseBuilder.SetRollingUpgradeStatus(statusProto);
                }
            }

            return (DatanodeProtocolProtos.HeartbeatResponseProto)responseBuilder.Build();
        }
示例#3
0
 /// <summary>
 /// Applies a heartbeat to <paramref name="node"/> while holding this object's
 /// monitor: the node is subtracted from <c>stats</c>, updated, then added back.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the subtract/add bracket presumably keeps the aggregate in
 /// <c>stats</c> in step with the node's new values; confirm against the stats type.
 /// </remarks>
 /// <param name="node">descriptor of the datanode that sent the heartbeat</param>
 /// <param name="reports">per-storage reports carried by the heartbeat</param>
 /// <param name="cacheCapacity">cache capacity value forwarded to the node</param>
 /// <param name="cacheUsed">cache used value forwarded to the node</param>
 /// <param name="xceiverCount">xceiver count forwarded to the node</param>
 /// <param name="failedVolumes">failed volume count forwarded to the node</param>
 /// <param name="volumeFailureSummary">volume failure summary forwarded to the node</param>
 internal virtual void UpdateHeartbeat(
     DatanodeDescriptor node,
     StorageReport[] reports,
     long cacheCapacity,
     long cacheUsed,
     int xceiverCount,
     int failedVolumes,
     VolumeFailureSummary volumeFailureSummary)
 {
     lock (this)
     {
         // Take the node's current contribution out before it changes.
         stats.Subtract(node);

         node.UpdateHeartbeat(
             reports,
             cacheCapacity,
             cacheUsed,
             xceiverCount,
             failedVolumes,
             volumeFailureSummary);

         // Put the node back with its refreshed values.
         stats.Add(node);
     }
 }
示例#4
0
        /// <summary>
        /// Applies the contents of a datanode heartbeat (or a stats initialization)
        /// to this descriptor.
        /// </summary>
        /// <remarks>
        /// Records the cache, xceiver and volume-failure values, stamps the update
        /// times, feeds every storage report to its storage, and stores the summed
        /// capacity, remaining, block-pool-used and DFS-used figures. Depending on
        /// the failure information, storages that were known but are missing from
        /// <paramref name="reports"/> are handed to <c>UpdateFailedStorage</c>.
        /// </remarks>
        /// <param name="reports">per-storage reports from the datanode</param>
        /// <param name="cacheCapacity">value passed to <c>SetCacheCapacity</c></param>
        /// <param name="cacheUsed">value passed to <c>SetCacheUsed</c></param>
        /// <param name="xceiverCount">value passed to <c>SetXceiverCount</c></param>
        /// <param name="volFailures">number of failed volumes reported</param>
        /// <param name="volumeFailureSummary">failure summary from the datanode, or null</param>
        public virtual void UpdateHeartbeatState(StorageReport[] reports, long cacheCapacity
                                                 , long cacheUsed, int xceiverCount, int volFailures, VolumeFailureSummary volumeFailureSummary
                                                 )
        {
            // Should storages missing from this heartbeat be looked for and marked
            // as failed? Two situations matter:
            //
            // 1. A storage fails while the DN is running. The current DN does not
            //    re-add a recovered storage until it restarts, so volFailures is
            //    assumed never to decrease within one registration session. An
            //    unchanged count (volFailures == this.volumeFailures) therefore
            //    means no state change, and the check is skipped as an
            //    optimization. Recent DataNodes also send a VolumeFailureSummary
            //    with the date/time of the last volume failure; when both the old
            //    and new summaries are present, that date is compared instead for
            //    greater accuracy.
            //
            // 2. After a DN restart, volFailures might not increase even though
            //    there is new failed storage, e.g. when admins reduce the
            //    configured storages. Another corner case: (a) one good storage A
            //    and one restored good storage B exist, so storageReports holds
            //    only A; (b) A fails; (c) the DN restarts before its heartbeat
            //    tells the NN about A; (d) after the restart storageReports holds
            //    only B. Hence the first heartbeat after registration always
            //    triggers the check.
            bool haveBothSummaries = volumeFailureSummary != null && this.volumeFailureSummary != null;
            bool checkFailedStorages = haveBothSummaries
                ? (volumeFailureSummary.GetLastVolumeFailureDate() > this.volumeFailureSummary.GetLastVolumeFailureDate())
                : ((volFailures > this.volumeFailures) || !heartbeatedSinceRegistration);

            // Non-null exactly when checkFailedStorages is true. Starts as every
            // known storage; the ones seen in this heartbeat are removed below.
            ICollection<DatanodeStorageInfo> unreportedStorages = null;
            if (checkFailedStorages)
            {
                Log.Info("Number of failed storage changes from " + this.volumeFailures + " to "
                         + volFailures);
                unreportedStorages = new HashSet<DatanodeStorageInfo>(storageMap.Values);
            }

            SetCacheCapacity(cacheCapacity);
            SetCacheUsed(cacheUsed);
            SetXceiverCount(xceiverCount);
            SetLastUpdate(Time.Now());
            SetLastUpdateMonotonic(Time.MonotonicNow());
            this.volumeFailures       = volFailures;
            this.volumeFailureSummary = volumeFailureSummary;

            long capacitySum      = 0;
            long remainingSum     = 0;
            long blockPoolUsedSum = 0;
            long dfsUsedSum       = 0;

            foreach (StorageReport report in reports)
            {
                DatanodeStorageInfo storage = UpdateStorage(report.GetStorage());
                if (unreportedStorages != null)
                {
                    unreportedStorages.Remove(storage);
                }
                storage.ReceivedHeartbeat(report);

                capacitySum      += report.GetCapacity();
                remainingSum     += report.GetRemaining();
                blockPoolUsedSum += report.GetBlockPoolUsed();
                dfsUsedSum       += report.GetDfsUsed();
            }

            RollBlocksScheduled(GetLastUpdateMonotonic());

            // Publish the node-wide totals accumulated from the reports.
            SetCapacity(capacitySum);
            SetRemaining(remainingSum);
            SetBlockPoolUsed(blockPoolUsedSum);
            SetDfsUsed(dfsUsedSum);

            if (checkFailedStorages)
            {
                // Whatever is left was known before but absent from this heartbeat.
                UpdateFailedStorage(unreportedStorages);
            }

            // A size mismatch means the map and the reports disagree about which
            // storages exist, so let PruneStorageMap reconcile them.
            if (storageMap.Count != reports.Length)
            {
                PruneStorageMap(reports);
            }
        }
示例#5
0
 /// <summary>
 /// Updates this descriptor's stats from a datanode heartbeat and records that
 /// a heartbeat has been received since registration.
 /// </summary>
 /// <param name="reports">per-storage reports from the datanode</param>
 /// <param name="cacheCapacity">cache capacity value from the heartbeat</param>
 /// <param name="cacheUsed">cache used value from the heartbeat</param>
 /// <param name="xceiverCount">xceiver count from the heartbeat</param>
 /// <param name="volFailures">number of failed volumes reported</param>
 /// <param name="volumeFailureSummary">volume failure summary from the heartbeat</param>
 public virtual void UpdateHeartbeat(
     StorageReport[] reports,
     long cacheCapacity,
     long cacheUsed,
     int xceiverCount,
     int volFailures,
     VolumeFailureSummary volumeFailureSummary)
 {
     UpdateHeartbeatState(
         reports,
         cacheCapacity,
         cacheUsed,
         xceiverCount,
         volFailures,
         volumeFailureSummary);

     // Set only after the state update, which itself reads this flag.
     heartbeatedSinceRegistration = true;
 }
示例#6
0
        /// <summary>
        /// Sends a heartbeat over <c>rpcProxy</c>: packs the arguments into a
        /// request message, issues the call, and unpacks the response into a
        /// <c>HeartbeatResponse</c>.
        /// </summary>
        /// <param name="registration">registration of the sending datanode</param>
        /// <param name="reports">per-storage reports to include</param>
        /// <param name="cacheCapacity">cache capacity; written to the request only when non-zero</param>
        /// <param name="cacheUsed">cache used; written to the request only when non-zero</param>
        /// <param name="xmitsInProgress">transfers-in-progress count to include</param>
        /// <param name="xceiverCount">xceiver count to include</param>
        /// <param name="failedVolumes">failed volume count to include</param>
        /// <param name="volumeFailureSummary">failure summary; written only when non-null</param>
        /// <returns>the commands, HA status and rolling-upgrade status from the response</returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual HeartbeatResponse SendHeartbeat(DatanodeRegistration registration,
                                                       StorageReport[] reports, long cacheCapacity, long cacheUsed, int xmitsInProgress
                                                       , int xceiverCount, int failedVolumes, VolumeFailureSummary volumeFailureSummary
                                                       )
        {
            DatanodeProtocolProtos.HeartbeatRequestProto.Builder requestBuilder =
                DatanodeProtocolProtos.HeartbeatRequestProto.NewBuilder()
                    .SetRegistration(PBHelper.Convert(registration))
                    .SetXmitsInProgress(xmitsInProgress)
                    .SetXceiverCount(xceiverCount)
                    .SetFailedVolumes(failedVolumes);

            requestBuilder.AddAllReports(PBHelper.ConvertStorageReports(reports));

            // Optional fields: zero cache figures and a null summary are omitted.
            if (cacheCapacity != 0)
            {
                requestBuilder.SetCacheCapacity(cacheCapacity);
            }
            if (cacheUsed != 0)
            {
                requestBuilder.SetCacheUsed(cacheUsed);
            }
            if (volumeFailureSummary != null)
            {
                requestBuilder.SetVolumeFailureSummary(
                    PBHelper.ConvertVolumeFailureSummary(volumeFailureSummary));
            }

            DatanodeProtocolProtos.HeartbeatResponseProto reply;
            try
            {
                reply = rpcProxy.SendHeartbeat(
                    NullController,
                    (DatanodeProtocolProtos.HeartbeatRequestProto)requestBuilder.Build());
            }
            catch (ServiceException ex)
            {
                throw ProtobufHelper.GetRemoteException(ex);
            }

            // Convert each command proto into the array, in list order.
            DatanodeCommand[] commands = new DatanodeCommand[reply.GetCmdsList().Count];
            int slot = 0;
            foreach (DatanodeProtocolProtos.DatanodeCommandProto commandProto in reply.GetCmdsList())
            {
                commands[slot++] = PBHelper.Convert(commandProto);
            }

            // Prefer the V2 rolling-upgrade field; fall back to V1; otherwise null.
            RollingUpgradeStatus upgradeStatus = null;
            if (reply.HasRollingUpgradeStatusV2())
            {
                upgradeStatus = PBHelper.Convert(reply.GetRollingUpgradeStatusV2());
            }
            else if (reply.HasRollingUpgradeStatus())
            {
                upgradeStatus = PBHelper.Convert(reply.GetRollingUpgradeStatus());
            }

            return new HeartbeatResponse(
                commands,
                PBHelper.Convert(reply.GetHaStatus()),
                upgradeStatus);
        }