/// <summary>
/// Processes a DataNode heartbeat (or initializes stats at registration time):
/// refreshes per-storage state, aggregate capacity/usage totals, cache stats,
/// xceiver count and update timestamps, and detects storages that have failed.
/// </summary>
/// <param name="reports">Per-storage reports carried by the heartbeat.</param>
/// <param name="cacheCapacity">Total cache capacity reported by the DataNode.</param>
/// <param name="cacheUsed">Cache space currently in use on the DataNode.</param>
/// <param name="xceiverCount">Number of active transceivers on the DataNode.</param>
/// <param name="volFailures">Count of failed volumes reported by the DataNode.</param>
/// <param name="volumeFailureSummary">Failure details; may be null for older DataNodes.</param>
public virtual void UpdateHeartbeatState(StorageReport[] reports, long cacheCapacity
    , long cacheUsed, int xceiverCount, int volFailures, VolumeFailureSummary volumeFailureSummary
    )
{
    long capacitySum = 0;
    long remainingSum = 0;
    long blockPoolUsedSum = 0;
    long dfsUsedSum = 0;
    ICollection<DatanodeStorageInfo> suspectFailedStorages = null;
    // Decide whether a missing StorageReport should be treated as a failed
    // storage. Two scenarios make the check necessary:
    // 1. While a DN is running, a storage fails. The current DN implementation
    //    never re-adds a recovered storage to its list until restart, so
    //    volFailures cannot decrease within one registration session. An
    //    unchanged failure count therefore means no state change, and the check
    //    can be skipped (optimization). Recent DataNodes also report a
    //    VolumeFailureSummary carrying the date/time of the last volume
    //    failure; when both sides have one, that timestamp is compared instead
    //    for greater accuracy.
    // 2. After a DN restart, volFailures may not increase even though storages
    //    were lost — e.g. admins reduced the configured storages, or the failed
    //    set changed across the restart: a) one good storage A and one restored
    //    good storage B, so reports contain only A; b) A fails; c) the DN
    //    restarts before the heartbeat announcing A's failure reaches the NN;
    //    d) after restart, reports contain only B.
    bool shouldCheckFailedStorages =
        (volumeFailureSummary != null && this.volumeFailureSummary != null)
            ? volumeFailureSummary.GetLastVolumeFailureDate() >
                this.volumeFailureSummary.GetLastVolumeFailureDate()
            : (volFailures > this.volumeFailures) || !heartbeatedSinceRegistration;
    if (shouldCheckFailedStorages)
    {
        Log.Info("Number of failed storage changes from " + this.volumeFailures + " to "
            + volFailures);
        // Seed with every known storage; each one that still reports is removed
        // in the loop below, leaving only the storages that went silent.
        suspectFailedStorages = new HashSet<DatanodeStorageInfo>(storageMap.Values);
    }
    SetCacheCapacity(cacheCapacity);
    SetCacheUsed(cacheUsed);
    SetXceiverCount(xceiverCount);
    SetLastUpdate(Time.Now());
    SetLastUpdateMonotonic(Time.MonotonicNow());
    this.volumeFailures = volFailures;
    this.volumeFailureSummary = volumeFailureSummary;
    foreach (StorageReport storageReport in reports)
    {
        DatanodeStorageInfo storageInfo = UpdateStorage(storageReport.GetStorage());
        if (shouldCheckFailedStorages)
        {
            suspectFailedStorages.Remove(storageInfo);
        }
        storageInfo.ReceivedHeartbeat(storageReport);
        capacitySum += storageReport.GetCapacity();
        remainingSum += storageReport.GetRemaining();
        blockPoolUsedSum += storageReport.GetBlockPoolUsed();
        dfsUsedSum += storageReport.GetDfsUsed();
    }
    RollBlocksScheduled(GetLastUpdateMonotonic());
    // Publish the aggregated totals for this node.
    SetCapacity(capacitySum);
    SetRemaining(remainingSum);
    SetBlockPoolUsed(blockPoolUsedSum);
    SetDfsUsed(dfsUsedSum);
    if (shouldCheckFailedStorages)
    {
        UpdateFailedStorage(suspectFailedStorages);
    }
    // Drop storages the DataNode no longer reports at all.
    if (storageMap.Count != reports.Length)
    {
        PruneStorageMap(reports);
    }
}