/// <summary>
/// Computes how much disk space has to be freed by the next cleanup pass.
/// </summary>
/// <remarks>
/// Two independent requirements are evaluated and the larger one wins:
/// the amount by which the tracked usage exceeds the target usage derived from
/// the disk quota, and the amount needed to restore the disk-full safety space.
/// As a side effect, a node health warning is sent while the safety space is
/// violated and cleared again once it no longer is.
/// </remarks>
/// <returns>
/// The amount of disk space to free. Never negative, because the safety-space
/// requirement is clamped at zero.
/// </returns>
private long CalculateCleanupRequired()
{
    var directoryInfo = new DirectoryInfo(Utility.LogDirectory);
    var diskSpaceRemaining = this.GetAvailableSpace(directoryInfo);

    // The DCA "diskfullsafetyspace" will help with situations where other files
    // are filling the drive or DCA's quota is > disk size. The quota mechanism would
    // prevent disk full otherwise. Running the cleanup more often makes sure that we
    // stay closer to our target usage (less deviation).
    // This check is for safety so we ignore fairness.
    var fullProtectionDiskSpaceToFree = Math.Max(
        this.cachedCurrentDiskFullSafetySpace.Update() - diskSpaceRemaining,
        0);

    // The target usage is a configured percentage of the disk quota.
    var diskQuota = this.cachedCurrentMaxDiskQuota.Update();
    var targetDiskUsage = (long)(diskQuota * this.cachedDiskQuotaUsageTargetPercent.Update() / 100);

    // Figure out how much disk space is currently in use, per user, so that we
    // can tell how much needs to be freed up for quota.
    var usageDictionary = new Dictionary<string, long>();
    this.WalkFiles(
        (f, md, u, dict) =>
        {
            // TryGetValue leaves usedSoFar at 0 for a user seen for the first time,
            // so a single lookup replaces the ContainsKey + indexer sequence.
            long usedSoFar;
            dict.TryGetValue(u, out usedSoFar);
            dict[u] = usedSoFar + f.Length;
            return dict;
        },
        usageDictionary);

    foreach (var userCount in usageDictionary)
    {
        this.traceSource.WriteInfo(
            TraceType,
            "Disk space user {0} was found to be using {1:N0}B of disk space.",
            userCount.Key,
            userCount.Value);
    }

    var totalDiskSpaceUsed = usageDictionary.Values.Sum();

    if (fullProtectionDiskSpaceToFree > 0)
    {
        // Safety space is violated: raise (or refresh) the warning and remember
        // that an OK report has to be sent once the situation recovers.
        HealthClient.SendNodeHealthReport(
            StringResources.DCAWarning_InsufficientDiskSpaceHealthDescription,
            HealthState.Warning,
            HealthSubProperty);
        this.healthOkSent = false;
        this.traceSource.WriteWarning(
            TraceType,
            "Insufficient space found for logs. Disk space remaining: {0}. Disk space to free: {1}.",
            diskSpaceRemaining,
            fullProtectionDiskSpaceToFree);
    }
    else if (!this.healthOkSent)
    {
        // Clear the warning only once per recovery instead of on every pass.
        HealthClient.ClearNodeHealthReport(HealthSubProperty);
        this.healthOkSent = true;
        this.traceSource.WriteInfo(
            TraceType,
            "Space found for logs. Disk space remaining: {0:N0}B.",
            diskSpaceRemaining);
    }

    // Free whichever is larger: the quota overshoot or the safety-space shortfall.
    return Math.Max(totalDiskSpaceUsed - targetDiskUsage, fullProtectionDiskSpaceToFree);
}
/// <summary>
/// Handler for unhandled exceptions in the DCA process. Records and logs the
/// exception, reports an error health state for the node, and then delays
/// process exit long enough that the crash does not count towards the
/// FabricHost consecutive-failure policy.
/// </summary>
/// <param name="sender">The source of the unhandled exception event (unused).</param>
/// <param name="e">Event data carrying the unhandled exception object.</param>
internal static void OnUnhandledException(object sender, UnhandledExceptionEventArgs e)
{
    Exception exception = (Exception)e.ExceptionObject;
    lastUnhandledException = exception;
    LogException(exception);

    try
    {
        // Configuration problems get their own health description; everything
        // else uses the generic unhandled-exception description.
        string descriptionFormat = exception is ConfigurationException
            ? StringResources.DCAError_ConfigurationUnhandledExceptionHealthDescription
            : StringResources.DCAError_UnhandledExceptionHealthDescription;

        HealthClient.SendNodeHealthReport(
            string.Format(descriptionFormat, exception.Message),
            HealthState.Error);
    }
    catch (Exception healthClientException)
    {
        // Health reporting is best effort here; a failure must not prevent the
        // exit-delay logic below from running.
        Utility.TraceSource.WriteExceptionAsError(TraceType, healthClientException);
    }

    // The DCA is a diagnostic component whose functionality is not critical
    // to the running of the node. Therefore, crashes in the DCA should not
    // cause the node to fail.
    //
    // However, the current logic in the FabricHost does not make any
    // distinction between the various Fabric code packages. Therefore the
    // same failure handling policy is applied to both Fabric.exe and
    // FabricDCA.exe. The policy is to declare the node as failed if a certain
    // number of consecutive crashes happen within a certain time span.
    // However, if the process has been running for longer than that time span,
    // then the crash count gets reset.
    //
    // We achieve the goal of not allowing DCA crashes to bring down the node
    // by implementing a workaround for the current FabricHost policy. When an
    // unhandled exception occurs, we check if we have been running longer than
    // the time span at which our failure count gets reset. If not, we sleep
    // in the unhandled exception handler for long enough to ensure that we
    // have been running for longer than the time span.

    // Measure the run time in UTC. Process.StartTime and DateTime.Now are both
    // local wall-clock values, so a daylight-saving or time-zone change while
    // the process is running would skew their difference by the offset change.
    DateTime startTimeUtc;
    using (Process currentProcess = Process.GetCurrentProcess())
    {
        startTimeUtc = currentProcess.StartTime.ToUniversalTime();
    }

    TimeSpan runTimeSoFar = DateTime.UtcNow.Subtract(startTimeUtc);
    TimeSpan desiredRunTime = TimeSpan.FromSeconds(fabricHostExitFailureResetTime)
        .Add(TimeSpan.FromSeconds(FabricHostExitFailureResetTimeBufferSeconds));

    if (desiredRunTime.CompareTo(runTimeSoFar) > 0)
    {
        TimeSpan sleepTime = desiredRunTime.Subtract(runTimeSoFar);

        // Read the event once so it cannot become null between the check and the wait.
        var stopDcaEvent = UnhandledExceptionHandler.StopDcaEvent;
        if (null == stopDcaEvent)
        {
            Thread.Sleep(sleepTime);
        }
        else
        {
            // Waiting on the stop event lets the delay end early when it is signaled.
            stopDcaEvent.WaitOne(sleepTime);
        }
    }
}