/// <summary>
/// Reports the capacity health of a collection in a partition to Service Fabric.
/// </summary>
/// <param name="healthSourceId">Health source identifier.</param>
/// <param name="name">Health property name.</param>
/// <param name="count">Current number of items in the collection.</param>
/// <param name="capacity">Configured capacity of the collection.</param>
/// <param name="pWarn">Multiplier applied to <paramref name="capacity"/> to obtain the warning threshold count (e.g. 0.75).</param>
/// <param name="pError">Multiplier applied to <paramref name="capacity"/> to obtain the error threshold count (e.g. 0.90).</param>
/// <param name="ttl">Health report time to live. A zero or negative value falls back to 30 seconds.</param>
public void ReportHealthPartitionCapacity(string healthSourceId, string name, long count, long capacity, double pWarn, double pError, TimeSpan ttl)
{
    Guard.ArgumentNotNullOrWhitespace(healthSourceId, nameof(healthSourceId));
    Guard.ArgumentNotNullOrWhitespace(name, nameof(name));

    // Calculate the percentages, warning and error counts.
    // The percentage is only used for the description text; a zero count or capacity reports 0%.
    double percentCapacity = (0 == count || 0 == capacity) ? 0.0 : ((double)count / (double)capacity) * 100.0;
    long queueWarningCount = (long)(capacity * pWarn);
    long queueErrorCount = (long)(capacity * pError);

    // Determine the health state based on the count vs. the capacity.
    // NOTE(review): when capacity is 0 both thresholds are 0, so any non-negative count reports Error - confirm this is intended.
    HealthState hs = (count >= queueErrorCount)
        ? HealthState.Error
        : ((count >= queueWarningCount) ? HealthState.Warning : HealthState.Ok);

    // Create the health information to report to Service Fabric.
    HealthInformation hi = new HealthInformation(healthSourceId, name, hs);

    // Honor the caller's TTL; only fall back to 30 seconds when none (or an invalid one) was supplied.
    hi.TimeToLive = (ttl <= TimeSpan.Zero) ? TimeSpan.FromSeconds(30) : ttl;
    hi.Description = $"Count: {count:N0}, Capacity: {capacity:N0}, Used: {percentCapacity}%";
    hi.RemoveWhenExpired = true;
    hi.SequenceNumber = HealthInformation.AutoSequenceNumber;

    // Create a partition health report.
    PartitionHealthReport phr = new PartitionHealthReport(Context.PartitionId, hi);
    ServiceFabricClient.HealthManager.ReportHealth(phr);
}
/// <summary>
/// Reports the requests per second as part of the capacity of a partition.
/// </summary>
/// <param name="healthSourceId">Health source identifier.</param>
/// <param name="name">Health property name.</param>
/// <param name="rps">Current requests per second value.</param>
/// <param name="capacity">Configured capacity. A value of zero (the default) or less is replaced with <see cref="long.MaxValue"/>.</param>
/// <param name="pWarn">Multiplier applied to <paramref name="capacity"/> to obtain the warning threshold.</param>
/// <param name="pError">Multiplier applied to <paramref name="capacity"/> to obtain the error threshold.</param>
/// <param name="ttl">Health report time to live. A zero (the default) or negative value falls back to 30 seconds.</param>
public void ReportHealthRequestPerSecond(string healthSourceId, string name, long rps, long capacity = 0, double pWarn = 0.75, double pError = 0.90, TimeSpan ttl = default(TimeSpan))
{
    Guard.ArgumentNotNullOrWhitespace(healthSourceId, nameof(healthSourceId));
    Guard.ArgumentNotNullOrWhitespace(name, nameof(name));

    // Calculate the warning and error thresholds.
    // A non-positive capacity is replaced with long.MaxValue, which pushes both thresholds far above any realistic rps value.
    capacity = (capacity <= 0) ? long.MaxValue : capacity;
    long rpsWarningCount = (long)(capacity * pWarn);
    long rpsErrorCount = (long)(capacity * pError);

    // Determine the health state based on the rps vs. the capacity thresholds.
    HealthState hs = (rps >= rpsErrorCount)
        ? HealthState.Error
        : ((rps >= rpsWarningCount) ? HealthState.Warning : HealthState.Ok);

    // Create the health information to report to Service Fabric.
    HealthInformation hi = new HealthInformation(healthSourceId, name, hs);

    // Honor the caller's TTL; only fall back to 30 seconds when none (or an invalid one) was supplied.
    hi.TimeToLive = (ttl <= TimeSpan.Zero) ? TimeSpan.FromSeconds(30) : ttl;
    hi.Description = $"RPS: {rps:N0}.";
    hi.RemoveWhenExpired = true;
    hi.SequenceNumber = HealthInformation.AutoSequenceNumber;

    // Create a partition health report.
    PartitionHealthReport phr = new PartitionHealthReport(Context.PartitionId, hi);
    ServiceFabricClient.HealthManager.ReportHealth(phr);
}
/// <summary>
/// Sends a partition health report carrying the given health state, timing the call
/// through a profiler handler.
/// </summary>
/// <param name="healthState">Health state to report.</param>
/// <param name="message">Text passed as the second argument of the <c>HealthInformation</c> constructor.</param>
/// <remarks>
/// Exceptions raised while building or sending the report are not propagated; the
/// exception message is handed to the profiler handler's <c>Stop</c> call instead.
/// </remarks>
public void SendReportForPartition(HealthState healthState, string message)
{
    var failureMessage = "";
    var profiler = HandlersFactory.GetProfilerHandler(_settingService, _loggerService);
    profiler.Start(LOG_TAG, "SendReportForPartition", null);

    try
    {
        var information = new HealthInformation(_serviceName, message, healthState);
        HealthReport partitionReport = new PartitionHealthReport(_partitionId, information);

        // The time to live (in minutes) comes from settings; the report is kept after it expires.
        partitionReport.HealthInformation.TimeToLive =
            TimeSpan.FromMinutes(_settingService.GetHealthIssuesTimeToLive());
        partitionReport.HealthInformation.RemoveWhenExpired = false;

        ReportHealth(partitionReport);
    }
    catch (Exception ex)
    {
        // Best effort: remember the failure for the profiler and carry on.
        failureMessage = ex.Message;
    }
    finally
    {
        profiler.Stop(failureMessage);
    }
}
/// <summary>
/// Converts a Utilities.HealthReport into a Service Fabric health report and sends it
/// through the fabric client's HealthManager so that it shows up in SFX.
/// </summary>
/// <param name="healthReport">
/// Utilities.HealthReport instance. A null value is ignored. When its SourceId or Property
/// is null or empty, a default is assigned on this instance before the report is sent.
/// </param>
public void ReportHealthToServiceFabric(HealthReport healthReport)
{
    if (healthReport == null)
    {
        return;
    }

    // Only OK reports are sent immediately (they clear warning/error states). Sending
    // errors/warnings immediately would only add unnecessary stress to the Health subsystem.
    var sendOptions = new HealthReportSendOptions
    {
        Immediate = healthReport.State == HealthState.Ok,
    };

    // Five minutes unless the report carries its own time to live.
    TimeSpan timeToLive = healthReport.HealthReportTimeToLive != default
        ? healthReport.HealthReportTimeToLive
        : TimeSpan.FromMinutes(5);

    // Serialized telemetry, when present, replaces the preamble + message text entirely.
    TelemetryData telemetry = healthReport.HealthData;
    string preamble = BuildPreamble();
    string description = telemetry != null
        ? JsonConvert.SerializeObject(telemetry)
        : $"{preamble}{healthReport.HealthMessage}";

    if (string.IsNullOrEmpty(healthReport.SourceId))
    {
        healthReport.SourceId = healthReport.Observer;
    }

    if (string.IsNullOrEmpty(healthReport.Property))
    {
        healthReport.Property = DefaultPropertyFor(healthReport.Observer);
    }

    var healthInformation = new HealthInformation(healthReport.SourceId, healthReport.Property, healthReport.State)
    {
        Description = description,
        TimeToLive = timeToLive,
        RemoveWhenExpired = true,
    };

    // Log health event locally.
    if (healthReport.EmitLogEvent)
    {
        string logFormat = healthReport.NodeName + ": {0}";

        switch (healthReport.State)
        {
            case HealthState.Error:
                this.logger.LogError(logFormat, healthInformation.Description);
                break;

            case HealthState.Warning:
                this.logger.LogWarning(logFormat, healthInformation.Description);
                break;

            default:
                this.logger.LogInfo(logFormat, healthInformation.Description);
                break;
        }
    }

    // To SFX. A report type whose required identifiers are missing falls through to a node report.
    var healthManager = this.fabricClient.HealthManager;

    switch (healthReport.ReportType)
    {
        case HealthReportType.Application when healthReport.AppName != null:
            healthManager.ReportHealth(
                new ApplicationHealthReport(healthReport.AppName, healthInformation),
                sendOptions);
            break;

        case HealthReportType.Service when healthReport.ServiceName != null:
            healthManager.ReportHealth(
                new ServiceHealthReport(healthReport.ServiceName, healthInformation),
                sendOptions);
            break;

        case HealthReportType.StatefulService
            when healthReport.PartitionId != Guid.Empty && healthReport.ReplicaOrInstanceId > 0:
            healthManager.ReportHealth(
                new StatefulServiceReplicaHealthReport(healthReport.PartitionId, healthReport.ReplicaOrInstanceId, healthInformation),
                sendOptions);
            break;

        case HealthReportType.StatelessService
            when healthReport.PartitionId != Guid.Empty && healthReport.ReplicaOrInstanceId > 0:
            healthManager.ReportHealth(
                new StatelessServiceInstanceHealthReport(healthReport.PartitionId, healthReport.ReplicaOrInstanceId, healthInformation),
                sendOptions);
            break;

        case HealthReportType.Partition when healthReport.PartitionId != Guid.Empty:
            healthManager.ReportHealth(
                new PartitionHealthReport(healthReport.PartitionId, healthInformation),
                sendOptions);
            break;

        case HealthReportType.DeployedApplication when healthReport.AppName != null:
            healthManager.ReportHealth(
                new DeployedApplicationHealthReport(healthReport.AppName, healthReport.NodeName, healthInformation),
                sendOptions);
            break;

        default:
            healthManager.ReportHealth(
                new NodeHealthReport(healthReport.NodeName, healthInformation),
                sendOptions);
            break;
    }

    // Text placed in front of the health message for Error/Warning reports; empty otherwise.
    string BuildPreamble()
    {
        if (healthReport.State != HealthState.Error && healthReport.State != HealthState.Warning)
        {
            return string.Empty;
        }

        // OSObserver does not monitor resources and therefore does not support related
        // usage threshold configuration, so it gets its own wording.
        if (healthReport.Observer == ObserverConstants.OSObserverName && healthReport.Property == "OSConfiguration")
        {
            return $"{ObserverConstants.OSObserverName} detected potential problem with OS configuration: ";
        }

        return $"{healthReport.Observer} detected " +
               $"{Enum.GetName(typeof(HealthState), healthReport.State)} threshold breach. ";
    }

    // Health property name used when the report does not specify one.
    string DefaultPropertyFor(string observerName)
    {
        switch (observerName)
        {
            case ObserverConstants.AppObserverName:
                return "ApplicationHealth";

            case ObserverConstants.CertificateObserverName:
                return "SecurityHealth";

            case ObserverConstants.DiskObserverName:
                return "DiskHealth";

            case ObserverConstants.FabricSystemObserverName:
                return "FabricSystemServiceHealth";

            case ObserverConstants.NetworkObserverName:
                return "NetworkHealth";

            case ObserverConstants.OSObserverName:
                return "MachineInformation";

            case ObserverConstants.NodeObserverName:
                return "MachineResourceHealth";

            default:
                return $"{observerName}_HealthProperty";
        }
    }
}