/// <summary>
/// Reports the capacity health of a collection in a partition to Service Fabric.
/// </summary>
/// <remarks>
/// The health state is Error when <paramref name="count"/> reaches <c>capacity * pError</c>,
/// Warning when it reaches <c>capacity * pWarn</c>, and Ok otherwise.
/// NOTE(review): the thresholds are multiplied directly by the capacity, so they behave as
/// fractions (e.g. 0.8) rather than whole percentages (e.g. 80) - confirm against callers.
/// </remarks>
/// <param name="healthSourceId">Health source identifier.</param>
/// <param name="name">Health property name.</param>
/// <param name="count">Current number of items in the collection.</param>
/// <param name="capacity">Configured capacity of the collection.</param>
/// <param name="pWarn">Warning threshold; multiplied by <paramref name="capacity"/> to obtain the warning count.</param>
/// <param name="pError">Error threshold; multiplied by <paramref name="capacity"/> to obtain the error count.</param>
/// <param name="ttl">Health report time to live. Zero or negative values fall back to 30 seconds.</param>
public void ReportHealthReplicaCapacity(string healthSourceId, string name, long count, long capacity, double pWarn, double pError, TimeSpan ttl)
{
    Guard.ArgumentNotNullOrWhitespace(healthSourceId, nameof(healthSourceId));
    Guard.ArgumentNotNullOrWhitespace(name, nameof(name));

    // Calculate the percentages, warning and error counts.
    double percentCapacity = (0 == count || 0 == capacity) ? 0.0 : ((double)count / (double)capacity) * 100.0;
    long queueWarningCount = (long)(capacity * pWarn);
    long queueErrorCount = (long)(capacity * pError);

    // Determine the health state based on the count vs. the capacity.
    HealthState hs = (count >= queueErrorCount)
        ? HealthState.Error
        : ((count >= queueWarningCount) ? HealthState.Warning : HealthState.Ok);

    // Create the health information to report to Service Fabric.
    HealthInformation hi = new HealthInformation(healthSourceId, name, hs);

    // Honor the caller's TTL; only fall back to the 30 second default when the supplied value
    // is not a usable positive duration. (The previous condition was inverted and discarded
    // every valid TTL.)
    hi.TimeToLive = (ttl <= TimeSpan.Zero) ? TimeSpan.FromSeconds(30) : ttl;
    hi.Description = $"Count: {count:N0}, Capacity: {capacity:N0}, Used: {percentCapacity}%";
    hi.RemoveWhenExpired = true;
    hi.SequenceNumber = HealthInformation.AutoSequenceNumber;

    // Create a replica health report.
    StatefulServiceReplicaHealthReport ssrhr = new StatefulServiceReplicaHealthReport(Context.PartitionId, Context.ReplicaId, hi);
    ServiceFabricClient.HealthManager.ReportHealth(ssrhr);
}
/// <summary>
/// Sends a stateful replica health report for this service and then forwards the same
/// state and message to <c>SendReportForNode</c>.
/// </summary>
/// <remarks>
/// Reporting is best-effort: any exception is swallowed, and its message is handed to the
/// profiler handler when it is stopped.
/// </remarks>
/// <param name="healthState">Health state to report.</param>
/// <param name="message">Value passed as the health property of the report.</param>
public void SendReportForService(HealthState healthState, string message)
{
    var failure = "";
    var profiler = HandlersFactory.GetProfilerHandler(_settingService, _loggerService);
    profiler.Start(LOG_TAG, "SendReportForService", null);

    try
    {
        var information = new HealthInformation(_serviceName, message, healthState);
        HealthReport report = new StatefulServiceReplicaHealthReport(_partitionId, _replicaId, information);

        // Configure the report through its own HealthInformation property.
        report.HealthInformation.TimeToLive = TimeSpan.FromMinutes(_settingService.GetHealthIssuesTimeToLive());
        report.HealthInformation.RemoveWhenExpired = false;

        ReportHealth(report);
        SendReportForNode(healthState, message);
    }
    catch (Exception ex)
    {
        // Deliberately not rethrown; the message is surfaced via the profiler handler below.
        failure = ex.Message;
    }
    finally
    {
        profiler.Stop(failure);
    }
}
/// <summary>
/// Builds the replica or instance health report that matches the active parameter set.
/// </summary>
/// <param name="healthInformation">Health information to wrap in the report.</param>
/// <returns>
/// A <see cref="StatefulServiceReplicaHealthReport"/> for the stateful parameter set, or a
/// <see cref="StatelessServiceInstanceHealthReport"/> for the stateless parameter set.
/// </returns>
/// <exception cref="ArgumentException">The parameter set name is not one of the two known sets.</exception>
protected override HealthReport GetHealthReport(HealthInformation healthInformation)
{
    switch (this.ParameterSetName)
    {
        // Stateful service: report against the replica.
        case Constants.SendReplicaHealthReportstatefulServiceParamSetName:
            return new StatefulServiceReplicaHealthReport(this.PartitionId, this.ReplicaId, healthInformation);

        // Stateless service: report against the instance.
        case Constants.SendReplicaHealthReportStatelessServiceParamSetName:
            return new StatelessServiceInstanceHealthReport(this.PartitionId, this.InstanceId, healthInformation);

        default:
        {
            string unknownSetMessage = string.Format(
                CultureInfo.CurrentCulture,
                StringResources.Error_ReplicaHealthReportUnknownParamSet,
                ParameterSetName);
            throw new ArgumentException(unknownSetMessage);
        }
    }
}
/// <summary>
/// Reports the health of a single replica (stateful) or instance (stateless) when replica
/// health reporting is enabled in <paramref name="options"/>.
/// </summary>
/// <remarks>
/// An unsupported service kind is logged and skipped. Exceptions thrown while sending the
/// report are logged and not propagated.
/// </remarks>
/// <param name="options">Discovery options; reporting is skipped when <c>ReportReplicasHealth</c> is false.</param>
/// <param name="service">Service owning the replica; its kind selects the report type.</param>
/// <param name="partitionId">Partition the replica belongs to.</param>
/// <param name="replica">Replica or instance being reported on.</param>
/// <param name="state">Health state to report.</param>
/// <param name="description">Optional description attached to the report.</param>
private void ReportReplicaHealth(
    ServiceFabricDiscoveryOptions options,
    ServiceWrapper service,
    Guid partitionId,
    ReplicaWrapper replica,
    HealthState state,
    string description = null)
{
    if (!options.ReportReplicasHealth)
    {
        return;
    }

    var info = new HealthInformation(HealthReportSourceId, HealthReportProperty, state);
    info.Description = description;
    info.TimeToLive = HealthReportTimeToLive(options);
    info.RemoveWhenExpired = true;

    var kind = service.ServiceKind;
    HealthReport report;
    if (kind == ServiceKind.Stateful)
    {
        report = new StatefulServiceReplicaHealthReport(partitionId, replica.Id, info);
    }
    else if (kind == ServiceKind.Stateless)
    {
        report = new StatelessServiceInstanceHealthReport(partitionId, replica.Id, info);
    }
    else
    {
        // Neither stateful nor stateless: nothing can be reported for this replica.
        Log.ReplicaHealthReportFailedInvalidServiceKind(_logger, state, replica.Id, kind);
        return;
    }

    // Anything other than Ok is flagged for immediate sending.
    var sendOptions = new HealthReportSendOptions();
    sendOptions.Immediate = state != HealthState.Ok;

    try
    {
        _serviceFabricCaller.ReportHealth(report, sendOptions);
    }
    catch (Exception ex) // TODO: davidni: not fatal?
    {
        Log.ReplicaHealthReportFailed(_logger, state, replica.Id, ex);
    }
}
/// <summary>
/// Creates a health report for the requested entity type, populated from the given context.
/// </summary>
/// <param name="context">The Service Fabric context that the health report is for.</param>
/// <param name="reportSourceId">The unique reporting source id.</param>
/// <param name="propertyName">The name of the health property being reported on.</param>
/// <param name="state">The current state of the health property.</param>
/// <param name="reportType">The entity type the report is for.</param>
/// <param name="timeToLive">The time to live of the health report.</param>
/// <returns>A health report for the appropriate reporting entity.</returns>
/// <exception cref="ArgumentException"><paramref name="reportType"/> is not a handled value.</exception>
public static HealthReport GetHealthReport(ServiceContext context, string reportSourceId, string propertyName, HealthState state, ReportTypes reportType, TimeSpan timeToLive)
{
    // The same health information is used whichever entity the report targets.
    var info = new HealthInformation(reportSourceId, propertyName, state)
    {
        Description = $"{ propertyName } health state { Enum.GetName(typeof(HealthState), state) }",
        RemoveWhenExpired = true,
        TimeToLive = timeToLive,
        SequenceNumber = HealthInformation.AutoSequenceNumber,
    };

    switch (reportType)
    {
        case ReportTypes.Cluster:
            return new ClusterHealthReport(info);

        case ReportTypes.Application:
            return new ApplicationHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                info);

        case ReportTypes.DeployedApplication:
            return new DeployedApplicationHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                context.NodeContext.NodeName,
                info);

        case ReportTypes.Service:
            return new ServiceHealthReport(context.ServiceName, info);

        case ReportTypes.DeployedService:
            return new DeployedServicePackageHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                context.CodePackageActivationContext.GetServiceManifestName(),
                context.NodeContext.NodeName,
                info);

        case ReportTypes.Node:
            return new NodeHealthReport(context.NodeContext.NodeName, info);

        case ReportTypes.Instance:
            // A stateless context gets an instance report; any other context gets a replica report.
            if (context is StatelessServiceContext)
            {
                return new StatelessServiceInstanceHealthReport(context.PartitionId, context.ReplicaOrInstanceId, info);
            }

            return new StatefulServiceReplicaHealthReport(context.PartitionId, context.ReplicaOrInstanceId, info);

        default:
            throw new ArgumentException("Unknown health type", nameof(reportType));
    }
}
/// <summary>
/// Sends an immediate health report for the given sub-status.
/// </summary>
/// <remarks>
/// The work is done synchronously and an already-completed task is returned. When
/// <paramref name="instance"/> is null the report targets the service; otherwise it targets
/// the current replica (stateful) or instance (stateless).
/// </remarks>
/// <param name="serviceName">Not used by this implementation.</param>
/// <param name="instance">Null to report at service level; any other value reports at replica/instance level.</param>
/// <param name="subStatusName">Used as the health property name.</param>
/// <param name="status">Status to map to a Service Fabric health state.</param>
/// <param name="message">Description attached to the report.</param>
/// <returns>A completed task.</returns>
public Task UpdateStatusAsync(string serviceName, string instance, string subStatusName, HealthStatus status, string message)
{
    // The source id is the runtime type name of this reporter.
    var info = new HealthInformation(GetType().FullName, subStatusName, MapStatus(status));
    info.Description = message;

    System.Fabric.Health.HealthReport report;
    if (instance != null)
    {
        if (_isStateful)
        {
            report = new StatefulServiceReplicaHealthReport(_context.PartitionId, _context.ReplicaOrInstanceId, info);
        }
        else
        {
            report = new StatelessServiceInstanceHealthReport(_context.PartitionId, _context.ReplicaOrInstanceId, info);
        }
    }
    else
    {
        report = new ServiceHealthReport(GetServiceUri(), info);
    }

    var sendOptions = new HealthReportSendOptions();
    sendOptions.Immediate = true;
    _fabricClient.HealthManager.ReportHealth(report, sendOptions);

    return Task.CompletedTask;
}
/// <summary>
/// Reports the latency health of a replica to Service Fabric.
/// </summary>
/// <remarks>
/// The health state is Error when the latest latency reaches <paramref name="errorValue"/>,
/// Warning when it reaches <paramref name="warnValue"/>, and Ok otherwise.
/// </remarks>
/// <param name="healthSourceId">Health source identifier.</param>
/// <param name="name">Health property name.</param>
/// <param name="latency">AverageLatency instance whose latest value is evaluated.</param>
/// <param name="warnValue">Warning value.</param>
/// <param name="errorValue">Error value.</param>
/// <param name="ttl">Health report time to live. Zero or negative values fall back to 30 seconds.</param>
public void ReportHealthReplicaLatency(string healthSourceId, string name, AverageLatency latency, Int64 warnValue, Int64 errorValue, TimeSpan ttl)
{
    Guard.ArgumentNotNullOrWhitespace(healthSourceId, nameof(healthSourceId));
    Guard.ArgumentNotNullOrWhitespace(name, nameof(name));

    // Read the latest value once so the reported state and the description always agree,
    // even if the latency instance is updated while this method runs.
    var latest = latency.GetLatest();

    // Determine the health state based on the latest latency vs. the thresholds.
    HealthState hs = (latest >= errorValue)
        ? HealthState.Error
        : ((latest >= warnValue) ? HealthState.Warning : HealthState.Ok);

    // Create the health information to report to Service Fabric.
    HealthInformation hi = new HealthInformation(healthSourceId, name, hs);

    // Honor the caller's TTL; only fall back to the 30 second default when the supplied value
    // is not a usable positive duration. (The previous condition was inverted and discarded
    // every valid TTL.)
    hi.TimeToLive = (ttl <= TimeSpan.Zero) ? TimeSpan.FromSeconds(30) : ttl;
    hi.Description = $"{name} latency: {latest}";
    hi.RemoveWhenExpired = true;
    hi.SequenceNumber = HealthInformation.AutoSequenceNumber;

    // Create a replica health report.
    StatefulServiceReplicaHealthReport ssrhr = new StatefulServiceReplicaHealthReport(Context.PartitionId, Context.ReplicaId, hi);
    ServiceFabricClient.HealthManager.ReportHealth(ssrhr);
}
/// <summary>
/// Generates a Service Fabric health report (visible in SFX) from an observer health report.
/// </summary>
/// <remarks>
/// A null <paramref name="healthReport"/> is ignored. An empty <c>SourceId</c> or
/// <c>Property</c> on the report is filled in with a default derived from the observer name,
/// so the passed-in instance may be modified. The report is sent against the entity selected
/// by <c>ReportType</c>; when the identifying data required for that type is missing, a node
/// report is sent instead.
/// </remarks>
/// <param name="healthReport">Utilities.HealthReport instance.</param>
public void ReportHealthToServiceFabric(HealthReport healthReport)
{
    if (healthReport == null)
    {
        return;
    }

    // Only Ok reports are sent immediately (they clear warning/error states). Sending
    // errors/warnings immediately would only add unnecessary stress to the health subsystem.
    var sendOptions = new HealthReportSendOptions
    {
        Immediate = healthReport.State == HealthState.Ok,
    };

    // Default to five minutes unless the report carries its own time to live.
    TimeSpan timeToLive = (healthReport.HealthReportTimeToLive != default)
        ? healthReport.HealthReportTimeToLive
        : TimeSpan.FromMinutes(5);

    TelemetryData healthData = healthReport.HealthData;

    // Build the preamble that prefixes error/warning messages.
    string preamble = string.Empty;
    bool isUnhealthy = healthReport.State == HealthState.Error || healthReport.State == HealthState.Warning;
    if (isUnhealthy)
    {
        // OSObserver does not monitor resources and therefore does not support related usage threshold configuration.
        bool isOsConfigurationReport = healthReport.Observer == ObserverConstants.OSObserverName
            && healthReport.Property == "OSConfiguration";

        if (isOsConfigurationReport)
        {
            preamble = $"{ObserverConstants.OSObserverName} detected potential problem with OS configuration: ";
        }
        else
        {
            preamble = $"{healthReport.Observer} detected " +
                       $"{Enum.GetName(typeof(HealthState), healthReport.State)} threshold breach. ";
        }
    }

    // Structured telemetry, when present, replaces the plain-text message entirely.
    string message = $"{preamble}{healthReport.HealthMessage}";
    if (healthData != null)
    {
        message = JsonConvert.SerializeObject(healthData);
    }

    if (string.IsNullOrEmpty(healthReport.SourceId))
    {
        healthReport.SourceId = healthReport.Observer;
    }

    if (string.IsNullOrEmpty(healthReport.Property))
    {
        // Pick a default health property name based on the reporting observer.
        string defaultProperty;
        switch (healthReport.Observer)
        {
            case ObserverConstants.AppObserverName:
                defaultProperty = "ApplicationHealth";
                break;
            case ObserverConstants.CertificateObserverName:
                defaultProperty = "SecurityHealth";
                break;
            case ObserverConstants.DiskObserverName:
                defaultProperty = "DiskHealth";
                break;
            case ObserverConstants.FabricSystemObserverName:
                defaultProperty = "FabricSystemServiceHealth";
                break;
            case ObserverConstants.NetworkObserverName:
                defaultProperty = "NetworkHealth";
                break;
            case ObserverConstants.OSObserverName:
                defaultProperty = "MachineInformation";
                break;
            case ObserverConstants.NodeObserverName:
                defaultProperty = "MachineResourceHealth";
                break;
            default:
                defaultProperty = $"{healthReport.Observer}_HealthProperty";
                break;
        }

        healthReport.Property = defaultProperty;
    }

    var healthInformation = new HealthInformation(healthReport.SourceId, healthReport.Property, healthReport.State);
    healthInformation.Description = message;
    healthInformation.TimeToLive = timeToLive;
    healthInformation.RemoveWhenExpired = true;

    // Log health event locally.
    if (healthReport.EmitLogEvent)
    {
        switch (healthReport.State)
        {
            case HealthState.Error:
                this.logger.LogError(healthReport.NodeName + ": {0}", healthInformation.Description);
                break;
            case HealthState.Warning:
                this.logger.LogWarning(healthReport.NodeName + ": {0}", healthInformation.Description);
                break;
            default:
                this.logger.LogInfo(healthReport.NodeName + ": {0}", healthInformation.Description);
                break;
        }
    }

    // To SFX. Choose the report for the requested entity; fall back to a node report when the
    // report type is anything else or its identifying data is missing.
    System.Fabric.Health.HealthReport fabricReport;
    switch (healthReport.ReportType)
    {
        case HealthReportType.Application when healthReport.AppName != null:
            fabricReport = new ApplicationHealthReport(healthReport.AppName, healthInformation);
            break;

        case HealthReportType.Service when healthReport.ServiceName != null:
            fabricReport = new ServiceHealthReport(healthReport.ServiceName, healthInformation);
            break;

        case HealthReportType.StatefulService
            when healthReport.PartitionId != Guid.Empty && healthReport.ReplicaOrInstanceId > 0:
            fabricReport = new StatefulServiceReplicaHealthReport(healthReport.PartitionId, healthReport.ReplicaOrInstanceId, healthInformation);
            break;

        case HealthReportType.StatelessService
            when healthReport.PartitionId != Guid.Empty && healthReport.ReplicaOrInstanceId > 0:
            fabricReport = new StatelessServiceInstanceHealthReport(healthReport.PartitionId, healthReport.ReplicaOrInstanceId, healthInformation);
            break;

        case HealthReportType.Partition when healthReport.PartitionId != Guid.Empty:
            fabricReport = new PartitionHealthReport(healthReport.PartitionId, healthInformation);
            break;

        case HealthReportType.DeployedApplication when healthReport.AppName != null:
            fabricReport = new DeployedApplicationHealthReport(healthReport.AppName, healthReport.NodeName, healthInformation);
            break;

        default:
            fabricReport = new NodeHealthReport(healthReport.NodeName, healthInformation);
            break;
    }

    this.fabricClient.HealthManager.ReportHealth(fabricReport, sendOptions);
}
/// <summary>
/// Builds the health report for the entity selected by <paramref name="reportType"/>.
/// </summary>
/// <param name="context">The Service Fabric context that the health report is for.</param>
/// <param name="reportSourceId">The unique reporting source id.</param>
/// <param name="propertyName">The name of the health property being reported on.</param>
/// <param name="state">The current state of the health property.</param>
/// <param name="reportType">The entity type the report is for.</param>
/// <param name="timeToLive">The time to live of the health report.</param>
/// <returns>A health report for the appropriate reporting entity.</returns>
/// <exception cref="ArgumentException"><paramref name="reportType"/> is not a handled value.</exception>
public static HealthReport GetHealthReport(ServiceContext context, string reportSourceId, string propertyName, HealthState state, ReportTypes reportType, TimeSpan timeToLive)
{
    string description = $"{ propertyName } health state { Enum.GetName(typeof(HealthState), state) }";

    var details = new HealthInformation(reportSourceId, propertyName, state);
    details.Description = description;
    details.RemoveWhenExpired = true;
    details.TimeToLive = timeToLive;
    details.SequenceNumber = HealthInformation.AutoSequenceNumber;

    switch (reportType)
    {
        case ReportTypes.Cluster:
        {
            // Cluster reports need nothing from the context.
            return new ClusterHealthReport(details);
        }

        case ReportTypes.Application:
        {
            var applicationUri = new Uri(context.CodePackageActivationContext.ApplicationName);
            return new ApplicationHealthReport(applicationUri, details);
        }

        case ReportTypes.DeployedApplication:
        {
            var applicationUri = new Uri(context.CodePackageActivationContext.ApplicationName);
            return new DeployedApplicationHealthReport(applicationUri, context.NodeContext.NodeName, details);
        }

        case ReportTypes.Service:
        {
            return new ServiceHealthReport(context.ServiceName, details);
        }

        case ReportTypes.DeployedService:
        {
            var applicationUri = new Uri(context.CodePackageActivationContext.ApplicationName);
            var manifestName = context.CodePackageActivationContext.GetServiceManifestName();
            return new DeployedServicePackageHealthReport(applicationUri, manifestName, context.NodeContext.NodeName, details);
        }

        case ReportTypes.Node:
        {
            return new NodeHealthReport(context.NodeContext.NodeName, details);
        }

        case ReportTypes.Instance:
        {
            // Stateless contexts report against the instance; all others against the replica.
            if (context is StatelessServiceContext)
            {
                return new StatelessServiceInstanceHealthReport(context.PartitionId, context.ReplicaOrInstanceId, details);
            }
            else
            {
                return new StatefulServiceReplicaHealthReport(context.PartitionId, context.ReplicaOrInstanceId, details);
            }
        }

        default:
            throw new ArgumentException("Unknown health type", nameof(reportType));
    }
}