/// <summary>
/// Builds the Service Fabric health report that matches <paramref name="reportType"/>.
/// </summary>
/// <param name="context">Service context used to address the report (application name, service name, service manifest name, node name, partition id and replica/instance id). It is not read for <c>ReportTypes.Cluster</c>.</param>
/// <param name="reportSourceId">Source id passed to the health information.</param>
/// <param name="propertyName">Health property name; it also forms the start of the description text.</param>
/// <param name="state">Health state to report; its enum name forms the end of the description text.</param>
/// <param name="reportType">Selects which kind of entity the report is created for.</param>
/// <param name="timeToLive">Time to live assigned to the health information.</param>
/// <returns>A health report of the concrete type that corresponds to <paramref name="reportType"/>.</returns>
/// <exception cref="ArgumentException"><paramref name="reportType"/> is not one of the handled values.</exception>
public static HealthReport GetHealthReport(ServiceContext context, string reportSourceId, string propertyName, HealthState state, ReportTypes reportType, TimeSpan timeToLive)
{
    // The same health information is shared by every report type; only the addressing differs.
    var details = new HealthInformation(reportSourceId, propertyName, state)
    {
        Description = $"{propertyName} health state {Enum.GetName(typeof(HealthState), state)}",
        RemoveWhenExpired = true,
        TimeToLive = timeToLive,
        SequenceNumber = HealthInformation.AutoSequenceNumber,
    };

    switch (reportType)
    {
        case ReportTypes.Cluster:
            return new ClusterHealthReport(details);

        case ReportTypes.Application:
            return new ApplicationHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                details);

        case ReportTypes.DeployedApplication:
            return new DeployedApplicationHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                context.NodeContext.NodeName,
                details);

        case ReportTypes.Service:
            return new ServiceHealthReport(context.ServiceName, details);

        case ReportTypes.DeployedService:
            return new DeployedServicePackageHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                context.CodePackageActivationContext.GetServiceManifestName(),
                context.NodeContext.NodeName,
                details);

        case ReportTypes.Node:
            return new NodeHealthReport(context.NodeContext.NodeName, details);

        case ReportTypes.Instance:
            // A stateless context yields an instance report; any other context yields a replica report.
            if (context is StatelessServiceContext)
            {
                return new StatelessServiceInstanceHealthReport(context.PartitionId, context.ReplicaOrInstanceId, details);
            }

            return new StatefulServiceReplicaHealthReport(context.PartitionId, context.ReplicaOrInstanceId, details);

        default:
            throw new ArgumentException("Unknown health type", nameof(reportType));
    }
}
/// <summary>
/// Forwards an observer health report to the Service Fabric health manager. The report is sent
/// as an application health report when it is of application type and carries an application
/// name, and as a node health report otherwise.
/// </summary>
/// <param name="healthReport">The report to forward. Nothing is done when it is null.</param>
public void ReportHealthToServiceFabric(HealthReport healthReport)
{
    if (healthReport == null)
    {
        return;
    }

    // Only OK reports are sent immediately (they clear warning/error states). Errors and warnings
    // stay non-immediate so the health subsystem is not put under unnecessary stress.
    var sendOptions = new HealthReportSendOptions
    {
        Immediate = healthReport.State == HealthState.Ok,
    };

    // Use the report's own time to live when it has one, otherwise five minutes.
    TimeSpan timeToLive = healthReport.HealthReportTimeToLive;
    if (timeToLive == default)
    {
        timeToLive = TimeSpan.FromMinutes(5);
    }

    // Each known observer reports under its own health property; anything else is generic.
    string property;
    switch (healthReport.Observer)
    {
        case ObserverConstants.AppObserverName:
            property = "AppHealth";
            break;

        case ObserverConstants.CertificateObserverName:
            property = "SecurityHealth";
            break;

        case ObserverConstants.DiskObserverName:
            property = "DiskHealth";
            break;

        case ObserverConstants.FabricSystemObserverName:
            property = "FabricSystemServiceHealth";
            break;

        case ObserverConstants.NetworkObserverName:
            property = "NetworkingHealth";
            break;

        case ObserverConstants.OsObserverName:
            property = "MachineInformation";
            break;

        case ObserverConstants.NodeObserverName:
            property = "MachineResourceHealth";
            break;

        default:
            property = "FOGenericHealth";
            break;
    }

    // A non-empty error/warning code replaces the observer name as the event source id.
    string sourceId = string.IsNullOrEmpty(healthReport.Code)
        ? healthReport.Observer
        : $"{healthReport.Code}";

    var healthInformation = new HealthInformation(sourceId, property, healthReport.State);
    healthInformation.Description = healthReport.HealthMessage;
    healthInformation.TimeToLive = timeToLive;
    healthInformation.RemoveWhenExpired = true;

    // Write a log event only when the ObserverWebApi app is deployed and the report asks for it.
    if (ObserverManager.ObserverWebAppDeployed && healthReport.EmitLogEvent)
    {
        string logFormat = healthReport.NodeName + ": {0}";

        switch (healthReport.State)
        {
            case HealthState.Error:
                this.logger.LogError(logFormat, healthInformation.Description);
                break;

            case HealthState.Warning:
                this.logger.LogWarning(logFormat, healthInformation.Description);
                break;

            default:
                this.logger.LogInfo(logFormat, healthInformation.Description);
                break;
        }
    }

    // Application-type reports without an application name fall back to a node report.
    bool targetsApplication = healthReport.ReportType == HealthReportType.Application
        && healthReport.AppName != null;

    if (targetsApplication)
    {
        var appHealthReport = new ApplicationHealthReport(healthReport.AppName, healthInformation);
        this.fabricClient.HealthManager.ReportHealth(appHealthReport, sendOptions);
        return;
    }

    var nodeHealthReport = new NodeHealthReport(healthReport.NodeName, healthInformation);
    this.fabricClient.HealthManager.ReportHealth(nodeHealthReport, sendOptions);
}
/// <summary>
/// Forwards an observer health report to the Service Fabric health manager. The report is sent
/// as an application health report when it is of application type and carries an application
/// name, and as a node health report otherwise.
/// </summary>
/// <param name="healthReport">The report to forward. Nothing is done when it is null.</param>
public void ReportHealthToServiceFabric(HealthReport healthReport)
{
    if (healthReport == null)
    {
        return;
    }

    // Only OK reports are sent immediately (they clear warning/error states). Errors and warnings
    // stay non-immediate so the health subsystem is not put under unnecessary stress.
    var sendOptions = new HealthReportSendOptions
    {
        Immediate = healthReport.State == HealthState.Ok,
    };

    // Use the report's own time to live when it has one, otherwise five minutes.
    TimeSpan timeToLive = healthReport.HealthReportTimeToLive;
    if (timeToLive == default)
    {
        timeToLive = TimeSpan.FromMinutes(5);
    }

    // An explicit property on the report wins; otherwise it is derived from the observer name.
    string property = healthReport.Property;
    if (string.IsNullOrEmpty(property))
    {
        switch (healthReport.Observer)
        {
            case ObserverConstants.AppObserverName:
                property = "ApplicationHealth";
                break;

            case ObserverConstants.CertificateObserverName:
                property = "SecurityHealth";
                break;

            case ObserverConstants.DiskObserverName:
                property = "DiskHealth";
                break;

            case ObserverConstants.FabricSystemObserverName:
                property = "FabricSystemServiceHealth";
                break;

            case ObserverConstants.NetworkObserverName:
                property = "NetworkHealth";
                break;

            case ObserverConstants.OsObserverName:
                property = "MachineInformation";
                break;

            case ObserverConstants.NodeObserverName:
                property = "MachineResourceHealth";
                break;

            default:
                property = "FOGenericHealth";
                break;
        }
    }

    // The source id is the observer name, with the error/warning code appended in parentheses
    // when the report has one.
    string sourceId = healthReport.Observer;
    if (!string.IsNullOrEmpty(healthReport.Code))
    {
        sourceId += $"({healthReport.Code})";
    }

    // Structured health data, when present, is serialized and used as the whole description;
    // otherwise the description is the health message, prefixed for errors and warnings.
    TelemetryData healthData = healthReport.HealthData;
    string message;
    if (healthData != null)
    {
        message = JsonConvert.SerializeObject(healthData);
    }
    else
    {
        string errWarnPreamble = string.Empty;
        bool isErrorOrWarning = healthReport.State == HealthState.Error
            || healthReport.State == HealthState.Warning;

        if (isErrorOrWarning)
        {
            // OSObserver does not monitor resources, so its OS-configuration reports use their own
            // wording in place of the threshold-breach text.
            bool isOsConfigurationReport = healthReport.Observer == ObserverConstants.OsObserverName
                && property == "OSConfiguration";

            if (isOsConfigurationReport)
            {
                errWarnPreamble = $"{ObserverConstants.OsObserverName} detected potential problem with OS configuration: ";
            }
            else
            {
                errWarnPreamble = $"{healthReport.Observer} detected {Enum.GetName(typeof(HealthState), healthReport.State)} threshold breach. ";
            }
        }

        message = $"{errWarnPreamble}{healthReport.HealthMessage}";
    }

    var healthInformation = new HealthInformation(sourceId, property, healthReport.State);
    healthInformation.Description = $"{message}";
    healthInformation.TimeToLive = timeToLive;
    healthInformation.RemoveWhenExpired = true;

    // Write a log event only when the ObserverWebApi app is deployed and the report asks for it.
    if (ObserverManager.ObserverWebAppDeployed && healthReport.EmitLogEvent)
    {
        string logFormat = healthReport.NodeName + ": {0}";

        switch (healthReport.State)
        {
            case HealthState.Error:
                this.logger.LogError(logFormat, healthInformation.Description);
                break;

            case HealthState.Warning:
                this.logger.LogWarning(logFormat, healthInformation.Description);
                break;

            default:
                this.logger.LogInfo(logFormat, healthInformation.Description);
                break;
        }
    }

    // Application-type reports without an application name fall back to a node report.
    bool targetsApplication = healthReport.ReportType == HealthReportType.Application
        && healthReport.AppName != null;

    if (targetsApplication)
    {
        var appHealthReport = new ApplicationHealthReport(healthReport.AppName, healthInformation);
        this.fabricClient.HealthManager.ReportHealth(appHealthReport, sendOptions);
        return;
    }

    var nodeHealthReport = new NodeHealthReport(healthReport.NodeName, healthInformation);
    this.fabricClient.HealthManager.ReportHealth(nodeHealthReport, sendOptions);
}
/// <summary>
/// Forwards an observer health report to the Service Fabric health manager. The report is sent
/// as an application health report when it is of application type and carries an application
/// name, and as a node health report otherwise.
/// </summary>
/// <param name="healthReport">The report to forward. Nothing is done when it is null.</param>
public void ReportHealthToServiceFabric(Utilities.HealthReport healthReport)
{
    if (healthReport == null)
    {
        return;
    }

    // Only OK reports are sent immediately (they clear warning/error states). Errors and warnings
    // stay non-immediate so the health subsystem is not put under unnecessary stress.
    var sendOptions = new HealthReportSendOptions
    {
        Immediate = healthReport.State == HealthState.Ok,
    };

    // Use the report's own time to live when it has one, otherwise five minutes.
    TimeSpan timeToLive = healthReport.HealthReportTimeToLive;
    if (timeToLive == default(TimeSpan))
    {
        timeToLive = TimeSpan.FromMinutes(5);
    }

    var code = healthReport.Code;
    var usageProperty = healthReport.ResourceUsageDataProperty;

    // The description is prefixed with the error/warning code. An empty code is treated like a
    // missing one so the description never starts with a dangling ": ".
    string kind = string.IsNullOrEmpty(code) ? string.Empty : code + ": ";

    // For several events from one observer to show up side by side in the SFX details view, their
    // source ids must differ, so a suffix naming the monitored resource is appended below. Each
    // resource is recognised either by the error/warning code or by the resource usage property;
    // the property check covers OK reports, which carry no code.
    bool isCpu = code == ErrorWarningCode.WarningCpuTime
        || usageProperty == ErrorWarningProperty.TotalCpuTime;
    bool isMemoryPercent = code == ErrorWarningCode.WarningMemoryPercentUsed
        || usageProperty == ErrorWarningProperty.TotalMemoryConsumptionPct;
    bool isMemoryMb = code == ErrorWarningCode.WarningMemoryCommitted
        || usageProperty == ErrorWarningProperty.TotalMemoryConsumptionMB;
    bool isActivePorts = code == ErrorWarningCode.WarningTooManyActiveTcpPorts
        || usageProperty == ErrorWarningProperty.TotalActivePorts;
    bool isEphemeralPorts = code == ErrorWarningCode.WarningTooManyActiveEphemeralPorts
        || usageProperty == ErrorWarningProperty.TotalEphemeralPorts;
    bool isFirewallRules = code == ErrorWarningCode.WarningTooManyFirewallRules
        || usageProperty == ErrorWarningProperty.TotalActiveFirewallRules;
    bool isDiskQueueLength = code == ErrorWarningCode.WarningDiskAverageQueueLength
        || usageProperty == ErrorWarningProperty.DiskAverageQueueLength;
    bool isDiskSpacePercent = code == ErrorWarningCode.WarningDiskSpacePercentUsed
        || usageProperty == ErrorWarningProperty.DiskSpaceUsagePercentage;
    bool isDiskSpaceMb = code == ErrorWarningCode.WarningDiskSpaceMB
        || usageProperty == ErrorWarningProperty.DiskSpaceUsageMB;

    string source = healthReport.Observer;
    string property;

    // Within each observer the first matching resource wins; the order of the checks is significant.
    switch (healthReport.Observer)
    {
        case ObserverConstants.AppObserverName:
            property = "AppHealth";
            source += isCpu ? "(CPU)"
                : isMemoryPercent ? "(Memory%)"
                : isMemoryMb ? "(MemoryMB)"
                : isEphemeralPorts ? "(ActiveEphemeralPorts)"
                : string.Empty;
            break;

        case ObserverConstants.CertificateObserverName:
            property = "SecurityHealth";
            break;

        case ObserverConstants.DiskObserverName:
            property = "DiskHealth";
            source += isDiskQueueLength ? "(DiskQueueLength)"
                : isDiskSpacePercent ? "(DiskSpace%)"
                : isDiskSpaceMb ? "(DiskSpaceMB)"
                : string.Empty;
            break;

        case ObserverConstants.FabricSystemObserverName:
            property = "FabricSystemServiceHealth";
            source += isCpu ? "(CPU)"
                : isMemoryPercent ? "(Memory%)"
                : isMemoryMb ? "(MemoryMB)"
                : isActivePorts ? "(ActivePorts)"
                : isEphemeralPorts ? "(ActiveEphemeralPorts)"
                : string.Empty;
            break;

        case ObserverConstants.NetworkObserverName:
            property = "NetworkingHealth";
            break;

        case ObserverConstants.OSObserverName:
            property = "MachineInformation";
            break;

        case ObserverConstants.NodeObserverName:
            property = "MachineResourceHealth";
            source += isCpu ? "(CPU)"
                : isFirewallRules ? "(FirewallRules)"
                : isMemoryPercent ? "(Memory%)"
                : isMemoryMb ? "(MemoryMB)"
                : isActivePorts ? "(ActivePorts)"
                : isEphemeralPorts ? "(ActiveEphemeralPorts)"
                : string.Empty;
            break;

        default:
            property = "FOGenericHealth";
            break;
    }

    var healthInformation = new HealthInformation(source, property, healthReport.State)
    {
        Description = kind + healthReport.HealthMessage,
        TimeToLive = timeToLive,
        RemoveWhenExpired = true,
    };

    // Write a log event only when the ObserverWebApi app is deployed and the report asks for it.
    if (ObserverManager.ObserverWebAppDeployed && healthReport.EmitLogEvent)
    {
        string logFormat = healthReport.NodeName + ": {0}";

        switch (healthReport.State)
        {
            case HealthState.Error:
                this.logger.LogError(logFormat, healthInformation.Description);
                break;

            case HealthState.Warning:
                this.logger.LogWarning(logFormat, healthInformation.Description);
                break;

            default:
                this.logger.LogInfo(logFormat, healthInformation.Description);
                break;
        }
    }

    // Application-type reports without an application name fall back to a node report.
    if (healthReport.ReportType == HealthReportType.Application && healthReport.AppName != null)
    {
        var appHealthReport = new ApplicationHealthReport(healthReport.AppName, healthInformation);
        this.fabricClient.HealthManager.ReportHealth(appHealthReport, sendOptions);
    }
    else
    {
        var nodeHealthReport = new NodeHealthReport(healthReport.NodeName, healthInformation);
        this.fabricClient.HealthManager.ReportHealth(nodeHealthReport, sendOptions);
    }
}
/// <summary>
/// Creates a health report for the entity kind named by <paramref name="reportType"/>.
/// </summary>
/// <param name="context">Service context that supplies the addressing data (application name, service name, service manifest name, node name, partition id, replica/instance id). It is not read for <c>ReportTypes.Cluster</c>.</param>
/// <param name="reportSourceId">Source id passed to the health information.</param>
/// <param name="propertyName">Name of the health property being reported; also the start of the description text.</param>
/// <param name="state">Health state being reported; its enum name is the end of the description text.</param>
/// <param name="reportType">The entity type the report is for.</param>
/// <param name="timeToLive">Time to live assigned to the health information.</param>
/// <returns>A health report whose concrete type corresponds to <paramref name="reportType"/>.</returns>
/// <exception cref="ArgumentException"><paramref name="reportType"/> is not one of the handled values.</exception>
public static HealthReport GetHealthReport(ServiceContext context, string reportSourceId, string propertyName, HealthState state, ReportTypes reportType, TimeSpan timeToLive)
{
    // Health information common to every report type.
    var healthInfo = new HealthInformation(reportSourceId, propertyName, state)
    {
        Description = $"{propertyName} health state {Enum.GetName(typeof(HealthState), state)}",
        RemoveWhenExpired = true,
        TimeToLive = timeToLive,
        SequenceNumber = HealthInformation.AutoSequenceNumber,
    };

    HealthReport result;

    switch (reportType)
    {
        case ReportTypes.Cluster:
            result = new ClusterHealthReport(healthInfo);
            break;

        case ReportTypes.Application:
            result = new ApplicationHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                healthInfo);
            break;

        case ReportTypes.DeployedApplication:
            result = new DeployedApplicationHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                context.NodeContext.NodeName,
                healthInfo);
            break;

        case ReportTypes.Service:
            result = new ServiceHealthReport(context.ServiceName, healthInfo);
            break;

        case ReportTypes.DeployedService:
            result = new DeployedServicePackageHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                context.CodePackageActivationContext.GetServiceManifestName(),
                context.NodeContext.NodeName,
                healthInfo);
            break;

        case ReportTypes.Node:
            result = new NodeHealthReport(context.NodeContext.NodeName, healthInfo);
            break;

        case ReportTypes.Instance:
            // A stateless context yields an instance report; any other context yields a replica report.
            bool isStateless = context is StatelessServiceContext;
            result = isStateless
                ? (HealthReport)new StatelessServiceInstanceHealthReport(context.PartitionId, context.ReplicaOrInstanceId, healthInfo)
                : new StatefulServiceReplicaHealthReport(context.PartitionId, context.ReplicaOrInstanceId, healthInfo);
            break;

        default:
            throw new ArgumentException("Unknown health type", nameof(reportType));
    }

    return result;
}