/// <summary>
/// Reports the data gathered for the Fabric system services.
/// Emits an informational (Ok) health report with port counts and memory use, resets the port
/// counters, runs the CPU / memory / TCP port data lists through <c>ProcessResourceDataList</c>
/// with their error and warning thresholds, optionally writes the collected Windows event log
/// records to <c>EventVwrErrors.txt</c> as HTML, and finally clears the data containers.
/// </summary>
/// <param name="token">Cancellation token; checked on entry and once per event record.</param>
/// <returns>An already-completed task.</returns>
public override Task ReportAsync(CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    // Informational report. For now, Linux is where we pay close attention to memory use by
    // Fabric system services as there are still a few issues in that realm.
    var timeToLiveWarning = SetHealthReportTimeToLive();
    var portInformationReport = new HealthReport
    {
        Observer = ObserverName,
        NodeName = NodeName,
        HealthMessage = $"Number of ports in use by Fabric services: {TotalActivePortCountAllSystemServices}{Environment.NewLine}" +
                        $"Number of ephemeral ports in use by Fabric services: {TotalActiveEphemeralPortCountAllSystemServices}{Environment.NewLine}" +
                        $"Fabric memory use MB: {this.allMemData.FirstOrDefault(x => x.Id == "Fabric")?.AverageDataValue}{Environment.NewLine}" +
                        (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)
                            ? $"FabricGateway memory use MB: {this.allMemData.FirstOrDefault(x => x.Id == "FabricGateway.exe")?.AverageDataValue}{Environment.NewLine}" +
                              $"FabricHost memory use MB: {this.allMemData.FirstOrDefault(x => x.Id == "FabricHost")?.AverageDataValue}{Environment.NewLine}"
                            : string.Empty),
        State = HealthState.Ok,
        HealthReportTimeToLive = timeToLiveWarning,
    };

    HealthReporter.ReportHealthToServiceFabric(portInformationReport);

    // Reset ports counters.
    TotalActivePortCountAllSystemServices = 0;
    TotalActiveEphemeralPortCountAllSystemServices = 0;

    // CPU
    ProcessResourceDataList(
        this.allCpuData,
        CpuErrorUsageThresholdPct,
        CpuWarnUsageThresholdPct);

    // Memory
    ProcessResourceDataList(
        this.allMemData,
        MemErrorUsageThresholdMb,
        MemWarnUsageThresholdMb);

    // Ports - Active TCP
    ProcessResourceDataList(
        this.allActiveTcpPortData,
        ActiveTcpPortCountError,
        ActiveTcpPortCountWarning);

    // Ports - Ephemeral
    ProcessResourceDataList(
        this.allEphemeralTcpPortData,
        ActiveEphemeralPortCountError,
        ActiveEphemeralPortCountWarning);

    // Windows Event Log
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
        && ObserverManager.ObserverWebAppDeployed
        && this.monitorWinEventLog)
    {
        // SF Eventlog Errors?
        // Write this out to a new file, for use by the web front end log viewer.
        // Format = HTML.
        int count = this.evtRecordList.Count();
        var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "EventVwrErrors.txt");

        // Remove existing file. Best effort: a locked or protected file is left in place.
        if (File.Exists(logPath))
        {
            try
            {
                File.Delete(logPath);
            }
            catch (IOException)
            {
            }
            catch (UnauthorizedAccessException)
            {
            }
        }

        if (count >= 10)
        {
            var sb = new StringBuilder();

            _ = sb.AppendLine("<br/><div><strong>" +
                              "<a href='javascript:toggle(\"evtContainer\")'>" +
                              "<div id=\"plus\" style=\"display: inline; font-size: 25px;\">+</div> " + count +
                              " Error Events in ServiceFabric and System</a> " +
                              "Event logs</strong>.<br/></div>");

            _ = sb.AppendLine("<div id='evtContainer' style=\"display: none;\">");

            foreach (var evt in this.evtRecordList.Distinct())
            {
                token.ThrowIfCancellationRequested();

                try
                {
                    // Event text is free-form, so it is HTML-encoded before being embedded in
                    // markup; otherwise '<' or '&' in a message would break (or inject into) the page.
                    _ = sb.AppendLine("<div>" + System.Net.WebUtility.HtmlEncode(evt.LogName) + "</div>");
                    _ = sb.AppendLine("<div>" + System.Net.WebUtility.HtmlEncode(evt.LevelDisplayName) + "</div>");

                    if (evt.TimeCreated.HasValue)
                    {
                        _ = sb.AppendLine("<div>" + evt.TimeCreated.Value.ToShortDateString() + "</div>");
                    }

                    foreach (var prop in evt.Properties)
                    {
                        // Convert.ToString yields an empty string for a null value, so one check covers both cases.
                        string propertyText = Convert.ToString(prop.Value);

                        if (!string.IsNullOrEmpty(propertyText))
                        {
                            _ = sb.AppendLine("<div>" + System.Net.WebUtility.HtmlEncode(propertyText) + "</div>");
                        }
                    }
                }
                catch (EventLogException)
                {
                    // Skip records whose details can no longer be read.
                }
            }

            _ = sb.AppendLine("</div>");

            _ = ObserverLogger.TryWriteLogFile(logPath, sb.ToString());
            _ = sb.Clear();
        }

        // Clean up.
        if (count > 0)
        {
            this.evtRecordList.Clear();
        }
    }

    ClearDataContainers();

    return Task.CompletedTask;
}
/// <summary>
/// Reports OS health and OS information.
/// Sends an Error health report when <c>osStatus</c> is set and is not "OK", or an Ok report that
/// clears a previously raised error; writes <c>SysInfo.txt</c> when the observer web app is deployed;
/// sends the OS information report; and, on Windows, raises a Warning (plus optional telemetry and
/// ETW events) when Windows Update automatic download is enabled.
/// </summary>
/// <param name="token">Cancellation token; checked on entry and passed to the telemetry calls.</param>
/// <returns>An already-completed task.</returns>
/// <remarks>
/// Any exception other than <see cref="OperationCanceledException"/> is reported as an Error
/// service health event and rethrown. Cancellation propagates without a health report.
/// </remarks>
public override Task ReportAsync(CancellationToken token)
{
    try
    {
        token.ThrowIfCancellationRequested();

        bool isWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);

        // OS Health.
        if (this.osStatus != null && !string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
        {
            string healthMessage = $"OS reporting unhealthy: {this.osStatus}";
            var healthReport = new HealthReport
            {
                Observer = ObserverName,
                NodeName = NodeName,
                HealthMessage = healthMessage,
                State = HealthState.Error,
                HealthReportTimeToLive = SetHealthReportTimeToLive(),
            };

            HealthReporter.ReportHealthToServiceFabric(healthReport);

            // This means this observer created a Warning or Error SF Health Report
            HasActiveFabricErrorOrWarning = true;

            // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
            if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
            {
                _ = TelemetryClient?.ReportHealthAsync(
                    HealthScope.Application,
                    FabricRuntime.GetActivationContext().ApplicationName,
                    HealthState.Error,
                    $"{NodeName} - OS reporting unhealthy: {this.osStatus}",
                    ObserverName,
                    token);
            }
        }
        else if (HasActiveFabricErrorOrWarning && string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
        {
            // Clear Error or Warning with an OK Health Report.
            string healthMessage = $"OS reporting healthy: {this.osStatus}";

            var healthReport = new HealthReport
            {
                Observer = ObserverName,
                NodeName = NodeName,
                HealthMessage = healthMessage,
                State = HealthState.Ok,
                HealthReportTimeToLive = default(TimeSpan),
            };

            HealthReporter.ReportHealthToServiceFabric(healthReport);

            // Reset internal health state.
            HasActiveFabricErrorOrWarning = false;
        }

        if (ObserverManager.ObserverWebAppDeployed)
        {
            var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "SysInfo.txt");

            // Invariant culture keeps the '/' and ':' separators fixed regardless of the host locale.
            string lastUpdated = DateTime.UtcNow.ToString(
                "M/d/yyyy HH:mm:ss",
                System.Globalization.CultureInfo.InvariantCulture);

            // This file is used by the web application (log reader.).
            if (!ObserverLogger.TryWriteLogFile(logPath, $"Last updated on {lastUpdated} UTC<br/>{this.osReport}"))
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    "Unable to create SysInfo.txt file.");
            }
        }

        var report = new HealthReport
        {
            Observer = ObserverName,
            HealthMessage = this.osReport,
            State = HealthState.Ok,
            NodeName = NodeName,
            HealthReportTimeToLive = SetHealthReportTimeToLive(),
        };

        HealthReporter.ReportHealthToServiceFabric(report);

        // Windows Update automatic download enabled?
        if (isWindows && this.isWindowsUpdateAutoDownloadEnabled)
        {
            // NOTE(review): both anchors below carry an empty href - confirm whether the target URLs were lost.
            string linkText =
                $"{Environment.NewLine}For clusters of Silver durability or above, " +
                $"please consider <a href=\"\" target=\"blank\">" +
                $"enabling VMSS automatic OS image upgrades</a> to prevent unexpected VM reboots. " +
                $"For Bronze durability clusters, please consider deploying the " +
                $"<a href=\"\" target=\"blank\">Patch Orchestration Service</a>.";

            string auServiceEnabledMessage = $"Windows Update Automatic Download is enabled.{linkText}";

            report = new HealthReport
            {
                Observer = ObserverName,
                Property = "OSConfiguration",
                HealthMessage = auServiceEnabledMessage,
                State = HealthState.Warning,
                NodeName = NodeName,
                HealthReportTimeToLive = SetHealthReportTimeToLive(),
            };

            HealthReporter.ReportHealthToServiceFabric(report);

            // The platform was already established as Windows by the enclosing condition.
            if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
            {
                // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
                var telemetryData = new TelemetryData(FabricClientInstance, token)
                {
                    HealthEventDescription = auServiceEnabledMessage,
                    HealthState = "Warning",
                    Metric = "WUAutoDownloadEnabled",
                    Value = this.isWindowsUpdateAutoDownloadEnabled,
                    NodeName = NodeName,
                    ObserverName = ObserverName,
                    Source = ObserverConstants.FabricObserverName,
                };

                _ = TelemetryClient?.ReportMetricAsync(
                    telemetryData,
                    token);
            }

            // ETW.
            if (IsEtwEnabled)
            {
                Logger.EtwLogger?.Write(
                    ObserverConstants.FabricObserverETWEventName,
                    new
                    {
                        HealthState = "Warning",
                        HealthEventDescription = auServiceEnabledMessage,
                        ObserverName,
                        Metric = "WUAutoDownloadEnabled",
                        Value = this.isWindowsUpdateAutoDownloadEnabled,
                        NodeName,
                    });
            }
        }

        if (isWindows)
        {
            // reset au globals for fresh detection during next observer run.
            this.isWindowsUpdateAutoDownloadEnabled = false;
            this.auStateUnknown = false;
            this.isWUADSettingEnabled = false;
        }

        return Task.CompletedTask;
    }
    catch (Exception e) when (!(e is OperationCanceledException))
    {
        // Cancellation is excluded by the filter: it is not a fault and must not raise an Error health event.
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Error,
            $"Unhandled exception processing OS information:{Environment.NewLine}{e}");

        throw;
    }
}
/// <summary>
/// Reports the collected disk data.
/// Runs the disk space usage percentage and average queue length data through
/// <c>ProcessResourceDataReportHealth</c> with their error and warning thresholds; when ETW is
/// enabled, also passes the available / total disk space data with zero thresholds; and, when the
/// observer web app is deployed, writes the accumulated disk information to <c>disks.txt</c> and
/// clears it.
/// </summary>
/// <param name="token">Cancellation token; checked before every data item and before the file write.</param>
/// <returns>An already-completed task.</returns>
/// <remarks>
/// An <see cref="AggregateException"/> wrapping a cancellation or timeout is swallowed.
/// <see cref="OperationCanceledException"/> propagates to the caller without a health report.
/// Any other exception is reported as a Warning service health event and rethrown.
/// </remarks>
public override Task ReportAsync(CancellationToken token)
{
    try
    {
        var timeToLiveWarning = SetHealthReportTimeToLive();

        // User-supplied Disk Space Usage % thresholds from Settings.xml.
        foreach (var data in this.DiskSpaceUsagePercentageData)
        {
            token.ThrowIfCancellationRequested();

            ProcessResourceDataReportHealth(
                data,
                DiskSpacePercentErrorThreshold,
                DiskSpacePercentWarningThreshold,
                timeToLiveWarning);
        }

        // User-supplied Average disk queue length thresholds from Settings.xml.
        foreach (var data in this.DiskAverageQueueLengthData)
        {
            token.ThrowIfCancellationRequested();

            ProcessResourceDataReportHealth(
                data,
                AverageQueueLengthErrorThreshold,
                AverageQueueLengthWarningThreshold,
                timeToLiveWarning);
        }

        /* For ETW Only - These calls will just produce ETW (note the thresholds). */
        if (IsEtwEnabled)
        {
            // Disk Space Available
            foreach (var data in this.DiskSpaceAvailableMbData)
            {
                token.ThrowIfCancellationRequested();

                ProcessResourceDataReportHealth(
                    data,
                    0,
                    0,
                    timeToLiveWarning);
            }

            // Disk Space Total
            foreach (var data in this.DiskSpaceTotalMbData)
            {
                token.ThrowIfCancellationRequested();

                ProcessResourceDataReportHealth(
                    data,
                    0,
                    0,
                    timeToLiveWarning);
            }
        }

        token.ThrowIfCancellationRequested();

        // This section only needs to run if you have the FabricObserverWebApi app installed.
        if (!ObserverManager.ObserverWebAppDeployed)
        {
            return Task.CompletedTask;
        }

        var diskInfoPath = Path.Combine(ObserverLogger.LogFolderBasePath, "disks.txt");

        _ = ObserverLogger.TryWriteLogFile(diskInfoPath, this.diskInfo.ToString());
        _ = this.diskInfo.Clear();

        return Task.CompletedTask;
    }
    catch (AggregateException e) when (
        e.InnerException is OperationCanceledException ||
        e.InnerException is TaskCanceledException ||
        e.InnerException is TimeoutException)
    {
        return Task.CompletedTask;
    }
    catch (Exception e) when (!(e is OperationCanceledException))
    {
        // token.ThrowIfCancellationRequested() throws OperationCanceledException directly (not wrapped
        // in an AggregateException); the filter lets it propagate without being reported as a fault.
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"Unhandled exception in ReportAsync:{Environment.NewLine}{e}");

        throw;
    }
}
/// <summary>
/// Prepares the observer for a network monitoring run.
/// Logs the start of initialization, writes <c>NetInfo.txt</c> for the observer web app while
/// <c>hasRun</c> is false, and (outside of test runs) loads the endpoint configuration file,
/// keeping only the entries whose target application is deployed on this node.
/// </summary>
/// <returns>
/// <c>true</c> for a test run or when endpoint configuration is available in <c>userConfig</c>;
/// <c>false</c> when the configuration file name is missing, the file does not exist, or no
/// entry applies to this node.
/// </returns>
private async Task<bool> InitializeAsync()
{
    WriteToLogWithLevel(
        ObserverName,
        $"Initializing {ObserverName} for network monitoring. | {NodeName}",
        LogLevel.Information);

    this.cancellationToken.ThrowIfCancellationRequested();

    // This only needs to be logged once.
    // This file is used by the ObserverWebApi application.
    if (ObserverManager.ObserverWebAppDeployed && !this.hasRun)
    {
        var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "NetInfo.txt");

        // NOTE(review): looks like leftover diagnostic output - confirm whether it is still wanted.
        Console.WriteLine($"logPath: {logPath}");

        if (!ObserverLogger.TryWriteLogFile(logPath, GetNetworkInterfaceInfo(this.cancellationToken)))
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.OriginalString,
                ObserverName,
                HealthState.Warning,
                "Unable to create NetInfo.txt file.");
        }
    }

    // Is this a unit test run?
    if (IsTestRun)
    {
        return true;
    }

    var settings =
        FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(
            ObserverConstants.ObserverConfigurationPackageName)?.Settings;

    this.configSettings.Initialize(
        settings,
        ConfigurationSectionName,
        "NetworkObserverDataFileName");

    // Validate the configured file name itself, before combining: the combined path is not blank
    // when the data package path is set, and Path.Combine throws on a null segment.
    var configuredFileName = this.configSettings.NetworkObserverConfigFileName;

    if (string.IsNullOrWhiteSpace(configuredFileName))
    {
        ObserverLogger.LogError(
            "Endpoint list file is not specified. " +
            "Please Add file containing endpoints that need to be monitored.");

        return false;
    }

    var networkObserverConfigFileName = Path.Combine(this.dataPackagePath, configuredFileName);

    if (!File.Exists(networkObserverConfigFileName))
    {
        ObserverLogger.LogError(
            $"Endpoint list file does not exist: {networkObserverConfigFileName}. " +
            "Please Add file containing endpoints that need to be monitored.");

        return false;
    }

    if (this.userConfig.Count == 0)
    {
        using (Stream stream = new FileStream(
            networkObserverConfigFileName,
            FileMode.Open,
            FileAccess.Read,
            FileShare.Read))
        {
            var configs = JsonHelper.ReadFromJsonStream<NetworkObserverConfig[]>(stream);

            // Guard against a null result (e.g. a file containing a JSON null); the
            // "missing configuration" warning below then covers that case.
            if (configs != null)
            {
                foreach (var netConfig in configs)
                {
                    this.cancellationToken.ThrowIfCancellationRequested();

                    var deployedApps = await FabricClientInstance.QueryManager.GetDeployedApplicationListAsync(
                        NodeName,
                        new Uri(netConfig.TargetApp)).ConfigureAwait(false);

                    // Only keep configuration for applications deployed on this node.
                    if (deployedApps == null || deployedApps.Count < 1)
                    {
                        continue;
                    }

                    this.userConfig.Add(netConfig);
                }
            }
        }

        if (this.userConfig.Count == 0)
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.ToString(),
                ObserverName,
                HealthState.Warning,
                "Missing required configuration data: endpoints.");

            return false;
        }
    }

    return true;
}
/// <summary>
/// Assembles a plain-text summary of the Service Fabric settings held by this observer, appends
/// the deployed application information, and writes the result to <c>SFInfraInfo.txt</c>.
/// A Warning service health event is raised when the file cannot be written.
/// </summary>
/// <param name="token">
/// Cancellation token; checked on entry and again before the file is written, and passed to
/// <c>GetDeployedAppsInfoAsync</c>.
/// </param>
/// <returns>A task that completes once the summary has been written (or the write has failed).</returns>
public override async Task ReportAsync(CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    var infraInfo = new StringBuilder();

    _ = infraInfo.AppendLine("\nService Fabric information:\n");

    // Each setting is listed only when a value is present.
    if (!string.IsNullOrEmpty(this.SFVersion))
    {
        _ = infraInfo.AppendLine($"Runtime Version: {this.SFVersion}");
    }

    if (this.SFBinRoot != null)
    {
        _ = infraInfo.AppendLine($"Fabric Bin root directory: {this.SFBinRoot}");
    }

    if (this.SFCodePath != null)
    {
        _ = infraInfo.AppendLine($"Fabric Code Path: {this.SFCodePath}");
    }

    if (!string.IsNullOrEmpty(this.SFDataRoot))
    {
        _ = infraInfo.AppendLine($"Data root directory: {this.SFDataRoot}");
    }

    if (!string.IsNullOrEmpty(this.SFLogRoot))
    {
        _ = infraInfo.AppendLine($"Log root directory: {this.SFLogRoot}");
    }

    if (this.SFVolumeDiskServiceEnabled != null)
    {
        _ = infraInfo.AppendLine($"Volume Disk Service Enabled: {this.SFVolumeDiskServiceEnabled}");
    }

    if (this.unsupportedPreviewFeaturesEnabled != null)
    {
        _ = infraInfo.AppendLine($"Unsupported Preview Features Enabled: {this.unsupportedPreviewFeaturesEnabled}");
    }

    if (this.SFCompatibilityJsonPath != null)
    {
        _ = infraInfo.AppendLine($"Compatibility Json path: {this.SFCompatibilityJsonPath}");
    }

    if (this.SFEnableCircularTraceSession != null)
    {
        _ = infraInfo.AppendLine($"Enable Circular trace session: {this.SFEnableCircularTraceSession}");
    }

    var deployedAppsInfo = await GetDeployedAppsInfoAsync(token).ConfigureAwait(true);

    _ = infraInfo.Append(deployedAppsInfo);
    _ = infraInfo.AppendLine();

    token.ThrowIfCancellationRequested();

    // This file is used by the web application (ObserverWebApi).
    var infoFilePath = Path.Combine(ObserverLogger.LogFolderBasePath, "SFInfraInfo.txt");
    bool written = ObserverLogger.TryWriteLogFile(infoFilePath, infraInfo.ToString());

    if (!written)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            "Unable to create SFInfraInfo.txt file.");
    }

    _ = infraInfo.Clear();
}