/// <summary>
/// Runs <paramref name="function"/> through the base implementation while a
/// <c>CountAndDurationAverage</c> health-tracking scope is open.
/// </summary>
/// <typeparam name="T">Result type produced by <paramref name="function"/>.</typeparam>
/// <param name="methodName">Logical method name; appended to <c>TrackPrefix</c> to build the track name.</param>
/// <param name="forceThrow">Forwarded unchanged to the base implementation.</param>
/// <param name="function">The work to execute.</param>
/// <param name="parameters">Forwarded unchanged to the base implementation.</param>
/// <returns>Whatever the base implementation returns.</returns>
protected override T ExecuteFunction<T>(string methodName, bool forceThrow, Func<T> function, params object[] parameters)
{
    // The track name uses the REST API format: "<TrackPrefix>.<methodName>".
    string trackName = string.Format(HealthReporter.RESTAPI_FORMAT, this.TrackPrefix + "." + methodName);

    // The scope is disposed when the base call returns or throws.
    using (HealthReporter.BeginTrack(HealthTrackType.CountAndDurationAverage, trackName))
    {
        return base.ExecuteFunction<T>(methodName, forceThrow, function, parameters);
    }
}
/// <summary>
/// Runs <paramref name="function"/> through the base implementation inside a health-tracking
/// scope whose track type is chosen by the caller.
/// </summary>
/// <typeparam name="K">Result type produced by <paramref name="function"/>.</typeparam>
/// <param name="type">Track type passed to <c>HealthReporter.BeginTrack</c>.</param>
/// <param name="methodName">Logical method name; appended to <c>TrackPrefix</c> to build the track name.</param>
/// <param name="function">The work to execute.</param>
/// <param name="parameters">Forwarded unchanged to the base implementation.</param>
/// <returns>Whatever the base implementation returns.</returns>
protected virtual K ExecuteFunction<K>(HealthTrackType type, string methodName, Func<K> function, params object[] parameters)
{
    // The track name uses the business format: "<TrackPrefix>.<methodName>".
    string trackName = string.Format(HealthReporter.BUSINESS_FORMAT, this.TrackPrefix + "." + methodName);

    using (HealthReporter.BeginTrack(type, trackName))
    {
        return base.ExecuteFunction<K>(methodName, function, parameters);
    }
}
/// <summary>
/// Runs <paramref name="function"/> through the base implementation while a
/// <c>CountAndDurationAverage</c> health-tracking scope (business format) is open.
/// </summary>
/// <typeparam name="K">Result type produced by <paramref name="function"/>.</typeparam>
/// <param name="methodName">Logical method name; appended to <c>TrackPrefix</c> to build the track name.</param>
/// <param name="function">The work to execute.</param>
/// <param name="parameters">Forwarded unchanged to the base implementation.</param>
/// <returns>Whatever the base implementation returns.</returns>
protected override K ExecuteFunction<K>(string methodName, Func<K> function, params object[] parameters)
{
    string trackName = string.Format(HealthReporter.BUSINESS_FORMAT, this.TrackPrefix + "." + methodName);

    // The scope is disposed when the base call returns or throws.
    using (HealthReporter.BeginTrack(HealthTrackType.CountAndDurationAverage, trackName))
    {
        return base.ExecuteFunction<K>(methodName, function, parameters);
    }
}
/// <summary>
/// Runs <paramref name="action"/> through the base implementation inside a health-tracking
/// scope whose track type is chosen by the caller.
/// </summary>
/// <param name="type">Track type passed to <c>HealthReporter.BeginTrack</c>.</param>
/// <param name="methodName">Logical method name; appended to <c>TrackPrefix</c> to build the track name.</param>
/// <param name="action">The work to execute.</param>
/// <param name="parameters">Forwarded unchanged to the base implementation.</param>
protected virtual void ExecuteMethod(HealthTrackType type, string methodName, Action action, params object[] parameters)
{
    // The track name uses the business format: "<TrackPrefix>.<methodName>".
    string trackName = string.Format(HealthReporter.BUSINESS_FORMAT, this.TrackPrefix + "." + methodName);

    using (HealthReporter.BeginTrack(type, trackName))
    {
        base.ExecuteMethod(methodName, action, parameters);
    }
}
/// <summary>
/// Shuts the pipeline down: detaches the inputs, waits for the pipeline to drain and complete
/// (bounded by the configured completion timeout), cancels outstanding work, and releases
/// the pipeline links and, when this instance owns them, the inputs, sinks, and health reporter.
/// Calls after the first one are no-ops.
/// </summary>
public void Dispose()
{
    if (this.disposed)
    {
        return;
    }

    this.disposed = true;

    // Stop feeding the pipeline before draining it.
    DisposeOf(this.inputSubscriptions);

    TimeSpan drainWait = PollWaitForPipelineDrain();
    pipelineHead.Complete();

    // Whatever is left of the completion budget after draining is spent waiting for the
    // blocks to complete; never less than 100 ms so that the blocks get some non-zero
    // time to dispose of their internal resources.
    double remainingMsec = this.pipelineConfiguration.PipelineCompletionTimeoutMsec - drainWait.TotalMilliseconds;
    TimeSpan completionWait = TimeSpan.FromMilliseconds(Math.Max(100, remainingMsec));
    Task.WaitAll(this.pipelineCompletionTasks.ToArray(), completionWait);

    this.cancellationTokenSource.Cancel();
    DisposeOf(this.pipelineLinkDisposables);

    if (!this.disposeDependencies)
    {
        return;
    }

    DisposeOf(this.Inputs);
    DisposeOf(this.Sinks);
    HealthReporter.Dispose();
}
/// <summary>
/// Runs <paramref name="action"/> through the base implementation while a
/// <c>CountAndDurationAverage</c> health-tracking scope (business format) is open.
/// </summary>
/// <param name="methodName">Logical method name; appended to <c>TrackPrefix</c> to build the track name.</param>
/// <param name="action">The work to execute.</param>
/// <param name="parameters">Forwarded unchanged to the base implementation.</param>
protected override void ExecuteMethod(string methodName, Action action, params object[] parameters)
{
    string trackName = string.Format(HealthReporter.BUSINESS_FORMAT, this.TrackPrefix + "." + methodName);

    // The scope is disposed when the base call returns or throws.
    using (HealthReporter.BeginTrack(HealthTrackType.CountAndDurationAverage, trackName))
    {
        base.ExecuteMethod(methodName, action, parameters);
    }
}
/// <summary>
/// Shuts the pipeline down: disposes the batcher timer, detaches the inputs, completes the
/// pipeline head, waits (up to the configured completion timeout) for every output to
/// complete, cancels outstanding work, and releases owned dependencies.
/// Calls after the first one are no-ops.
/// </summary>
public void Dispose()
{
    // The disposed flag and the timer are handled under the same lock.
    lock (this.batcherTimerDisposalLock)
    {
        if (this.disposed)
        {
            return;
        }

        this.disposed = true;
        this.batcherTimer.Dispose();
    }

    DisposeOf(this.inputSubscriptions);
    pipelineHead.Complete();

    // The completion should propagate all the way to the outputs. When all outputs complete,
    // the pipeline has been drained successfully; otherwise give up once the timeout elapses.
    Task allOutputsCompleted = Task.WhenAll(this.outputCompletionTasks.ToArray());
    Task completionTimeout = Task.Delay(this.pipelineConfiguration.PipelineCompletionTimeoutMsec);
    Task.WhenAny(allOutputsCompleted, completionTimeout).GetAwaiter().GetResult();

    this.cancellationTokenSource.Cancel();

    if (!this.disposeDependencies)
    {
        return;
    }

    DisposeOf(this.Inputs);
    DisposeOf(this.Sinks);
    HealthReporter.Dispose();
}
/// <summary>
/// When disposing, sends an <c>Ok</c> health report for every tracked connection that is
/// currently in <c>Error</c> or <c>Warning</c> state, so that existing warnings are cleared.
/// </summary>
/// <param name="disposing">When <c>false</c> the method does nothing.</param>
protected override void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    foreach (var state in this.connectionStatus)
    {
        // Only connections with an outstanding Error/Warning need to be cleared.
        if (state.Health != HealthState.Error && state.Health != HealthState.Warning)
        {
            continue;
        }

        // Clear existing Health Warning.
        var clearingReport = new HealthReport
        {
            AppName = new Uri(state.TargetApp),
            Code = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
            EmitLogEvent = true,
            HealthMessage = $"Clearing NetworkObserver's Health Error/Warning for {state.TargetApp}/{state.HostName} connection state since FO is stopping.",
            HealthReportTimeToLive = default(TimeSpan),
            State = HealthState.Ok,
            NodeName = NodeName,
            Observer = ObserverName,
            Property = $"EndpointUnreachable({state.HostName})",
            ReportType = HealthReportType.Application,
        };

        HealthReporter.ReportHealthToServiceFabric(clearingReport);
    }
}
/// <inheritdoc/>
public override async Task ObserveAsync(CancellationToken token)
{
    // If set, this observer will only run during the supplied interval.
    // See Settings.xml, CertificateObserverConfiguration section, RunInterval parameter for an example.
    bool ranWithinInterval = RunInterval > TimeSpan.MinValue
                             && DateTime.Now.Subtract(LastRunDateTime) < RunInterval;

    if (ranWithinInterval)
    {
        return;
    }

    bool initialized = Initialize();
    Token = token;

    if (!initialized)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            "This observer was unable to initialize correctly due to missing configuration info.");

        return;
    }

    try
    {
        // Both helpers live only for the duration of this run; see the finally block.
        perfCounters = new WindowsPerfCounters();
        diskUsage = new DiskUsage();

        foreach (var app in targetList)
        {
            Token.ThrowIfCancellationRequested();

            // Entries that name neither a target nor a target type are skipped.
            bool hasTarget = !string.IsNullOrWhiteSpace(app.Target)
                             || !string.IsNullOrWhiteSpace(app.TargetType);

            if (hasTarget)
            {
                await MonitorAppAsync(app).ConfigureAwait(true);
            }
        }

        await ReportAsync(token).ConfigureAwait(true);
        LastRunDateTime = DateTime.Now;
    }
    finally
    {
        // Clean up, whether or not monitoring/reporting succeeded.
        diskUsage?.Dispose();
        diskUsage = null;

        perfCounters?.Dispose();
        perfCounters = null;
    }
}
/// <summary>
/// Copies the values exposed by <c>ServiceFabricConfiguration.Instance</c> into this observer's
/// fields and then calls <c>ReportAsync</c>.
/// </summary>
/// <param name="token">Cancellation token; checked before reading the configuration and before reporting.</param>
/// <remarks>
/// The run is skipped when the observer web app is not deployed or when the configured
/// <c>RunInterval</c> has not yet elapsed since <c>LastRunDateTime</c>.
/// <see cref="ArgumentException"/> and <see cref="IOException"/> raised while reading the
/// configuration are reported as a Warning and swallowed; any other exception is reported
/// as a Warning and re-thrown.
/// </remarks>
public override async Task ObserveAsync(CancellationToken token)
{
    // If set, this observer will only run during the supplied interval.
    // See Settings.xml, CertificateObserverConfiguration section, RunInterval parameter for an example.
    // This observer is only useful if you enable the web api for producing
    // an html page with a bunch of information that's easy to read in one go.
    if (!ObserverManager.ObserverWebAppDeployed
        || (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval))
    {
        return;
    }

    token.ThrowIfCancellationRequested();

    try
    {
        ServiceFabricConfiguration config = ServiceFabricConfiguration.Instance;
        this.SFVersion = config.FabricVersion;
        this.SFBinRoot = config.FabricBinRoot;
        this.SFCompatibilityJsonPath = config.CompatibilityJsonPath;
        this.SFCodePath = config.FabricCodePath;
        this.SFDataRoot = config.FabricDataRoot;
        this.SFLogRoot = config.FabricLogRoot;
        SFRootDir = config.FabricRoot;
        this.SFEnableCircularTraceSession = config.EnableCircularTraceSession;
        this.SFVolumeDiskServiceEnabled = config.IsSFVolumeDiskServiceEnabled;
        this.unsupportedPreviewFeaturesEnabled = config.EnableUnsupportedPreviewFeatures;
        this.SFNodeLastBootTime = config.NodeLastBootUpTime;
    }
    catch (Exception e) when (e is ArgumentException || e is IOException)
    {
        // Expected failure modes: report and carry on with whatever values were read.
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"{NodeName} | Handled Exception, but failed to read registry value:\n{e}");
    }
    catch (Exception e)
    {
        // Fix: the node name is now interpolated. The previous message emitted the
        // literal text "this.NodeName" because the braces were missing.
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"{NodeName} | Unhandled Exception trying to read registry value:\n{e}");

        throw;
    }

    token.ThrowIfCancellationRequested();

    await ReportAsync(token).ConfigureAwait(true);
    LastRunDateTime = DateTime.Now;
}
/// <summary>
/// This is the main entry point for your service instance. It starts a health reporter,
/// idles until cancellation is requested or an unsupported configuration change is flagged,
/// and then stops the reporter.
/// </summary>
/// <param name="cancellationToken">Canceled when Service Fabric needs to shut down this service instance.</param>
protected override async Task RunAsync(CancellationToken cancellationToken)
{
    // A single trace id ties together every log entry and reporter call made by this invocation.
    Guid traceId = Guid.NewGuid();

    using (var reporter = new HealthReporter(
        traceId,
        this.logger,
        ComponentName,
        this.healthReporterId,
        this.HealthReportCallback,
        this.Context,
        HealthReporter.ReportTypes.Instance,
        this.configurationProvider.Config.HealthReportInterval))
    {
        await reporter.StartAsync(traceId, cancellationToken).ConfigureAwait(false);

        this.logger.RunAsyncInvoked(traceId, ComponentName, this.GetType().FullName);

        // Poll every 250 ms; the delay itself observes the cancellation token.
        while (!cancellationToken.IsCancellationRequested && !this.unsupportedConfigurationChangeOccurred)
        {
            await Task.Delay(250, cancellationToken).ConfigureAwait(false);
        }

        await reporter.StopAsync(traceId, cancellationToken).ConfigureAwait(false);
    }

    this.logger.Informational(traceId, ComponentName, "RunAsync completed.");
}
/// <summary>
/// Runs one observation pass: initializes the target lists, monitors the deployed apps,
/// records how long that took in <c>RunDuration</c>, and then calls <c>ReportAsync</c>.
/// </summary>
/// <param name="token">Cancellation token forwarded to monitoring and reporting.</param>
/// <remarks>
/// The pass is skipped when the configured <c>RunInterval</c> has not yet elapsed since
/// <c>LastRunDateTime</c>. When initialization fails, a Warning is reported and the pass ends.
/// </remarks>
public override async Task ObserveAsync(CancellationToken token)
{
    // If set, this observer will only run during the supplied interval.
    // See Settings.xml, CertificateObserverConfiguration section, RunInterval parameter for an example.
    if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval)
    {
        return;
    }

    this.stopwatch.Start();

    try
    {
        bool initialized = await InitializeAsync();
        Token = token;

        if (!initialized)
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.OriginalString,
                ObserverName,
                HealthState.Warning,
                "This observer was unable to initialize correctly due to missing configuration info.");

            return;
        }

        await MonitorDeployedAppsAsync(token).ConfigureAwait(false);

        // The time it took to get to ReportAsync.
        // For use in computing actual HealthReport TTL.
        this.stopwatch.Stop();
        RunDuration = this.stopwatch.Elapsed;
    }
    finally
    {
        // Fix: always stop and reset the shared stopwatch. Previously an exception from
        // initialization or monitoring left it running, so the next pass's RunDuration
        // also included the time between passes.
        this.stopwatch.Stop();
        this.stopwatch.Reset();
    }

    await ReportAsync(token).ConfigureAwait(true);
    LastRunDateTime = DateTime.Now;
}
/// <summary>
/// Shuts the pipeline down: detaches the inputs, completes the pipeline head and waits for
/// its completion (up to the configured completion timeout), cancels outstanding work, and
/// releases the pipeline disposables and, when this instance owns them, the inputs, sinks,
/// and health reporter. Calls after the first one are no-ops.
/// </summary>
public void Dispose()
{
    if (this.disposed)
    {
        return;
    }

    this.disposed = true;

    // Stop feeding the pipeline before completing it.
    DisposeOf(this.inputSubscriptions);

    this.pipelineHead.Complete();
    TimeSpan completionTimeout = TimeSpan.FromMilliseconds(this.pipelineConfiguration.PipelineCompletionTimeoutMsec);
    this.pipelineHead.Completion.Wait(completionTimeout);

    this.cancellationTokenSource.Cancel();
    DisposeOf(this.pipelineDisposables);

    if (!this.disposeDependencies)
    {
        return;
    }

    DisposeOf(this.Inputs);
    DisposeOf(this.Sinks);
    HealthReporter.Dispose();
}
/// <summary>
/// Discovers every <c>ISynchronizer</c> implementation, resolves the ones that match
/// <paramref name="specificTable"/>, and runs them in ascending priority order. Synchronizers
/// that share a priority run in parallel; the next priority starts only after the current
/// batch has finished.
/// </summary>
/// <param name="specificTable">
/// When non-empty, only synchronizer types whose name contains this text are run;
/// when null or empty, all discovered synchronizers are run.
/// </param>
protected virtual void PerformProcessSync(string specificTable)
{
    base.ExecuteMethod("PerformProcessSync", () =>
    {
        IFindClassTypes finder = this.IFoundation.Resolve<IFindClassTypes>();
        IEnumerable<Type> synchronizerTypes = FindInterfacesOfType(typeof(ISynchronizer), finder.GetAssemblies(null));

        List<ISynchronizer> synchronizersToRun = new List<ISynchronizer>();

        foreach (Type item in synchronizerTypes)
        {
            // Name matching is not perfect.. but should be good enough, it's just for dev ease anyway.
            if (!string.IsNullOrEmpty(specificTable) && !item.Name.Contains(specificTable))
            {
                continue;
            }

            base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} Loading", item.ToString()));

            if (item.IsGenericTypeDefinition || item == typeof(ISynchronizer))
            {
                // Fix: added the missing space between the type name and the rest of the message.
                base.IFoundation.LogWarning("ElasticSearchDaemon: " + item.ToString() + " is a generic or the base interface");
                continue;
            }

            try
            {
                ISynchronizer synchronizer = this.IFoundation.Container.Resolve(item, string.Empty) as ISynchronizer;
                base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} Running", item.ToString()));

                if (synchronizer != null)
                {
                    synchronizersToRun.Add(synchronizer);
                }
            }
            catch (Exception ex)
            {
                // Resolution stays best-effort (one unresolvable type must not stop the others),
                // but the failure is now logged instead of being silently swallowed.
                base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} could not be resolved: {1}", item.ToString(), ex.Message));
            }
        }

        // Process them by bulk-priority: OrderBy is stable and GroupBy keeps first-seen key
        // order, so the batches come out in ascending priority.
        var priorityBatches = synchronizersToRun
            .OrderBy(x => x.Priority)
            .GroupBy(x => x.Priority);

        foreach (var batch in priorityBatches)
        {
            List<Task> tasks = new List<Task>();

            foreach (var synchronizer in batch)
            {
                tasks.Add(Task.Run(() =>
                {
                    try
                    {
                        using (var scope = HealthReporter.BeginTrack(HealthTrackType.DurationAverage, string.Format(HealthReporter.INDEXER_QUEUE_TIME_FORMAT, synchronizer.EntityName)))
                        {
#pragma warning disable 612, 618
                            int count = synchronizer.PerformSynchronization(this.AgentName);
#pragma warning restore 612, 618
                            if (count > 0)
                            {
                                HealthReporter.Current.UpdateMetric(HealthTrackType.Count, string.Format(HealthReporter.INDEXER_QUEUE_SIZE_FORMAT, synchronizer.EntityName), 0, count);
                            }
                        }

                        base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} Complete", synchronizer.ToString()));
                    }
                    catch (Exception ex)
                    {
                        // A failing synchronizer is logged and must not take down its siblings.
                        base.IFoundation.LogError(ex, "PerformProcessSync" + synchronizer.GetType().ToString());
                        base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} Error", synchronizer.ToString()));
                    }
                }));
            }

            base.IFoundation.LogWarning("ElasticSearchDaemon.Waiting");
            Task.WaitAll(tasks.ToArray());
            base.IFoundation.LogWarning("ElasticSearchDaemon.Done");
        }
    });
}
/// <summary>
/// Reports the state of every tracked connection that belongs to a configured target app.
/// A disconnected endpoint produces a Warning health report (plus optional telemetry and ETW);
/// a connected endpoint that was previously in Warning/Error produces an Ok report that
/// clears it. Connected entries are then removed from the tracking list.
/// </summary>
/// <param name="token">Cancellation token; checked once per config entry and once per connection.</param>
/// <returns>A completed task; all work is done synchronously.</returns>
public override Task ReportAsync(CancellationToken token)
{
    var timeToLiveWarning = SetHealthReportTimeToLive();

    // Report on connection state.
    foreach (var config in this.userConfig)
    {
        token.ThrowIfCancellationRequested();

        foreach (var conn in this.connectionStatus.Where(cs => cs.TargetApp == config.TargetApp))
        {
            token.ThrowIfCancellationRequested();

            if (!conn.Connected)
            {
                this.healthState = HealthState.Warning;
                var healthMessage = $"Outbound Internet connection failure detected for endpoint {conn.HostName}{Environment.NewLine}";

                // Telemetry payload (perhaps it signals an Alert in AppInsights or LogAnalytics).
                // It is also attached to the health report below as HealthData.
                var telemetryData = new TelemetryData(FabricClientInstance, token)
                {
                    ApplicationName = conn.TargetApp,
                    Code = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                    HealthState = "Warning",
                    HealthEventDescription = healthMessage,
                    ObserverName = ObserverName,
                    Metric = ErrorWarningProperty.InternetConnectionFailure,
                    NodeName = NodeName,
                };

                if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                {
                    _ = TelemetryClient?.ReportMetricAsync(
                        telemetryData,
                        Token);
                }

                var report = new HealthReport
                {
                    AppName = new Uri(conn.TargetApp),
                    EmitLogEvent = true,
                    HealthData = telemetryData,
                    HealthMessage = healthMessage,
                    HealthReportTimeToLive = timeToLiveWarning,
                    State = this.healthState,
                    NodeName = NodeName,
                    Observer = ObserverName,
                    Property = $"EndpointUnreachable({conn.HostName})",
                    ReportType = HealthReportType.Application,
                    ResourceUsageDataProperty = $"{ErrorWarningProperty.InternetConnectionFailure}: {conn.HostName}",
                };

                // Send health report Warning and log event locally.
                HealthReporter.ReportHealthToServiceFabric(report);

                // This means this observer created a Warning or Error SF Health Report.
                HasActiveFabricErrorOrWarning = true;

                // ETW.
                if (IsEtwEnabled)
                {
                    Logger.EtwLogger?.Write(
                        ObserverConstants.FabricObserverETWEventName,
                        new
                        {
                            ApplicationName = conn.TargetApp,
                            Code = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                            HealthState = "Warning",
                            HealthEventDescription = healthMessage,
                            ObserverName,
                            Metric = ErrorWarningProperty.InternetConnectionFailure,
                            NodeName,
                        });
                }
            }
            else
            {
                // Fix: only skip connections that have nothing to clear. The previous test
                // (Health != Warning || Health != Error) was always true, so the clearing
                // code below could never run.
                if (conn.Health != HealthState.Warning && conn.Health != HealthState.Error)
                {
                    continue;
                }

                this.healthState = HealthState.Ok;
                var healthMessage = $"Outbound Internet connection successful for {conn.HostName} from node {NodeName}.";

                // Clear existing Health Warning.
                var report = new HealthReport
                {
                    AppName = new Uri(conn.TargetApp),
                    Code = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                    EmitLogEvent = true,
                    HealthMessage = healthMessage,
                    HealthReportTimeToLive = default(TimeSpan),
                    State = HealthState.Ok,
                    NodeName = NodeName,
                    Observer = ObserverName,
                    Property = $"EndpointUnreachable({conn.HostName})",
                    ReportType = HealthReportType.Application,
                };

                HealthReporter.ReportHealthToServiceFabric(report);

                // Telemetry.
                if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                {
                    var telemetryData = new TelemetryData(FabricClientInstance, token)
                    {
                        ApplicationName = conn.TargetApp,
                        Code = FOErrorWarningCodes.Ok,
                        HealthState = "Ok",
                        HealthEventDescription = healthMessage,
                        ObserverName = ObserverName,
                        Metric = "Internet Connection State",
                        NodeName = NodeName,
                    };

                    _ = TelemetryClient?.ReportMetricAsync(
                        telemetryData,
                        Token);
                }

                // ETW.
                if (IsEtwEnabled)
                {
                    Logger.EtwLogger?.Write(
                        ObserverConstants.FabricObserverETWEventName,
                        new
                        {
                            ApplicationName = conn.TargetApp,
                            Code = FOErrorWarningCodes.Ok,
                            HealthState = "Ok",
                            HealthEventDescription = healthMessage,
                            ObserverName,
                            Metric = "Internet Connection State",
                            NodeName,
                        });
                }

                // Reset health state.
                HasActiveFabricErrorOrWarning = false;
            }
        }
    }

    // Clear: connected entries are dropped; disconnected ones remain in the list.
    _ = this.connectionStatus.RemoveAll(conn => conn.Connected);
    this.connectionStatus.TrimExcess();
    this.connEndpointTestResults.Clear();

    return Task.CompletedTask;
}
/// <summary>
/// For every replica/instance in <c>ReplicaOrInstanceList</c> that matches a configured target,
/// samples the host process's port counts once and, when CPU or memory thresholds are
/// configured, samples CPU and memory every 250 ms for the monitoring duration.
/// Samples are appended to the per-process data collections keyed by "appNameOrType:procName".
/// </summary>
/// <param name="token">Cancellation token; checked per replica and per sample.</param>
/// <remarks>
/// <see cref="Win32Exception"/>, <see cref="ArgumentException"/>, and
/// <see cref="InvalidOperationException"/> are logged and the replica is skipped;
/// any other exception is re-thrown (cancellation exceptions are re-thrown without logging).
/// </remarks>
private async Task MonitorDeployedAppsAsync(CancellationToken token)
{
    Process currentProcess = null;

    foreach (var repOrInst in ReplicaOrInstanceList)
    {
        token.ThrowIfCancellationRequested();

        var timer = new Stopwatch();
        int processId = (int)repOrInst.HostProcessId;
        var cpuUsage = new CpuUsage();
        bool checkCpu = false, checkMemMb = false, checkMemPct = false, checkAllPorts = false, checkEphemeralPorts = false;

        // Find the configured target this replica belongs to, by app name or by app type.
        var application = this.deployedTargetList?.FirstOrDefault(
            app => app?.TargetApp?.ToLower() == repOrInst.ApplicationName?.OriginalString?.ToLower() ||
                   app?.TargetAppType?.ToLower() == repOrInst.ApplicationTypeName?.ToLower());

        if (application?.TargetApp == null && application?.TargetAppType == null)
        {
            continue;
        }

        try
        {
            // App level.
            currentProcess = Process.GetProcessById(processId);

            token.ThrowIfCancellationRequested();

            var procName = currentProcess.ProcessName;
            string appNameOrType = GetAppNameOrType(repOrInst);

            var id = $"{appNameOrType}:{procName}";

            // Add new resource data structures for each app service process where the metric
            // is specified in configuration for related observation.
            if (this.AllAppCpuData.All(list => list.Id != id) && (application.CpuErrorLimitPercent > 0 || application.CpuWarningLimitPercent > 0))
            {
                this.AllAppCpuData.Add(new FabricResourceUsageData<double>(ErrorWarningProperty.TotalCpuTime, id, DataCapacity, UseCircularBuffer));
            }

            if (this.AllAppCpuData.Any(list => list.Id == id))
            {
                checkCpu = true;
            }

            if (this.AllAppMemDataMb.All(list => list.Id != id) && (application.MemoryErrorLimitMb > 0 || application.MemoryWarningLimitMb > 0))
            {
                this.AllAppMemDataMb.Add(new FabricResourceUsageData<float>(ErrorWarningProperty.TotalMemoryConsumptionMb, id, DataCapacity, UseCircularBuffer));
            }

            if (this.AllAppMemDataMb.Any(list => list.Id == id))
            {
                checkMemMb = true;
            }

            if (this.AllAppMemDataPercent.All(list => list.Id != id) && (application.MemoryErrorLimitPercent > 0 || application.MemoryWarningLimitPercent > 0))
            {
                this.AllAppMemDataPercent.Add(new FabricResourceUsageData<double>(ErrorWarningProperty.TotalMemoryConsumptionPct, id, DataCapacity, UseCircularBuffer));
            }

            if (this.AllAppMemDataPercent.Any(list => list.Id == id))
            {
                checkMemPct = true;
            }

            if (this.AllAppTotalActivePortsData.All(list => list.Id != id) && (application.NetworkErrorActivePorts > 0 || application.NetworkWarningActivePorts > 0))
            {
                this.AllAppTotalActivePortsData.Add(new FabricResourceUsageData<int>(ErrorWarningProperty.TotalActivePorts, id, 1));
            }

            if (this.AllAppTotalActivePortsData.Any(list => list.Id == id))
            {
                checkAllPorts = true;
            }

            if (this.AllAppEphemeralPortsData.All(list => list.Id != id) && (application.NetworkErrorEphemeralPorts > 0 || application.NetworkWarningEphemeralPorts > 0))
            {
                this.AllAppEphemeralPortsData.Add(new FabricResourceUsageData<int>(ErrorWarningProperty.TotalEphemeralPorts, id, 1));
            }

            if (this.AllAppEphemeralPortsData.Any(list => list.Id == id))
            {
                checkEphemeralPorts = true;
            }

            // Measure Total and Ephemeral ports (a single sample each).
            if (checkAllPorts)
            {
                this.AllAppTotalActivePortsData.FirstOrDefault(x => x.Id == id).Data.Add(OperatingSystemInfoProvider.Instance.GetActivePortCount(currentProcess.Id, FabricServiceContext));
            }

            if (checkEphemeralPorts)
            {
                this.AllAppEphemeralPortsData.FirstOrDefault(x => x.Id == id).Data.Add(OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount(currentProcess.Id, FabricServiceContext));
            }

            // No need to proceed further if no cpu and mem thresholds are specified in configuration.
            if (!checkCpu && !checkMemMb && !checkMemPct)
            {
                continue;
            }

            /* CPU and Memory Usage */

            // Default sampling window is 15 seconds unless MonitorDuration has been set.
            TimeSpan duration = TimeSpan.FromSeconds(15);

            if (MonitorDuration > TimeSpan.MinValue)
            {
                duration = MonitorDuration;
            }

            // Warm up the counters.
            if (checkCpu)
            {
                _ = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);
            }

            if (checkMemMb || checkMemPct)
            {
                _ = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);
            }

            timer.Start();

            // Fix: compare whole durations. The previous test compared the Seconds *components*
            // (0-59), so e.g. a 1-minute duration sampled for under a second.
            while (!currentProcess.HasExited && timer.Elapsed <= duration)
            {
                token.ThrowIfCancellationRequested();

                if (checkCpu)
                {
                    // CPU (all cores). Negative readings are discarded; readings above 100 are clamped.
                    double cpu = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);

                    if (cpu >= 0)
                    {
                        if (cpu > 100)
                        {
                            cpu = 100;
                        }

                        this.AllAppCpuData.FirstOrDefault(x => x.Id == id).Data.Add(cpu);
                    }
                }

                float processMem = 0;

                if (checkMemMb || checkMemPct)
                {
                    processMem = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);
                }

                if (checkMemMb)
                {
                    // Memory (private working set (process)).
                    this.AllAppMemDataMb.FirstOrDefault(x => x.Id == id).Data.Add(processMem);
                }

                if (checkMemPct)
                {
                    // Memory (percent in use (total)).
                    // NOTE(review): the "* 1024" suggests TotalMemory is in GB while processMem is in MB - confirm.
                    var (TotalMemory, PercentInUse) = OperatingSystemInfoProvider.Instance.TupleGetTotalPhysicalMemorySizeAndPercentInUse();
                    long totalMem = TotalMemory;

                    if (totalMem > 0)
                    {
                        double usedPct = Math.Round(((double)(processMem * 100)) / (totalMem * 1024), 2);
                        this.AllAppMemDataPercent.FirstOrDefault(x => x.Id == id).Data.Add(Math.Round(usedPct, 1));
                    }
                }

                // Honor the token that was handed to this method while waiting for the next sample.
                await Task.Delay(250, token);
            }

            timer.Stop();
            timer.Reset();
        }
        catch (Exception e)
        {
#if DEBUG
            // DEBUG INFO
            var healthReport = new Utilities.HealthReport
            {
                AppName = repOrInst.ApplicationName,
                HealthMessage = $"Error:{Environment.NewLine}{e}{Environment.NewLine}",
                State = HealthState.Ok,
                Code = FOErrorWarningCodes.Ok,
                NodeName = NodeName,
                Observer = ObserverName,
                Property = $"{e.Source}",
                ReportType = HealthReportType.Application,
            };

            HealthReporter.ReportHealthToServiceFabric(healthReport);
#endif
            if (e is Win32Exception || e is ArgumentException || e is InvalidOperationException)
            {
                // The process could not be found or inspected: log it and move on to the next replica.
                WriteToLogWithLevel(
                    ObserverName,
                    $"MonitorAsync failed to find current service process for {repOrInst.ApplicationName?.OriginalString ?? repOrInst.ApplicationTypeName}{Environment.NewLine}{e}",
                    LogLevel.Information);
            }
            else
            {
                // Cancellation is expected and not logged; everything else is logged. Both are re-thrown.
                if (!(e is OperationCanceledException || e is TaskCanceledException))
                {
                    WriteToLogWithLevel(
                        ObserverName,
                        $"Unhandled exception in MonitorAsync:{Environment.NewLine}{e}",
                        LogLevel.Warning);
                }

                throw;
            }
        }
        finally
        {
            currentProcess?.Dispose();
            currentProcess = null;
        }
    }
}
/// <summary>
/// Builds a plain-text summary of the captured Service Fabric settings plus the deployed-apps
/// information and writes it to "SFInfraInfo.txt" under the observer log folder.
/// A Warning is reported when the file cannot be written.
/// </summary>
/// <param name="token">Cancellation token; checked before building and before writing the file.</param>
public override async Task ReportAsync(CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    var report = new StringBuilder();

    // Appends the line only when the corresponding setting is available.
    void AppendWhen(bool available, string line)
    {
        if (available)
        {
            _ = report.AppendLine(line);
        }
    }

    _ = report.AppendLine("\nService Fabric information:\n");

    AppendWhen(!string.IsNullOrEmpty(this.SFVersion), "Runtime Version: " + this.SFVersion);
    AppendWhen(this.SFBinRoot != null, "Fabric Bin root directory: " + this.SFBinRoot);
    AppendWhen(this.SFCodePath != null, "Fabric Code Path: " + this.SFCodePath);
    AppendWhen(!string.IsNullOrEmpty(this.SFDataRoot), "Data root directory: " + this.SFDataRoot);
    AppendWhen(!string.IsNullOrEmpty(this.SFLogRoot), "Log root directory: " + this.SFLogRoot);
    AppendWhen(this.SFVolumeDiskServiceEnabled != null, "Volume Disk Service Enabled: " + this.SFVolumeDiskServiceEnabled);
    AppendWhen(this.unsupportedPreviewFeaturesEnabled != null, "Unsupported Preview Features Enabled: " + this.unsupportedPreviewFeaturesEnabled);
    AppendWhen(this.SFCompatibilityJsonPath != null, "Compatibility Json path: " + this.SFCompatibilityJsonPath);
    AppendWhen(this.SFEnableCircularTraceSession != null, "Enable Circular trace session: " + this.SFEnableCircularTraceSession);

    var deployedAppsInfo = await GetDeployedAppsInfoAsync(token).ConfigureAwait(true);
    _ = report.Append(deployedAppsInfo);
    _ = report.AppendLine();

    token.ThrowIfCancellationRequested();

    // This file is used by the web application (ObserverWebApi).
    var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "SFInfraInfo.txt");
    bool written = ObserverLogger.TryWriteLogFile(logPath, report.ToString());

    if (!written)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            "Unable to create SFInfraInfo.txt file.");
    }

    _ = report.Clear();
}
/// <summary>
/// Optionally logs the collected node-level metrics to CSV and then evaluates each metric
/// against its error/warning thresholds via <c>ProcessResourceDataReportHealth</c>.
/// </summary>
/// <param name="token">Cancellation token; checked once on entry.</param>
/// <returns>A completed task; all work is done synchronously.</returns>
/// <remarks>
/// An <see cref="AggregateException"/> wrapping a cancellation or timeout is swallowed;
/// any other exception is reported as a Warning and re-thrown.
/// </remarks>
public override Task ReportAsync(CancellationToken token)
{
    try
    {
        token.ThrowIfCancellationRequested();

        if (CsvFileLogger?.EnableCsvLogging == true)
        {
            var csvFileName = "CpuMemFirewallsPorts" + NodeName;

            // Log (csv) system-wide CPU/Mem data.
            CsvFileLogger.LogData(csvFileName, NodeName, "CPU Time", "Average", Math.Round(AllCpuTimeData.AverageDataValue));
            CsvFileLogger.LogData(csvFileName, NodeName, "CPU Time", "Peak", Math.Round(AllCpuTimeData.MaxDataValue));

            CsvFileLogger.LogData(csvFileName, NodeName, "Committed Memory (MB)", "Average", Math.Round(this.allMemDataCommittedBytes.AverageDataValue));
            CsvFileLogger.LogData(csvFileName, NodeName, "Committed Memory (MB)", "Peak", Math.Round(this.allMemDataCommittedBytes.MaxDataValue));

            // Port and firewall collections are read at index 0 (their first sample).
            CsvFileLogger.LogData(csvFileName, NodeName, "All Active Ports", "Total", this.activePortsData.Data[0]);
            CsvFileLogger.LogData(csvFileName, NodeName, "Ephemeral Active Ports", "Total", this.ephemeralPortsData.Data[0]);
            CsvFileLogger.LogData(csvFileName, NodeName, "Firewall Rules", "Total", this.firewallData.Data[0]);

            DataTableFileLogger.Flush();
        }

        // Report on the global health state (system-wide (node) metrics).
        // User-configurable in NodeObserver.config.json
        var timeToLiveWarning = SetHealthReportTimeToLive();

        // CPU
        if (AllCpuTimeData.AverageDataValue > 0)
        {
            ProcessResourceDataReportHealth(AllCpuTimeData, CpuErrorUsageThresholdPct, CpuWarningUsageThresholdPct, timeToLiveWarning);
        }

        // Memory (committed MB)
        if (this.allMemDataCommittedBytes.AverageDataValue > 0)
        {
            ProcessResourceDataReportHealth(this.allMemDataCommittedBytes, MemErrorUsageThresholdMb, MemWarningUsageThresholdMb, timeToLiveWarning);
        }

        // Memory (percent in use)
        if (this.allMemDataPercentUsed.AverageDataValue > 0)
        {
            ProcessResourceDataReportHealth(this.allMemDataPercentUsed, MemoryErrorLimitPercent, MemoryWarningLimitPercent, timeToLiveWarning);
        }

        // Firewall rules
        ProcessResourceDataReportHealth(this.firewallData, FirewallRulesErrorThreshold, FirewallRulesWarningThreshold, timeToLiveWarning);

        // Ports - Active TCP
        ProcessResourceDataReportHealth(this.activePortsData, ActivePortsErrorThreshold, ActivePortsWarningThreshold, timeToLiveWarning);

        // Ports - Active Ephemeral TCP
        ProcessResourceDataReportHealth(this.ephemeralPortsData, EphemeralPortsErrorThreshold, EphemeralPortsWarningThreshold, timeToLiveWarning);

        return Task.CompletedTask;
    }
    catch (AggregateException e) when (e.InnerException is OperationCanceledException ||
                                       e.InnerException is TaskCanceledException ||
                                       e.InnerException is TimeoutException)
    {
        return Task.CompletedTask;
    }
    catch (Exception e)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"Unhandled exception re-thrown:{Environment.NewLine}{e}");

        throw;
    }
}
// This runs each time ObserveAsync is run to ensure that any new app targets and config changes will
// be up to date across observer loop iterations.
//
// Loads the app-observer JSON configuration into userTargetList and, for every valid entry,
// populates the deployed replica/instance list. Returns false when there is nothing to observe:
// the config file is missing, it yields no targets, or none of the targets names an app or app type.
private async Task<bool> InitializeAsync()
{
    if (ReplicaOrInstanceList == null)
    {
        ReplicaOrInstanceList = new List<ReplicaOrInstanceMonitoringInfo>();
    }

    if (!IsTestRun)
    {
        configSettings.Initialize(
            FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(
                ObserverConstants.ObserverConfigurationPackageName)?.Settings,
            ConfigurationSectionName,
            "AppObserverDataFileName");
    }

    // For unit tests, this path will be an empty string and not generate an exception.
    var appObserverConfigFileName = Path.Combine(
        ConfigPackagePath ?? string.Empty,
        configSettings.AppObserverConfigFileName ?? string.Empty);

    if (!File.Exists(appObserverConfigFileName))
    {
        WriteToLogWithLevel(
            ObserverName,
            $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
            LogLevel.Information);

        return false;
    }

    // This code runs each time ObserveAsync is called,
    // so clear app list and deployed replica/instance list in case a new app has been added to watch list.
    if (this.userTargetList.Count > 0)
    {
        this.userTargetList.Clear();
        ReplicaOrInstanceList.Clear();
    }

    if (this.deployedTargetList.Count > 0)
    {
        this.deployedTargetList.Clear();
    }

    // The stream is scoped to the read so the file handle is released before the awaits below.
    using (Stream stream = new FileStream(
        appObserverConfigFileName,
        FileMode.Open,
        FileAccess.Read,
        FileShare.Read))
    {
        if (stream.Length > 0
            && JsonHelper.IsJson<List<ApplicationInfo>>(File.ReadAllText(appObserverConfigFileName)))
        {
            this.userTargetList.AddRange(JsonHelper.ReadFromJsonStream<ApplicationInfo[]>(stream));
        }
    }

    // Are any of the config-supplied apps deployed?.
    if (this.userTargetList.Count == 0)
    {
        WriteToLogWithLevel(
            ObserverName,
            $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
            LogLevel.Information);

        return false;
    }

    int settingsFail = 0;

    foreach (var application in this.userTargetList)
    {
        if (string.IsNullOrWhiteSpace(application.TargetApp)
            && string.IsNullOrWhiteSpace(application.TargetAppType))
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.ToString(),
                ObserverName,
                HealthState.Warning,
                $"Initialize() | {application.TargetApp}: Required setting, target, is not set.");

            settingsFail++;

            continue;
        }

        // The app type takes precedence over the app name when both are supplied.
        if (!string.IsNullOrEmpty(application.TargetAppType))
        {
            await SetDeployedApplicationReplicaOrInstanceListAsync(
                null,
                application.TargetAppType).ConfigureAwait(false);
        }
        else
        {
            await SetDeployedApplicationReplicaOrInstanceListAsync(new Uri(application.TargetApp))
                .ConfigureAwait(false);
        }
    }

    // No required settings supplied for deployed application(s).
    // Fix: this check used to sit inside the loop after the `continue`, where the failure
    // count could never equal the list size, so an all-invalid config still returned true.
    if (settingsFail == this.userTargetList.Count)
    {
        return false;
    }

    foreach (var repOrInst in ReplicaOrInstanceList)
    {
        ObserverLogger.LogInfo(
            $"Will observe resource consumption by {repOrInst.ApplicationName?.OriginalString} " +
            $"on Node {NodeName}.");
    }

    return true;
}
// Initialize() runs each time ObserveAsync is run to ensure
// that any new app targets and config changes will
// be up to date across observer loop iterations.
//
// Loads the app-observer JSON configuration into targetList. Returns false when there is nothing
// to observe: the config file is missing, it yields no targets, or none of the targets names a
// target or target type. In a unit-test run a single fake replica is registered and true is returned.
private bool Initialize()
{
    if (replicaOrInstanceList == null)
    {
        replicaOrInstanceList = new List<ReplicaOrInstanceMonitoringInfo>();
    }

    // Is this a unit test run?
    if (IsTestRun)
    {
        replicaOrInstanceList.Add(new ReplicaOrInstanceMonitoringInfo
        {
            ApplicationName = new Uri("fabric:/TestApp"),
            PartitionId = Guid.NewGuid(),
            HostProcessId = 0,
            ReplicaOrInstanceId = default(long),
        });

        return true;
    }

    ConfigSettings.Initialize(
        FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(ObserverConstants.ObserverConfigurationPackageName)?.Settings,
        ObserverConstants.AppObserverConfigurationSectionName,
        "AppObserverDataFileName");

    var appObserverConfigFileName = Path.Combine(configPackagePath, ConfigSettings.AppObserverDataFileName);

    if (!File.Exists(appObserverConfigFileName))
    {
        WriteToLogWithLevel(
            ObserverName,
            $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
            LogLevel.Information);

        return false;
    }

    // This code runs each time ObserveAsync is called,
    // so clear app list and deployed replica/instance list in case a new app has been added to watch list.
    if (targetList.Count > 0)
    {
        targetList.Clear();
        replicaOrInstanceList.Clear();
    }

    using (Stream stream = new FileStream(appObserverConfigFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        // NOTE(review): the 42-byte minimum presumably filters out empty/placeholder config files - confirm.
        if (stream.Length > 42
            && JsonHelper.IsJson<List<ApplicationInfo>>(File.ReadAllText(appObserverConfigFileName)))
        {
            targetList.AddRange(JsonHelper.ReadFromJsonStream<ApplicationInfo[]>(stream));
        }
    }

    // Are any of the config-supplied apps deployed?.
    if (targetList.Count == 0)
    {
        WriteToLogWithLevel(
            ObserverName,
            $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
            LogLevel.Information);

        return false;
    }

    int settingsFail = 0;

    foreach (var application in targetList)
    {
        if (string.IsNullOrWhiteSpace(application.Target)
            && string.IsNullOrWhiteSpace(application.TargetType))
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.ToString(),
                ObserverName,
                HealthState.Warning,
                $"Initialize() | {application.Target}: Required setting, target, is not set.");

            settingsFail++;

            continue;
        }

        ObserverLogger.LogInfo(
            $"Will observe resource consumption by {application.Target ?? application.TargetType} " +
            $"on Node {NodeName}.");
    }

    // No required settings supplied for deployed application(s).
    // Fix: this check used to sit inside the loop after the `continue`, where the failure
    // count could never equal the list size, so an all-invalid config still returned true.
    if (settingsFail == targetList.Count)
    {
        return false;
    }

    return true;
}
/// <summary>
/// Creates a <see cref="LastExceptionHealthCheck"/> that keeps a reference to the supplied reporter.
/// </summary>
/// <param name="healthReporter">The <see cref="HealthReporter"/> stored on this instance.</param>
public LastExceptionHealthCheck(HealthReporter healthReporter)
    => this.healthReporter = healthReporter;
/// <summary>
/// Records one reading each of active ports, ephemeral ports and firewall rules, then repeatedly
/// samples CPU utilization and memory use (every 250 ms) into the observer's data containers.
/// </summary>
/// <param name="token">Cancellation token; checked on entry and on every sampling iteration.</param>
/// <returns>A task that completes when sampling has finished.</returns>
/// <exception cref="OperationCanceledException">
/// Propagated when <paramref name="token"/> is cancelled. Cancellation is not reported as a health warning.
/// </exception>
private async Task GetSystemCpuMemoryValuesAsync(CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    CpuUtilizationProvider cpuUtilizationProvider = null;

    try
    {
        // Ports.
        int activePortCountTotal = OperatingSystemInfoProvider.Instance.GetActivePortCount();
        int ephemeralPortCountTotal = OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount();
        this.activePortsData.Data.Add(activePortCountTotal);
        this.ephemeralPortsData.Data.Add(ephemeralPortCountTotal);

        // Firewall rules.
        int firewalls = NetworkUsage.GetActiveFirewallRulesCount();
        this.firewallData.Data.Add(firewalls);

        // CPU and Memory.
        // Note: Please make sure you understand the normal state of your nodes
        // with respect to the machine resource use and/or abuse by your service(s).
        // For example, if it is normal for your services to consume 90% of available CPU and memory
        // as part of the work they perform under normal traffic flow, then it doesn't make sense to warn or
        // error on these conditions.
        // TODO: Look into making this a long running background task with signaling.
        TimeSpan duration = TimeSpan.FromSeconds(10);

        if (MonitorDuration > TimeSpan.MinValue)
        {
            duration = MonitorDuration;
        }

        cpuUtilizationProvider = CpuUtilizationProvider.Create();

        // Warm up the counters.
        _ = await cpuUtilizationProvider.NextValueAsync();

        // NOTE(review): this.stopwatch is not started in this method; presumably the caller starts it - confirm.
        while (this.stopwatch.Elapsed <= duration)
        {
            token.ThrowIfCancellationRequested();

            // CPU is sampled only when a warning threshold in (0, 100] is configured.
            if (CpuWarningUsageThresholdPct > 0 && CpuWarningUsageThresholdPct <= 100)
            {
                AllCpuTimeData.Data.Add(await cpuUtilizationProvider.NextValueAsync());
            }

            if (MemWarningUsageThresholdMb > 0)
            {
                // Bytes -> megabytes.
                float committedMegaBytes = MemoryUsageProvider.Instance.GetCommittedBytes() / 1048576.0f;
                this.allMemDataCommittedBytes.Data.Add(committedMegaBytes);
            }

            if (MemoryWarningLimitPercent > 0)
            {
                this.allMemDataPercentUsed.Data.Add(
                    OperatingSystemInfoProvider.Instance.TupleGetTotalPhysicalMemorySizeAndPercentInUse().PercentInUse);
            }

            // Pass the token so a cancellation request interrupts the wait instead of being
            // noticed only at the top of the next iteration.
            await Task.Delay(250, token).ConfigureAwait(false);
        }
    }
    catch (AggregateException e) when (e.InnerException is OperationCanceledException ||
                                       e.InnerException is TaskCanceledException ||
                                       e.InnerException is TimeoutException)
    {
        return;
    }
    catch (Exception e) when (!(e is OperationCanceledException))
    {
        // Cancellation (including TaskCanceledException, which derives from OperationCanceledException)
        // is excluded by the filter above: it still propagates to the caller, but it is an expected
        // outcome and must not surface as an "unhandled exception" health warning.
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"Unhandled exception in GetSystemCpuMemoryValuesAsync:{Environment.NewLine}{e}");

        throw;
    }
    finally
    {
        cpuUtilizationProvider?.Dispose();
    }
}
/// <summary>
/// Publishes the OS health state and the collected OS report as Service Fabric health reports,
/// optionally writes SysInfo.txt for the web app, and (on Windows) warns when Windows Update
/// automatic download is enabled.
/// </summary>
/// <param name="token">Cancellation token; checked once on entry.</param>
/// <returns>A completed task.</returns>
public override Task ReportAsync(CancellationToken token)
{
    try
    {
        token.ThrowIfCancellationRequested();

        bool osIsOk = string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase);

        // OS Health.
        if (this.osStatus != null && !osIsOk)
        {
            string unhealthyMessage = $"OS reporting unhealthy: {this.osStatus}";

            var errorReport = new HealthReport
            {
                Observer = ObserverName,
                NodeName = NodeName,
                HealthMessage = unhealthyMessage,
                State = HealthState.Error,
                HealthReportTimeToLive = SetHealthReportTimeToLive(),
            };

            HealthReporter.ReportHealthToServiceFabric(errorReport);

            // This means this observer created a Warning or Error SF Health Report
            HasActiveFabricErrorOrWarning = true;

            // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
            if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
            {
                _ = TelemetryClient?.ReportHealthAsync(
                        HealthScope.Application,
                        FabricRuntime.GetActivationContext().ApplicationName,
                        HealthState.Error,
                        $"{NodeName} - OS reporting unhealthy: {this.osStatus}",
                        ObserverName,
                        Token);
            }
        }
        else if (HasActiveFabricErrorOrWarning && osIsOk)
        {
            // Clear Error or Warning with an OK Health Report.
            string healthyMessage = $"OS reporting healthy: {this.osStatus}";

            var okReport = new HealthReport
            {
                Observer = ObserverName,
                NodeName = NodeName,
                HealthMessage = healthyMessage,
                State = HealthState.Ok,
                HealthReportTimeToLive = default(TimeSpan),
            };

            HealthReporter.ReportHealthToServiceFabric(okReport);

            // Reset internal health state.
            HasActiveFabricErrorOrWarning = false;
        }

        if (ObserverManager.ObserverWebAppDeployed)
        {
            // This file is used by the web application (log reader.).
            var sysInfoPath = Path.Combine(ObserverLogger.LogFolderBasePath, "SysInfo.txt");
            string timestamp = DateTime.UtcNow.ToString("M/d/yyyy HH:mm:ss");
            bool written = ObserverLogger.TryWriteLogFile(
                sysInfoPath,
                $"Last updated on {timestamp} UTC<br/>{this.osReport}");

            if (!written)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    "Unable to create SysInfo.txt file.");
            }
        }

        // Always publish the collected OS report as an Ok health report.
        var report = new HealthReport
        {
            Observer = ObserverName,
            HealthMessage = this.osReport,
            State = HealthState.Ok,
            NodeName = NodeName,
            HealthReportTimeToLive = SetHealthReportTimeToLive(),
        };

        HealthReporter.ReportHealthToServiceFabric(report);

        bool runningOnWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);

        // Windows Update automatic download enabled?
        if (runningOnWindows && this.isWindowsUpdateAutoDownloadEnabled)
        {
            string linkText =
                $"{Environment.NewLine}For clusters of Silver durability or above, " +
                $"please consider <a href=\"https://docs.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade\" target=\"blank\">" +
                $"enabling VMSS automatic OS image upgrades</a> to prevent unexpected VM reboots. " +
                $"For Bronze durability clusters, please consider deploying the " +
                $"<a href=\"https://docs.microsoft.com/azure/service-fabric/service-fabric-patch-orchestration-application\" target=\"blank\">Patch Orchestration Service</a>.";

            string auServiceEnabledMessage = $"Windows Update Automatic Download is enabled.{linkText}";

            report = new HealthReport
            {
                Observer = ObserverName,
                Property = "OSConfiguration",
                HealthMessage = auServiceEnabledMessage,
                State = HealthState.Warning,
                NodeName = NodeName,
                HealthReportTimeToLive = SetHealthReportTimeToLive(),
            };

            HealthReporter.ReportHealthToServiceFabric(report);

            // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
            if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
            {
                var telemetryData = new TelemetryData(FabricClientInstance, token)
                {
                    HealthEventDescription = auServiceEnabledMessage,
                    HealthState = "Warning",
                    Metric = "WUAutoDownloadEnabled",
                    Value = this.isWindowsUpdateAutoDownloadEnabled,
                    NodeName = NodeName,
                    ObserverName = ObserverName,
                    Source = ObserverConstants.FabricObserverName,
                };

                _ = TelemetryClient?.ReportMetricAsync(telemetryData, Token);
            }

            // ETW.
            if (IsEtwEnabled)
            {
                Logger.EtwLogger?.Write(
                    ObserverConstants.FabricObserverETWEventName,
                    new
                    {
                        HealthState = "Warning",
                        HealthEventDescription = auServiceEnabledMessage,
                        ObserverName,
                        Metric = "WUAutoDownloadEnabled",
                        Value = this.isWindowsUpdateAutoDownloadEnabled,
                        NodeName,
                    });
            }
        }

        if (runningOnWindows)
        {
            // reset au globals for fresh detection during next observer run.
            this.isWindowsUpdateAutoDownloadEnabled = false;
            this.auStateUnknown = false;
            this.isWUADSettingEnabled = false;
        }

        return Task.CompletedTask;
    }
    catch (Exception e)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Error,
            $"Unhandled exception processing OS information:{Environment.NewLine}{e}");

        throw;
    }
}
/// <summary>
/// Prepares the observer for a network monitoring run: writes NetInfo.txt for the web app
/// (first run only), loads the endpoint config file and keeps the entries whose target
/// application is deployed on this node.
/// </summary>
/// <returns>
/// <c>true</c> for a test run or when user configuration is available;
/// <c>false</c> when the config file name is not set, the file does not exist,
/// or no configured target application is deployed on this node.
/// </returns>
private async Task<bool> InitializeAsync()
{
    WriteToLogWithLevel(
        ObserverName,
        $"Initializing {ObserverName} for network monitoring. | {NodeName}",
        LogLevel.Information);

    this.cancellationToken.ThrowIfCancellationRequested();

    // This only needs to be logged once.
    // This file is used by the ObserverWebApi application.
    if (ObserverManager.ObserverWebAppDeployed && !this.hasRun)
    {
        var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "NetInfo.txt");

        Console.WriteLine($"logPath: {logPath}");

        if (!ObserverLogger.TryWriteLogFile(logPath, GetNetworkInterfaceInfo(this.cancellationToken)))
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.OriginalString,
                ObserverName,
                HealthState.Warning,
                "Unable to create NetInfo.txt file.");
        }
    }

    // Is this a unit test run?
    if (IsTestRun)
    {
        return true;
    }

    var settings = FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(
        ObserverConstants.ObserverConfigurationPackageName)?.Settings;

    this.configSettings.Initialize(
        settings,
        ConfigurationSectionName,
        "NetworkObserverDataFileName");

    // Validate the configured file name BEFORE combining it with the data package path:
    // Path.Combine throws on a null segment, and the combined path is never blank once
    // the package path is non-empty, so checking the combined value cannot detect a missing name.
    string configuredFileName = this.configSettings.NetworkObserverConfigFileName;

    if (string.IsNullOrWhiteSpace(configuredFileName))
    {
        ObserverLogger.LogError(
            "Endpoint list file is not specified. " +
            "Please Add file containing endpoints that need to be monitored.");

        return false;
    }

    var networkObserverConfigFileName = Path.Combine(this.dataPackagePath, configuredFileName);

    if (!File.Exists(networkObserverConfigFileName))
    {
        ObserverLogger.LogError(
            "Endpoint list file is not specified. " +
            "Please Add file containing endpoints that need to be monitored.");

        return false;
    }

    // The user configuration is only loaded while the list is empty.
    if (this.userConfig.Count == 0)
    {
        using (Stream stream = new FileStream(
                                    networkObserverConfigFileName,
                                    FileMode.Open,
                                    FileAccess.Read,
                                    FileShare.Read))
        {
            var configs = JsonHelper.ReadFromJsonStream<NetworkObserverConfig[]>(stream);

            foreach (var netConfig in configs)
            {
                // Keep only entries whose target app is deployed on this node.
                var deployedApps = await FabricClientInstance.QueryManager.GetDeployedApplicationListAsync(
                                            NodeName,
                                            new Uri(netConfig.TargetApp)).ConfigureAwait(false);

                if (deployedApps == null || deployedApps.Count < 1)
                {
                    continue;
                }

                this.userConfig.Add(netConfig);
            }
        }

        if (this.userConfig.Count == 0)
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.ToString(),
                ObserverName,
                HealthState.Warning,
                "Missing required configuration data: endpoints.");

            return false;
        }
    }

    return true;
}
/// <summary>
/// Tests reachability of every configured endpoint (user config when present, otherwise the
/// default config) over TCP or HTTP(S) and records the result via SetHealthState.
/// Each host name is tested at most once per run; later occurrences reuse the cached result.
/// </summary>
/// <exception cref="OperationCanceledException">Thrown when the observer's cancellation token is cancelled.</exception>
private void InternetConnectionStateIsConnected()
{
    var configList = this.defaultConfig;

    if (this.userConfig.Count > 0)
    {
        configList = this.userConfig;
    }

    foreach (var config in configList)
    {
        this.cancellationToken.ThrowIfCancellationRequested();

        foreach (var endpoint in config.Endpoints)
        {
            if (string.IsNullOrEmpty(endpoint.HostName))
            {
                continue;
            }

            // Don't re-test endpoint if it has already been tested for a different targetApp.
            // TryGetValue avoids the double lookup of ContainsKey + indexer.
            if (this.connEndpointTestResults.TryGetValue(endpoint.HostName, out var cachedResult))
            {
                SetHealthState(endpoint, config.TargetApp, cachedResult);
                continue;
            }

            bool passed = false;
            this.cancellationToken.ThrowIfCancellationRequested();

            // SQL Azure, other database services that are addressable over direct TCP.
            if (endpoint.Protocol == DirectInternetProtocol.Tcp)
            {
                passed = TcpEndpointDoConnectionTest(endpoint.HostName, endpoint.Port);
            }
            else
            {
                // Default is http.
                // Service REST endpoints, CosmosDB REST endpoint, etc.
                // Http protocol means any endpoint/port pair that is addressable over HTTP/s.
                try
                {
                    this.cancellationToken.ThrowIfCancellationRequested();

                    ServicePointManager.SecurityProtocol = SecurityProtocolType.SystemDefault;

                    // Port 443 implies https unless the host name already carries a scheme.
                    string prefix = endpoint.Port == 443 ? "https://" : "http://";

                    if (endpoint.HostName.Contains("://"))
                    {
                        prefix = string.Empty;
                    }

                    var request = (HttpWebRequest)WebRequest.Create(
                        new Uri($"{prefix}{endpoint.HostName}:{endpoint.Port}"));

                    request.AuthenticationLevel = AuthenticationLevel.MutualAuthRequired;
                    request.ImpersonationLevel = TokenImpersonationLevel.Impersonation;
                    request.Timeout = 60000;
                    request.Method = "GET";

                    using var response = (HttpWebResponse)request.GetResponse();
                    var status = response.StatusCode;

                    // The target server responded with something.
                    // It doesn't really matter what it "said".
                    if (status == HttpStatusCode.OK || response?.Headers?.Count > 0)
                    {
                        passed = true;
                    }
                }
                catch (IOException ie)
                {
                    if (ie.InnerException is ProtocolViolationException)
                    {
                        passed = true;
                    }
                }
                catch (WebException we)
                {
                    // Dispose the error response (if any) once inspected so its connection is released.
                    using (WebResponse errorResponse = we.Response)
                    {
                        if (we.Status == WebExceptionStatus.ProtocolError
                            || we.Status == WebExceptionStatus.TrustFailure
                            || we.Status == WebExceptionStatus.SecureChannelFailure
                            || errorResponse?.Headers?.Count > 0)
                        {
                            // Could not establish trust or server doesn't want to hear from you, or...
                            // Either way, the Server *responded*. It's reachable.
                            // You could always add code to grab your app or cluster certs from local store
                            // and apply it to the request. See CertificateObserver for how to get
                            // both your App cert(s) and Cluster cert. The goal of NetworkObserver is
                            // to test availability. Nothing more.
                            passed = true;
                        }
                        else if (we.Status == WebExceptionStatus.SendFailure
                                 && we.InnerException != null
                                 && (we.InnerException.Message.IndexOf("authentication", StringComparison.OrdinalIgnoreCase) >= 0
                                     || we.InnerException.HResult == -2146232800))
                        {
                            // Ordinal, case-insensitive match: ToLower() is culture-sensitive and can
                            // fail to match under cultures with special casing rules (e.g. Turkish 'I').
                            passed = true;
                        }
                    }
                }
                catch (Exception e)
                {
                    HealthReporter.ReportFabricObserverServiceHealth(
                        FabricServiceContext.ServiceName.OriginalString,
                        ObserverName,
                        HealthState.Warning,
                        e.ToString());

                    throw;
                }
            }

            SetHealthState(endpoint, config.TargetApp, passed);

            if (!this.connEndpointTestResults.ContainsKey(endpoint.HostName))
            {
                this.connEndpointTestResults.Add(endpoint.HostName, passed);
            }
        }
    }
}
/// <summary>
/// Evaluates the collected disk metrics against their configured thresholds and, when the
/// observer web app is deployed, writes the accumulated disk info to disks.txt.
/// </summary>
/// <param name="token">Cancellation token; checked before each data item is processed.</param>
/// <returns>A completed task.</returns>
public override Task ReportAsync(CancellationToken token)
{
    try
    {
        var timeToLiveWarning = SetHealthReportTimeToLive();

        // User-supplied Disk Space Usage % thresholds from Settings.xml.
        foreach (var data in this.DiskSpaceUsagePercentageData)
        {
            token.ThrowIfCancellationRequested();
            ProcessResourceDataReportHealth(
                data,
                DiskSpacePercentErrorThreshold,
                DiskSpacePercentWarningThreshold,
                timeToLiveWarning);
        }

        // User-supplied Average disk queue length thresholds from Settings.xml.
        foreach (var data in this.DiskAverageQueueLengthData)
        {
            token.ThrowIfCancellationRequested();
            ProcessResourceDataReportHealth(
                data,
                AverageQueueLengthErrorThreshold,
                AverageQueueLengthWarningThreshold,
                timeToLiveWarning);
        }

        /* For ETW Only - These calls will just produce ETW (note the thresholds). */
        if (IsEtwEnabled)
        {
            // Disk Space Available
            foreach (var data in this.DiskSpaceAvailableMbData)
            {
                token.ThrowIfCancellationRequested();
                ProcessResourceDataReportHealth(
                    data,
                    0,
                    0,
                    timeToLiveWarning);
            }

            // Disk Space Total
            foreach (var data in this.DiskSpaceTotalMbData)
            {
                token.ThrowIfCancellationRequested();
                ProcessResourceDataReportHealth(
                    data,
                    0,
                    0,
                    timeToLiveWarning);
            }
        }

        token.ThrowIfCancellationRequested();

        // This section only needs to run if you have the FabricObserverWebApi app installed.
        if (!ObserverManager.ObserverWebAppDeployed)
        {
            return Task.CompletedTask;
        }

        var diskInfoPath = Path.Combine(ObserverLogger.LogFolderBasePath, "disks.txt");

        _ = ObserverLogger.TryWriteLogFile(diskInfoPath, this.diskInfo.ToString());

        // Reset the buffer after it has been written out.
        _ = this.diskInfo.Clear();

        return Task.CompletedTask;
    }
    catch (AggregateException e) when (e.InnerException is OperationCanceledException ||
                                       e.InnerException is TaskCanceledException ||
                                       e.InnerException is TimeoutException)
    {
        return Task.CompletedTask;
    }
    catch (Exception e)
    {
        // The message names this method; it previously named GetSystemCpuMemoryValuesAsync,
        // which misdirected anyone diagnosing the failure.
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"Unhandled exception in ReportAsync:{Environment.NewLine}{e}");

        throw;
    }
}
/// <summary>
/// Collects OS, network, memory and disk information, stores the formatted text in
/// <c>osReport</c> (and the OS status in <c>osStatus</c>), and emits the same data through
/// ETW and telemetry when those are enabled.
/// </summary>
/// <param name="token">Cancellation token passed to the OS info and hot-fix queries.</param>
/// <returns>A task that completes when the information has been gathered and emitted.</returns>
/// <remarks>
/// FabricException, OperationCanceledException, TaskCanceledException and InvalidComObjectException
/// are reported as a Warning and swallowed; any other exception is reported as an Error and rethrown.
/// </remarks>
private async Task GetComputerInfoAsync(CancellationToken token)
{
    var sb = new StringBuilder();
    int logicalProcessorCount = Environment.ProcessorCount;

    try
    {
        OSInfo osInfo = await OperatingSystemInfoProvider.Instance.GetOSInfoAsync(token);
        this.osStatus = osInfo.Status;

        // Active, bound ports.
        int activePorts = OperatingSystemInfoProvider.Instance.GetActivePortCount();

        // Active, ephemeral ports.
        int activeEphemeralPorts = OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount();
        (int lowPortOS, int highPortOS) = OperatingSystemInfoProvider.Instance.TupleGetDynamicPortRange();
        string osEphemeralPortRange = string.Empty;
        string fabricAppPortRange = string.Empty;

        string clusterManifestXml = IsTestRun
            ? File.ReadAllText(TestManifestPath)
            : await FabricClientInstance.ClusterManager.GetClusterManifestAsync(
                AsyncClusterOperationTimeoutSeconds, Token).ConfigureAwait(false);

        (int lowPortApp, int highPortApp) =
            NetworkUsage.TupleGetFabricApplicationPortRangeForNodeType(
                FabricServiceContext.NodeContext.NodeType,
                clusterManifestXml);

        int firewalls = NetworkUsage.GetActiveFirewallRulesCount();

        // OS info.
        _ = sb.AppendLine("OS Information:\r\n");
        _ = sb.AppendLine($"Name: {osInfo.Name}");
        _ = sb.AppendLine($"Version: {osInfo.Version}");

        // Only emit the install date when there is one to show.
        if (!string.IsNullOrEmpty(osInfo.InstallDate))
        {
            _ = sb.AppendLine($"InstallDate: {osInfo.InstallDate}");
        }

        _ = sb.AppendLine($"LastBootUpTime*: {osInfo.LastBootUpTime}");

        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            // WU AutoUpdate - Download enabled.
            // If the config setting EnableWindowsAutoUpdateCheck is set to false, then don't add this info to sb.
            if (this.isWUADSettingEnabled)
            {
                string auMessage = "WindowsUpdateAutoDownloadEnabled: ";

                if (this.auStateUnknown)
                {
                    auMessage += "Unknown";
                }
                else
                {
                    auMessage += this.isWindowsUpdateAutoDownloadEnabled;
                }

                _ = sb.AppendLine(auMessage);
            }

            // Not supported for Linux.
            _ = sb.AppendLine($"OSLanguage: {osInfo.Language}");
            _ = sb.AppendLine($"OSHealthStatus*: {osInfo.Status}");
        }

        _ = sb.AppendLine($"NumberOfProcesses*: {osInfo.NumberOfProcesses}");

        // Values of -1 or lower are treated as "not available" and the line is omitted.
        if (lowPortOS > -1)
        {
            osEphemeralPortRange = $"{lowPortOS} - {highPortOS}";
            _ = sb.AppendLine($"OSEphemeralTCPPortRange: {osEphemeralPortRange} (Active*: {activeEphemeralPorts})");
        }

        if (lowPortApp > -1)
        {
            fabricAppPortRange = $"{lowPortApp} - {highPortApp}";
            _ = sb.AppendLine($"FabricApplicationTCPPortRange: {fabricAppPortRange}");
        }

        if (firewalls > -1)
        {
            _ = sb.AppendLine($"ActiveFirewallRules*: {firewalls}");
        }

        if (activePorts > -1)
        {
            _ = sb.AppendLine($"TotalActiveTCPPorts*: {activePorts}");
        }

        // Hardware info.
        // Proc/Mem (KB values are divided by 1048576 to get GB).
        _ = sb.AppendLine($"{Environment.NewLine}Hardware Information:{Environment.NewLine}");
        _ = sb.AppendLine($"LogicalProcessorCount: {logicalProcessorCount}");

        if (osInfo.TotalVirtualMemorySizeKB > 0)
        {
            _ = sb.AppendLine($"TotalVirtualMemorySize: {osInfo.TotalVirtualMemorySizeKB / 1048576} GB");
        }

        if (osInfo.TotalVisibleMemorySizeKB > 0)
        {
            _ = sb.AppendLine($"TotalVisibleMemorySize: {osInfo.TotalVisibleMemorySizeKB / 1048576} GB");
        }

        _ = sb.AppendLine($"FreePhysicalMemory*: {Math.Round(osInfo.AvailableMemoryKB / 1048576.0, 2)} GB");
        _ = sb.AppendLine($"FreeVirtualMemory*: {Math.Round(osInfo.FreeVirtualMemoryKB / 1048576.0, 2)} GB");

        // Disk
        var drivesInformationTuple = DiskUsage.GetCurrentDiskSpaceTotalAndUsedPercentAllDrives(SizeUnit.Gigabytes);
        var logicalDriveCount = drivesInformationTuple.Count;
        string driveInfo = string.Empty;

        _ = sb.AppendLine($"LogicalDriveCount: {logicalDriveCount}");

        foreach (var (driveName, diskSize, percentConsumed) in drivesInformationTuple)
        {
            string drvSize;

            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                // A drive whose letter matches the system directory's is labelled "System".
                string systemDrv = "Data";

                if (string.Equals(
                        Environment.SystemDirectory.Substring(0, 1),
                        driveName.Substring(0, 1),
                        StringComparison.OrdinalIgnoreCase))
                {
                    systemDrv = "System";
                }

                drvSize = $"Drive {driveName} ({systemDrv}) Size: {diskSize} GB, Consumed*: {percentConsumed}%";
            }
            else
            {
                drvSize = $"Mount point: {driveName}, Size: {diskSize} GB, Consumed*: {percentConsumed}%";
            }

            _ = sb.AppendLine(drvSize);

            driveInfo += $"{drvSize}{Environment.NewLine}";
        }

        string osHotFixes = string.Empty;

        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            osHotFixes = GetWindowsHotFixes(token);
        }

        if (!string.IsNullOrEmpty(osHotFixes))
        {
            _ = sb.AppendLine($"\nWindows Patches/Hot Fixes*:\n\n{osHotFixes}");
        }

        // Dynamic info qualifier (*)
        _ = sb.AppendLine($"\n* Dynamic data.");

        this.osReport = sb.ToString();

        string hotFixes = string.Empty;

        // ETW.
        if (IsEtwEnabled)
        {
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                hotFixes = GetWindowsHotFixes(token, generateUrl: false).Replace("\r\n", ", ").TrimEnd(',');
            }

            Logger.EtwLogger?.Write(
                ObserverConstants.FabricObserverETWEventName,
                new
                {
                    HealthState = "Ok",
                    Node = NodeName,
                    Observer = ObserverName,
                    OS = osInfo.Name,
                    OSVersion = osInfo.Version,
                    OSInstallDate = osInfo.InstallDate,
                    AutoUpdateEnabled = this.auStateUnknown ? "Unknown" : this.isWindowsUpdateAutoDownloadEnabled.ToString(),
                    osInfo.LastBootUpTime,
                    WindowsAutoUpdateEnabled = this.isWindowsUpdateAutoDownloadEnabled,
                    TotalMemorySizeGB = (int)(osInfo.TotalVisibleMemorySizeKB / 1048576),
                    AvailablePhysicalMemoryGB = Math.Round(osInfo.FreePhysicalMemoryKB / 1048576.0, 2),
                    AvailableVirtualMemoryGB = Math.Round(osInfo.FreeVirtualMemoryKB / 1048576.0, 2),
                    LogicalProcessorCount = logicalProcessorCount,
                    LogicalDriveCount = logicalDriveCount,
                    DriveInfo = driveInfo,
                    NumberOfRunningProcesses = osInfo.NumberOfProcesses,
                    ActiveFirewallRules = firewalls,
                    ActivePorts = activePorts,
                    ActiveEphemeralPorts = activeEphemeralPorts,
                    WindowsDynamicPortRange = osEphemeralPortRange,
                    FabricAppPortRange = fabricAppPortRange,
                    HotFixes = hotFixes,
                });
        }

        // Telemetry
        if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
        {
            // Reuse the hot-fix list computed for ETW when there is one.
            if (string.IsNullOrEmpty(hotFixes) && RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                hotFixes = GetWindowsHotFixes(token, generateUrl: false).Replace("\r\n", ", ").TrimEnd(',');
            }

            TelemetryClient?.ReportMetricAsync(
                new MachineTelemetryData
                {
                    HealthState = "Ok",
                    Node = NodeName,
                    Observer = ObserverName,
                    OS = osInfo.Name,
                    OSVersion = osInfo.Version,
                    OSInstallDate = osInfo.InstallDate,
                    LastBootUpTime = osInfo.LastBootUpTime,
                    WindowsUpdateAutoDownloadEnabled = this.isWindowsUpdateAutoDownloadEnabled,

                    // Divide first, then narrow: casting the KB value to int before dividing
                    // truncates large values and disagrees with the ETW payload above.
                    TotalMemorySizeGB = (int)(osInfo.TotalVisibleMemorySizeKB / 1048576),
                    AvailablePhysicalMemoryGB = Math.Round(osInfo.FreePhysicalMemoryKB / 1048576.0, 2),
                    AvailableVirtualMemoryGB = Math.Round(osInfo.FreeVirtualMemoryKB / 1048576.0, 2),
                    LogicalProcessorCount = logicalProcessorCount,
                    LogicalDriveCount = logicalDriveCount,
                    DriveInfo = driveInfo,
                    NumberOfRunningProcesses = osInfo.NumberOfProcesses,
                    ActiveFirewallRules = firewalls,
                    ActivePorts = activePorts,
                    ActiveEphemeralPorts = activeEphemeralPorts,
                    WindowsDynamicPortRange = osEphemeralPortRange,
                    FabricAppPortRange = fabricAppPortRange,
                    HotFixes = hotFixes,
                }, Token);
        }
    }
    catch (Exception e) when (e is FabricException ||
                              e is OperationCanceledException ||
                              e is TaskCanceledException ||
                              e is InvalidComObjectException)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"Handled Exception processing OS information:{Environment.NewLine}{e}");
    }
    catch (Exception e)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Error,
            $"Unhandled Exception processing OS information:{Environment.NewLine}{e}");

        throw;
    }
}
/// <summary>
/// For each entry in ReplicaOrInstanceList, samples the host process's CPU and memory use
/// every 250 ms for the monitoring duration, then records its active and ephemeral port counts.
/// </summary>
/// <param name="token">Cancellation token; checked per replica and per sample.</param>
/// <returns>A task that completes when every replica/instance has been sampled.</returns>
/// <remarks>
/// Win32Exception, ArgumentException and InvalidOperationException for a given process are logged
/// and the loop moves on to the next entry; any other exception is rethrown.
/// </remarks>
private async Task MonitorDeployedAppsAsync(CancellationToken token)
{
    Process currentProcess = null;

    foreach (var repOrInst in ReplicaOrInstanceList)
    {
        token.ThrowIfCancellationRequested();

        var timer = new Stopwatch();
        int processId = (int)repOrInst.HostProcessId;
        var cpuUsage = new CpuUsage();

        try
        {
            // App level.
            currentProcess = Process.GetProcessById(processId);

            token.ThrowIfCancellationRequested();

            var procName = currentProcess.ProcessName;
            string appNameOrType = GetAppNameOrType(repOrInst);

            var id = $"{appNameOrType}:{procName}";

            // Add new resource data structures for each app service process.
            if (this.allAppCpuData.All(list => list.Id != id))
            {
                this.allAppCpuData.Add(new FabricResourceUsageData<double>(ErrorWarningProperty.TotalCpuTime, id, DataCapacity, UseCircularBuffer));
                this.allAppMemDataMb.Add(new FabricResourceUsageData<float>(ErrorWarningProperty.TotalMemoryConsumptionMb, id, DataCapacity, UseCircularBuffer));
                this.allAppMemDataPercent.Add(new FabricResourceUsageData<double>(ErrorWarningProperty.TotalMemoryConsumptionPct, id, DataCapacity, UseCircularBuffer));
                this.allAppTotalActivePortsData.Add(new FabricResourceUsageData<int>(ErrorWarningProperty.TotalActivePorts, id, 1));
                this.allAppEphemeralPortsData.Add(new FabricResourceUsageData<int>(ErrorWarningProperty.TotalEphemeralPorts, id, 1));
            }

            // Resolve the per-process containers once rather than re-scanning the lists on every sample.
            var cpuData = this.allAppCpuData.FirstOrDefault(x => x.Id == id);
            var memDataMb = this.allAppMemDataMb.FirstOrDefault(x => x.Id == id);
            var memDataPercent = this.allAppMemDataPercent.FirstOrDefault(x => x.Id == id);

            TimeSpan duration = TimeSpan.FromSeconds(15);

            if (MonitorDuration > TimeSpan.MinValue)
            {
                duration = MonitorDuration;
            }

            // Warm up the counters.
            _ = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);
            _ = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);

            timer.Start();

            // Compare whole TimeSpans. TimeSpan.Seconds is only the 0-59 seconds component,
            // so comparing it breaks for durations of a minute or more (e.g. 1 min => Seconds == 0).
            while (!currentProcess.HasExited && timer.Elapsed <= duration)
            {
                token.ThrowIfCancellationRequested();

                // CPU (all cores). Negative readings are discarded; readings are capped at 100.
                double cpu = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);

                if (cpu >= 0)
                {
                    if (cpu > 100)
                    {
                        cpu = 100;
                    }

                    cpuData.Data.Add(cpu);
                }

                // Memory (private working set (process)).
                var processMem = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);
                memDataMb.Data.Add(processMem);

                // Memory (percent in use (total)).
                var (TotalMemory, PercentInUse) = OperatingSystemInfoProvider.Instance.TupleGetTotalPhysicalMemorySizeAndPercentInUse();
                long totalMem = TotalMemory;

                if (totalMem > -1)
                {
                    // NOTE(review): totalMem * 1024 presumably converts GB to MB to match processMem - confirm units.
                    double usedPct = Math.Round(((double)(processMem * 100)) / (totalMem * 1024), 2);
                    memDataPercent.Data.Add(Math.Round(usedPct, 1));
                }

                await Task.Delay(250, Token);
            }

            timer.Stop();
            timer.Reset();

            // Total and Ephemeral ports..
            this.allAppTotalActivePortsData.FirstOrDefault(x => x.Id == id)
                .Data.Add(OperatingSystemInfoProvider.Instance.GetActivePortCount(currentProcess.Id, FabricServiceContext));

            this.allAppEphemeralPortsData.FirstOrDefault(x => x.Id == id)
                .Data.Add(OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount(currentProcess.Id, FabricServiceContext));
        }
        catch (Exception e)
        {
#if DEBUG
            // DEBUG INFO
            var healthReport = new Utilities.HealthReport
            {
                AppName = repOrInst.ApplicationName,
                HealthMessage = $"Error: {e}\n\n",
                State = HealthState.Ok,
                Code = FOErrorWarningCodes.Ok,
                NodeName = NodeName,
                Observer = ObserverName,
                Property = $"{e.Source}",
                ReportType = HealthReportType.Application,
            };

            HealthReporter.ReportHealthToServiceFabric(healthReport);
#endif
            if (e is Win32Exception || e is ArgumentException || e is InvalidOperationException)
            {
                // The process could not be found or inspected; log it and continue with the next entry.
                WriteToLogWithLevel(
                    ObserverName,
                    $"MonitorAsync failed to find current service process for {repOrInst.ApplicationName?.OriginalString ?? repOrInst.ApplicationTypeName}\n{e}",
                    LogLevel.Information);
            }
            else
            {
                // Cancellation is rethrown without being logged as an unhandled failure.
                if (!(e is OperationCanceledException || e is TaskCanceledException))
                {
                    WriteToLogWithLevel(
                        ObserverName,
                        $"Unhandled exception in MonitorAsync: \n {e}",
                        LogLevel.Warning);
                }

                throw;
            }
        }
        finally
        {
            currentProcess?.Dispose();
            currentProcess = null;
        }
    }
}
/// <summary>
/// Publishes an informational health report on port and memory use by Fabric system services,
/// evaluates the collected CPU, memory and port data against their thresholds, and on Windows
/// (when the web app is deployed and event log monitoring is on) writes collected event log
/// errors to EventVwrErrors.txt as HTML.
/// </summary>
/// <param name="token">Cancellation token; checked while the event list is being rendered.</param>
/// <returns>A completed task.</returns>
public override Task ReportAsync(CancellationToken token)
{
    Token.ThrowIfCancellationRequested();

    // Informational report. For now, Linux is where we pay close attention to memory use
    // by Fabric system services as there are still a few issues in that realm..
    var timeToLiveWarning = SetHealthReportTimeToLive();

    var fabricMemMb = this.allMemData.FirstOrDefault(x => x.Id == "Fabric")?.AverageDataValue;

    string portsAndMemoryMessage =
        $"Number of ports in use by Fabric services: {TotalActivePortCountAllSystemServices}{Environment.NewLine}" +
        $"Number of ephemeral ports in use by Fabric services: {TotalActiveEphemeralPortCountAllSystemServices}{Environment.NewLine}" +
        $"Fabric memory use MB: {fabricMemMb}{Environment.NewLine}";

    // The gateway and host figures are only included on Linux.
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
    {
        var gatewayMemMb = this.allMemData.FirstOrDefault(x => x.Id == "FabricGateway.exe")?.AverageDataValue;
        var hostMemMb = this.allMemData.FirstOrDefault(x => x.Id == "FabricHost")?.AverageDataValue;

        portsAndMemoryMessage +=
            $"FabricGateway memory use MB: {gatewayMemMb}{Environment.NewLine}" +
            $"FabricHost memory use MB: {hostMemMb}{Environment.NewLine}";
    }

    var portInformationReport = new HealthReport
    {
        Observer = ObserverName,
        NodeName = NodeName,
        HealthMessage = portsAndMemoryMessage,
        State = HealthState.Ok,
        HealthReportTimeToLive = timeToLiveWarning,
    };

    HealthReporter.ReportHealthToServiceFabric(portInformationReport);

    // Reset ports counters.
    TotalActivePortCountAllSystemServices = 0;
    TotalActiveEphemeralPortCountAllSystemServices = 0;

    // CPU
    ProcessResourceDataList(this.allCpuData, CpuErrorUsageThresholdPct, CpuWarnUsageThresholdPct);

    // Memory
    ProcessResourceDataList(this.allMemData, MemErrorUsageThresholdMb, MemWarnUsageThresholdMb);

    // Ports - Active TCP
    ProcessResourceDataList(this.allActiveTcpPortData, ActiveTcpPortCountError, ActiveTcpPortCountWarning);

    // Ports - Ephemeral
    ProcessResourceDataList(this.allEphemeralTcpPortData, ActiveEphemeralPortCountError, ActiveEphemeralPortCountWarning);

    // Windows Event Log
    bool shouldWriteEventLogFile =
        RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
        && ObserverManager.ObserverWebAppDeployed
        && this.monitorWinEventLog;

    if (shouldWriteEventLogFile)
    {
        // SF Eventlog Errors?
        // Write this out to a new file, for use by the web front end log viewer.
        // Format = HTML.
        int count = this.evtRecordList.Count();
        var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "EventVwrErrors.txt");

        // Remove existing file (best effort: failures to delete are ignored).
        if (File.Exists(logPath))
        {
            try
            {
                File.Delete(logPath);
            }
            catch (IOException)
            {
            }
            catch (UnauthorizedAccessException)
            {
            }
        }

        // The file is only produced once at least 10 records have been collected.
        if (count >= 10)
        {
            var html = new StringBuilder();

            _ = html.AppendLine(
                "<br/><div><strong><a href='javascript:toggle(\"evtContainer\")'>" +
                "<div id=\"plus\" style=\"display: inline; font-size: 25px;\">+</div> " +
                count +
                " Error Events in ServiceFabric and System</a> Event logs</strong>.<br/></div>");

            _ = html.AppendLine("<div id='evtContainer' style=\"display: none;\">");

            foreach (var evt in this.evtRecordList.Distinct())
            {
                token.ThrowIfCancellationRequested();

                try
                {
                    // Access event properties:
                    _ = html.AppendLine("<div>" + evt.LogName + "</div>");
                    _ = html.AppendLine("<div>" + evt.LevelDisplayName + "</div>");

                    if (evt.TimeCreated.HasValue)
                    {
                        _ = html.AppendLine("<div>" + evt.TimeCreated.Value.ToShortDateString() + "</div>");
                    }

                    foreach (var prop in evt.Properties)
                    {
                        // Skip properties without a value or with an empty string form.
                        if (prop.Value == null || Convert.ToString(prop.Value).Length == 0)
                        {
                            continue;
                        }

                        _ = html.AppendLine("<div>" + prop.Value + "</div>");
                    }
                }
                catch (EventLogException)
                {
                }
            }

            _ = html.AppendLine("</div>");

            _ = ObserverLogger.TryWriteLogFile(logPath, html.ToString());
            _ = html.Clear();
        }

        // Clean up.
        if (count > 0)
        {
            this.evtRecordList.Clear();
        }
    }

    ClearDataContainers();

    return Task.CompletedTask;
}
/// <summary>
/// Windows process dmp creator. Writes a dump of the target process into the configured dumps
/// folder as "&lt;processName&gt;_&lt;ddMMyyyyHHmmss&gt;.dmp".
/// </summary>
/// <param name="processId">Id of the process to dump.</param>
/// <param name="dumpType">Selects the set of MINIDUMP_TYPE flags; defaults to <see cref="DumpType.Full"/>.</param>
/// <returns>
/// <c>true</c> when the dump file was written; <c>false</c> when no dumps path is configured,
/// the process name is empty, the dumps drive is more than 90% full, or an
/// ArgumentException / InvalidOperationException / Win32Exception occurred.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown for an unrecognised <paramref name="dumpType"/>.</exception>
public bool DumpServiceProcess(int processId, DumpType dumpType = DumpType.Full)
{
    if (string.IsNullOrEmpty(this.dumpsPath))
    {
        return false;
    }

    string processName = string.Empty;

    NativeMethods.MINIDUMP_TYPE miniDumpType;

    switch (dumpType)
    {
        case DumpType.Full:
            miniDumpType = NativeMethods.MINIDUMP_TYPE.MiniDumpWithFullMemory |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithFullMemoryInfo |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithHandleData |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithThreadInfo |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithUnloadedModules;
            break;

        case DumpType.MiniPlus:
            miniDumpType = NativeMethods.MINIDUMP_TYPE.MiniDumpWithPrivateReadWriteMemory |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithDataSegs |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithHandleData |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithFullMemoryInfo |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithThreadInfo |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithUnloadedModules;
            break;

        case DumpType.Mini:
            miniDumpType = NativeMethods.MINIDUMP_TYPE.MiniDumpWithIndirectlyReferencedMemory |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpScanMemory;
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(dumpType), dumpType, null);
    }

    try
    {
        // Look the process up once and dispose it when done. The Process object must stay
        // alive until the dump has been written because the native call uses its handle.
        using (Process process = Process.GetProcessById(processId))
        {
            // This is to ensure friendly-name of resulting dmp file.
            processName = process.ProcessName;

            if (string.IsNullOrEmpty(processName))
            {
                return false;
            }

            IntPtr processHandle = process.Handle;

            processName += "_" + DateTime.Now.ToString("ddMMyyyyHHmmss") + ".dmp";

            // Check disk space availability before writing dump file.
            // This will not work on Linux
            string driveName = this.dumpsPath.Substring(0, 2);

            if (DiskUsage.GetCurrentDiskSpaceUsedPercent(driveName) > 90)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    "Not enough disk space available for dump file creation.");

                return false;
            }

            using (var file = File.Create(Path.Combine(this.dumpsPath, processName)))
            {
                if (!NativeMethods.MiniDumpWriteDump(
                        processHandle,
                        (uint)processId,
                        file.SafeFileHandle,
                        miniDumpType,
                        IntPtr.Zero,
                        IntPtr.Zero,
                        IntPtr.Zero))
                {
                    throw new Win32Exception(Marshal.GetLastWin32Error());
                }
            }

            return true;
        }
    }
    catch (Exception e) when (e is ArgumentException || e is InvalidOperationException || e is Win32Exception)
    {
        // Expected failures (e.g. the process is gone or inaccessible) are logged, not thrown.
        ObserverLogger.LogInfo(
            $"Unable to generate dump file {processName} with error{Environment.NewLine}{e}");
    }

    return false;
}