/// <inheritdoc/>
/// <remarks>
/// Skips the run while the configured <c>RunInterval</c> has not yet elapsed, (re)initializes the
/// target list, monitors every configured target, and then reports the results.
/// The perf counter and disk usage helpers are created per run and always disposed.
/// </remarks>
public override async Task ObserveAsync(CancellationToken token)
{
    // If set, this observer will only run during the supplied interval.
    // See Settings.xml, CertificateObserverConfiguration section, RunInterval parameter for an example.
    bool runIntervalNotElapsed =
        RunInterval > TimeSpan.MinValue
        && DateTime.Now - LastRunDateTime < RunInterval;

    if (runIntervalNotElapsed)
    {
        return;
    }

    // Initialization happens before the token is published, exactly as callers have come to expect.
    bool isReady = Initialize();
    Token = token;

    if (!isReady)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            "This observer was unable to initialize correctly due to missing configuration info.");

        return;
    }

    try
    {
        perfCounters = new WindowsPerfCounters();
        diskUsage = new DiskUsage();

        foreach (var target in targetList)
        {
            Token.ThrowIfCancellationRequested();

            // Only entries that name an app or an app type can be monitored.
            bool hasTarget =
                !string.IsNullOrWhiteSpace(target.Target)
                || !string.IsNullOrWhiteSpace(target.TargetType);

            if (hasTarget)
            {
                await MonitorAppAsync(target).ConfigureAwait(true);
            }
        }

        await ReportAsync(token).ConfigureAwait(true);
        LastRunDateTime = DateTime.Now;
    }
    finally
    {
        // Clean up the per-run helpers, whether or not the run succeeded.
        if (diskUsage != null)
        {
            diskUsage.Dispose();
        }

        diskUsage = null;

        if (perfCounters != null)
        {
            perfCounters.Dispose();
        }

        perfCounters = null;
    }
}
/// <summary>
/// Captures the current Service Fabric configuration values (version, root folders, feature flags,
/// last boot time) into this observer's fields and then reports them.
/// </summary>
/// <remarks>
/// The run is skipped when the observer web app is not deployed or when the configured
/// <c>RunInterval</c> has not yet elapsed.
/// <see cref="ArgumentException"/> and <see cref="IOException"/> raised while reading the configuration
/// are reported as a Warning and swallowed; any other exception is reported and rethrown.
/// </remarks>
/// <param name="token">Token checked before reading the configuration and before reporting.</param>
/// <returns>A task that completes when the report has been produced.</returns>
public override async Task ObserveAsync(CancellationToken token)
{
    // If set, this observer will only run during the supplied interval.
    // See Settings.xml, CertificateObserverConfiguration section, RunInterval parameter for an example.
    // This observer is only useful if you enable the web api for producing
    // an html page with a bunch of information that's easy to read in one go.
    if (!ObserverManager.ObserverWebAppDeployed
        || (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval))
    {
        return;
    }

    token.ThrowIfCancellationRequested();

    try
    {
        ServiceFabricConfiguration config = ServiceFabricConfiguration.Instance;

        this.SFVersion = config.FabricVersion;
        this.SFBinRoot = config.FabricBinRoot;
        this.SFCompatibilityJsonPath = config.CompatibilityJsonPath;
        this.SFCodePath = config.FabricCodePath;
        this.SFDataRoot = config.FabricDataRoot;
        this.SFLogRoot = config.FabricLogRoot;
        SFRootDir = config.FabricRoot;
        this.SFEnableCircularTraceSession = config.EnableCircularTraceSession;
        this.SFVolumeDiskServiceEnabled = config.IsSFVolumeDiskServiceEnabled;
        this.unsupportedPreviewFeaturesEnabled = config.EnableUnsupportedPreviewFeatures;
        this.SFNodeLastBootTime = config.NodeLastBootUpTime;
    }
    catch (Exception e) when (e is ArgumentException || e is IOException)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"{NodeName} | Handled Exception, but failed to read registry value:\n{e}");
    }
    catch (Exception e)
    {
        // The node name is interpolated here; previously the literal text "this.NodeName" was logged.
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"{NodeName} | Unhandled Exception trying to read registry value:\n{e}");

        throw;
    }

    token.ThrowIfCancellationRequested();

    await ReportAsync(token).ConfigureAwait(true);
    LastRunDateTime = DateTime.Now;
}
/// <summary>
/// Runs one observation pass: initializes the observer, monitors the deployed apps, records how
/// long that took in <c>RunDuration</c>, and then reports.
/// </summary>
/// <remarks>
/// The run is skipped when the configured <c>RunInterval</c> has not yet elapsed.
/// When initialization returns <c>false</c>, a Warning is reported and the method returns without monitoring.
/// </remarks>
/// <param name="token">Token stored in <c>Token</c> and passed to monitoring and reporting.</param>
/// <returns>A task that completes when the pass has finished.</returns>
public override async Task ObserveAsync(CancellationToken token)
{
    // If set, this observer will only run during the supplied interval.
    // See Settings.xml, CertificateObserverConfiguration section, RunInterval parameter for an example.
    if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval)
    {
        return;
    }

    this.stopwatch.Start();

    try
    {
        bool initialized = await InitializeAsync();
        Token = token;

        if (!initialized)
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.OriginalString,
                ObserverName,
                HealthState.Warning,
                "This observer was unable to initialize correctly due to missing configuration info.");

            return;
        }

        await MonitorDeployedAppsAsync(token).ConfigureAwait(false);

        // The time it took to get to ReportAsync.
        // For use in computing actual HealthReport TTL.
        this.stopwatch.Stop();
        RunDuration = this.stopwatch.Elapsed;
    }
    finally
    {
        // Always stop and reset, including when initialization or monitoring throws.
        // Otherwise the stopwatch would keep running and the next pass would record an inflated RunDuration.
        this.stopwatch.Stop();
        this.stopwatch.Reset();
    }

    await ReportAsync(token).ConfigureAwait(true);
    LastRunDateTime = DateTime.Now;
}
/// <summary>
/// Publishes the health reports for the most recently gathered OS information: the OS health
/// status, the full OS report text, and (on Windows) a Warning when Windows Update automatic
/// download is enabled.
/// </summary>
/// <remarks>
/// When the observer web app is deployed, the OS report is also written to <c>SysInfo.txt</c>.
/// The Windows Update detection flags are reset at the end of each call on Windows.
/// Any exception is reported as an Error health event and rethrown.
/// </remarks>
/// <param name="token">Token checked on entry and passed to the telemetry data.</param>
/// <returns>A completed task.</returns>
public override Task ReportAsync(CancellationToken token)
{
    try
    {
        token.ThrowIfCancellationRequested();

        // OS Health.
        if (this.osStatus != null && !string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
        {
            string healthMessage = $"OS reporting unhealthy: {this.osStatus}";

            var healthReport = new HealthReport
            {
                Observer = ObserverName,
                NodeName = NodeName,
                HealthMessage = healthMessage,
                State = HealthState.Error,
                HealthReportTimeToLive = SetHealthReportTimeToLive(),
            };

            HealthReporter.ReportHealthToServiceFabric(healthReport);

            // This means this observer created a Warning or Error SF Health Report
            HasActiveFabricErrorOrWarning = true;

            // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
            if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
            {
                _ = TelemetryClient?.ReportHealthAsync(
                    HealthScope.Application,
                    FabricRuntime.GetActivationContext().ApplicationName,
                    HealthState.Error,
                    $"{NodeName} - OS reporting unhealthy: {this.osStatus}",
                    ObserverName,
                    Token);
            }
        }
        else if (HasActiveFabricErrorOrWarning && string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
        {
            // Clear Error or Warning with an OK Health Report.
            string healthMessage = $"OS reporting healthy: {this.osStatus}";

            var healthReport = new HealthReport
            {
                Observer = ObserverName,
                NodeName = NodeName,
                HealthMessage = healthMessage,
                State = HealthState.Ok,
                HealthReportTimeToLive = default(TimeSpan),
            };

            HealthReporter.ReportHealthToServiceFabric(healthReport);

            // Reset internal health state.
            HasActiveFabricErrorOrWarning = false;
        }

        if (ObserverManager.ObserverWebAppDeployed)
        {
            var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "SysInfo.txt");

            // Format with the invariant culture: in a custom format string "/" and ":" are replaced by the
            // current culture's separators (and the culture's calendar is used), which would make the
            // timestamp differ from node to node.
            string lastUpdated = DateTime.UtcNow.ToString(
                "M/d/yyyy HH:mm:ss",
                System.Globalization.CultureInfo.InvariantCulture);

            // This file is used by the web application (log reader.).
            if (!ObserverLogger.TryWriteLogFile(logPath, $"Last updated on {lastUpdated} UTC<br/>{this.osReport}"))
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    "Unable to create SysInfo.txt file.");
            }
        }

        var report = new HealthReport
        {
            Observer = ObserverName,
            HealthMessage = this.osReport,
            State = HealthState.Ok,
            NodeName = NodeName,
            HealthReportTimeToLive = SetHealthReportTimeToLive(),
        };

        HealthReporter.ReportHealthToServiceFabric(report);

        // Evaluated once; every Windows-only section below depends on it.
        bool isWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);

        // Windows Update automatic download enabled?
        if (isWindows && this.isWindowsUpdateAutoDownloadEnabled)
        {
            string linkText =
                $"{Environment.NewLine}For clusters of Silver durability or above, " +
                $"please consider <a href=\"https://docs.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade\" target=\"blank\">" +
                $"enabling VMSS automatic OS image upgrades</a> to prevent unexpected VM reboots. " +
                $"For Bronze durability clusters, please consider deploying the " +
                $"<a href=\"https://docs.microsoft.com/azure/service-fabric/service-fabric-patch-orchestration-application\" target=\"blank\">Patch Orchestration Service</a>.";

            string auServiceEnabledMessage = $"Windows Update Automatic Download is enabled.{linkText}";

            report = new HealthReport
            {
                Observer = ObserverName,
                Property = "OSConfiguration",
                HealthMessage = auServiceEnabledMessage,
                State = HealthState.Warning,
                NodeName = NodeName,
                HealthReportTimeToLive = SetHealthReportTimeToLive(),
            };

            HealthReporter.ReportHealthToServiceFabric(report);

            if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
            {
                // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
                var telemetryData = new TelemetryData(FabricClientInstance, token)
                {
                    HealthEventDescription = auServiceEnabledMessage,
                    HealthState = "Warning",
                    Metric = "WUAutoDownloadEnabled",
                    Value = this.isWindowsUpdateAutoDownloadEnabled,
                    NodeName = NodeName,
                    ObserverName = ObserverName,
                    Source = ObserverConstants.FabricObserverName,
                };

                _ = TelemetryClient?.ReportMetricAsync(
                    telemetryData,
                    Token);
            }

            // ETW.
            if (IsEtwEnabled)
            {
                Logger.EtwLogger?.Write(
                    ObserverConstants.FabricObserverETWEventName,
                    new
                    {
                        HealthState = "Warning",
                        HealthEventDescription = auServiceEnabledMessage,
                        ObserverName,
                        Metric = "WUAutoDownloadEnabled",
                        Value = this.isWindowsUpdateAutoDownloadEnabled,
                        NodeName,
                    });
            }
        }

        if (isWindows)
        {
            // reset au globals for fresh detection during next observer run.
            this.isWindowsUpdateAutoDownloadEnabled = false;
            this.auStateUnknown = false;
            this.isWUADSettingEnabled = false;
        }

        return Task.CompletedTask;
    }
    catch (Exception e)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Error,
            $"Unhandled exception processing OS information:{Environment.NewLine}{e}");

        throw;
    }
}
/// <summary>
/// Gathers operating system, TCP port, firewall, memory and drive information for this node,
/// stores the formatted text in <c>osReport</c> (and the OS status in <c>osStatus</c>), and
/// optionally emits the same data through ETW and the telemetry provider.
/// </summary>
/// <remarks>
/// <see cref="FabricException"/>, <see cref="OperationCanceledException"/>,
/// <see cref="TaskCanceledException"/> and <see cref="InvalidComObjectException"/> are reported as a
/// Warning and swallowed. Any other exception is reported as an Error and rethrown.
/// </remarks>
/// <param name="token">Token passed to the OS information query and to the hot fix lookups.</param>
/// <returns>A task that completes when the information has been gathered.</returns>
private async Task GetComputerInfoAsync(CancellationToken token)
{
    var sb = new StringBuilder();
    int logicalProcessorCount = Environment.ProcessorCount;

    try
    {
        OSInfo osInfo = await OperatingSystemInfoProvider.Instance.GetOSInfoAsync(token);
        this.osStatus = osInfo.Status;

        // Active, bound ports.
        int activePorts = OperatingSystemInfoProvider.Instance.GetActivePortCount();

        // Active, ephemeral ports.
        int activeEphemeralPorts = OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount();
        (int lowPortOS, int highPortOS) = OperatingSystemInfoProvider.Instance.TupleGetDynamicPortRange();
        string osEphemeralPortRange = string.Empty;
        string fabricAppPortRange = string.Empty;

        string clusterManifestXml = IsTestRun
            ? File.ReadAllText(TestManifestPath)
            : await FabricClientInstance.ClusterManager.GetClusterManifestAsync(
                AsyncClusterOperationTimeoutSeconds,
                Token).ConfigureAwait(false);

        (int lowPortApp, int highPortApp) = NetworkUsage.TupleGetFabricApplicationPortRangeForNodeType(
            FabricServiceContext.NodeContext.NodeType,
            clusterManifestXml);

        int firewalls = NetworkUsage.GetActiveFirewallRulesCount();

        // OS info.
        _ = sb.AppendLine("OS Information:\r\n");
        _ = sb.AppendLine($"Name: {osInfo.Name}");
        _ = sb.AppendLine($"Version: {osInfo.Version}");

        // Only list the install date when one is available
        // (the check used to be inverted and printed the line only when the value was empty).
        if (!string.IsNullOrEmpty(osInfo.InstallDate))
        {
            _ = sb.AppendLine($"InstallDate: {osInfo.InstallDate}");
        }

        _ = sb.AppendLine($"LastBootUpTime*: {osInfo.LastBootUpTime}");

        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            // WU AutoUpdate - Download enabled.
            // If the config setting EnableWindowsAutoUpdateCheck is set to false, then don't add this info to sb.
            if (this.isWUADSettingEnabled)
            {
                string auMessage = "WindowsUpdateAutoDownloadEnabled: ";

                if (this.auStateUnknown)
                {
                    auMessage += "Unknown";
                }
                else
                {
                    auMessage += this.isWindowsUpdateAutoDownloadEnabled;
                }

                _ = sb.AppendLine(auMessage);
            }

            // Not supported for Linux.
            _ = sb.AppendLine($"OSLanguage: {osInfo.Language}");
            _ = sb.AppendLine($"OSHealthStatus*: {osInfo.Status}");
        }

        _ = sb.AppendLine($"NumberOfProcesses*: {osInfo.NumberOfProcesses}");

        if (lowPortOS > -1)
        {
            osEphemeralPortRange = $"{lowPortOS} - {highPortOS}";
            _ = sb.AppendLine($"OSEphemeralTCPPortRange: {osEphemeralPortRange} (Active*: {activeEphemeralPorts})");
        }

        if (lowPortApp > -1)
        {
            fabricAppPortRange = $"{lowPortApp} - {highPortApp}";
            _ = sb.AppendLine($"FabricApplicationTCPPortRange: {fabricAppPortRange}");
        }

        if (firewalls > -1)
        {
            _ = sb.AppendLine($"ActiveFirewallRules*: {firewalls}");
        }

        if (activePorts > -1)
        {
            _ = sb.AppendLine($"TotalActiveTCPPorts*: {activePorts}");
        }

        // Hardware info.
        // Proc/Mem
        _ = sb.AppendLine($"{Environment.NewLine}Hardware Information:{Environment.NewLine}");
        _ = sb.AppendLine($"LogicalProcessorCount: {logicalProcessorCount}");

        if (osInfo.TotalVirtualMemorySizeKB > 0)
        {
            _ = sb.AppendLine($"TotalVirtualMemorySize: {osInfo.TotalVirtualMemorySizeKB / 1048576} GB");
        }

        if (osInfo.TotalVisibleMemorySizeKB > 0)
        {
            _ = sb.AppendLine($"TotalVisibleMemorySize: {osInfo.TotalVisibleMemorySizeKB / 1048576} GB");
        }

        _ = sb.AppendLine($"FreePhysicalMemory*: {Math.Round(osInfo.AvailableMemoryKB / 1048576.0, 2)} GB");
        _ = sb.AppendLine($"FreeVirtualMemory*: {Math.Round(osInfo.FreeVirtualMemoryKB / 1048576.0, 2)} GB");

        // Disk
        var drivesInformationTuple = DiskUsage.GetCurrentDiskSpaceTotalAndUsedPercentAllDrives(SizeUnit.Gigabytes);
        var logicalDriveCount = drivesInformationTuple.Count;
        string driveInfo = string.Empty;

        _ = sb.AppendLine($"LogicalDriveCount: {logicalDriveCount}");

        foreach (var (driveName, diskSize, percentConsumed) in drivesInformationTuple)
        {
            string drvSize;

            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                string systemDrv = "Data";

                // The drive whose letter matches the system directory's is labelled "System".
                if (string.Equals(
                        Environment.SystemDirectory.Substring(0, 1),
                        driveName.Substring(0, 1),
                        StringComparison.OrdinalIgnoreCase))
                {
                    systemDrv = "System";
                }

                drvSize = $"Drive {driveName} ({systemDrv}) Size: {diskSize} GB, Consumed*: {percentConsumed}%";
            }
            else
            {
                drvSize = $"Mount point: {driveName}, Size: {diskSize} GB, Consumed*: {percentConsumed}%";
            }

            _ = sb.AppendLine(drvSize);
            driveInfo += $"{drvSize}{Environment.NewLine}";
        }

        string osHotFixes = string.Empty;

        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            osHotFixes = GetWindowsHotFixes(token);
        }

        if (!string.IsNullOrEmpty(osHotFixes))
        {
            _ = sb.AppendLine($"\nWindows Patches/Hot Fixes*:\n\n{osHotFixes}");
        }

        // Dynamic info qualifier (*)
        _ = sb.AppendLine($"\n* Dynamic data.");

        this.osReport = sb.ToString();

        string hotFixes = string.Empty;

        // ETW.
        if (IsEtwEnabled)
        {
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                // Line breaks become ", "; trim both characters so no dangling separator is left at the end.
                hotFixes = GetWindowsHotFixes(token, generateUrl: false).Replace("\r\n", ", ").TrimEnd(',', ' ');
            }

            Logger.EtwLogger?.Write(
                ObserverConstants.FabricObserverETWEventName,
                new
                {
                    HealthState = "Ok",
                    Node = NodeName,
                    Observer = ObserverName,
                    OS = osInfo.Name,
                    OSVersion = osInfo.Version,
                    OSInstallDate = osInfo.InstallDate,
                    AutoUpdateEnabled = this.auStateUnknown ? "Unknown" : this.isWindowsUpdateAutoDownloadEnabled.ToString(),
                    osInfo.LastBootUpTime,
                    WindowsAutoUpdateEnabled = this.isWindowsUpdateAutoDownloadEnabled,
                    TotalMemorySizeGB = (int)(osInfo.TotalVisibleMemorySizeKB / 1048576),
                    AvailablePhysicalMemoryGB = Math.Round(osInfo.FreePhysicalMemoryKB / 1048576.0, 2),
                    AvailableVirtualMemoryGB = Math.Round(osInfo.FreeVirtualMemoryKB / 1048576.0, 2),
                    LogicalProcessorCount = logicalProcessorCount,
                    LogicalDriveCount = logicalDriveCount,
                    DriveInfo = driveInfo,
                    NumberOfRunningProcesses = osInfo.NumberOfProcesses,
                    ActiveFirewallRules = firewalls,
                    ActivePorts = activePorts,
                    ActiveEphemeralPorts = activeEphemeralPorts,
                    WindowsDynamicPortRange = osEphemeralPortRange,
                    FabricAppPortRange = fabricAppPortRange,
                    HotFixes = hotFixes,
                });
        }

        // Telemetry
        if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
        {
            if (string.IsNullOrEmpty(hotFixes) && RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                hotFixes = GetWindowsHotFixes(token, generateUrl: false).Replace("\r\n", ", ").TrimEnd(',', ' ');
            }

            // Fire-and-forget, matching the other telemetry calls in this observer.
            _ = TelemetryClient?.ReportMetricAsync(
                new MachineTelemetryData
                {
                    HealthState = "Ok",
                    Node = NodeName,
                    Observer = ObserverName,
                    OS = osInfo.Name,
                    OSVersion = osInfo.Version,
                    OSInstallDate = osInfo.InstallDate,
                    LastBootUpTime = osInfo.LastBootUpTime,
                    WindowsUpdateAutoDownloadEnabled = this.isWindowsUpdateAutoDownloadEnabled,

                    // Divide first, then narrow: casting the KB value to int before dividing
                    // truncates large values and disagreed with the ETW payload above.
                    TotalMemorySizeGB = (int)(osInfo.TotalVisibleMemorySizeKB / 1048576),
                    AvailablePhysicalMemoryGB = Math.Round(osInfo.FreePhysicalMemoryKB / 1048576.0, 2),
                    AvailableVirtualMemoryGB = Math.Round(osInfo.FreeVirtualMemoryKB / 1048576.0, 2),
                    LogicalProcessorCount = logicalProcessorCount,
                    LogicalDriveCount = logicalDriveCount,
                    DriveInfo = driveInfo,
                    NumberOfRunningProcesses = osInfo.NumberOfProcesses,
                    ActiveFirewallRules = firewalls,
                    ActivePorts = activePorts,
                    ActiveEphemeralPorts = activeEphemeralPorts,
                    WindowsDynamicPortRange = osEphemeralPortRange,
                    FabricAppPortRange = fabricAppPortRange,
                    HotFixes = hotFixes,
                },
                Token);
        }
    }
    catch (Exception e) when (e is FabricException || e is OperationCanceledException || e is TaskCanceledException || e is InvalidComObjectException)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"Handled Exception processing OS information:{Environment.NewLine}{e}");
    }
    catch (Exception e)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Error,
            $"Unhandled Exception processing OS information:{Environment.NewLine}{e}");

        throw;
    }
}
/// <summary>
/// Tests every configured endpoint for reachability (TCP connect or HTTP GET) and records the
/// outcome through <c>SetHealthState</c>.
/// </summary>
/// <remarks>
/// The user-supplied configuration is used when it has entries; otherwise the default configuration is used.
/// Results are cached per host name in <c>connEndpointTestResults</c>, so a host is tested only once
/// even when several target apps reference it.
/// For HTTP, any response from the server (including protocol and trust failures) counts as reachable.
/// </remarks>
private void InternetConnectionStateIsConnected()
{
    var configList = this.defaultConfig;

    if (this.userConfig.Count > 0)
    {
        configList = this.userConfig;
    }

    foreach (var config in configList)
    {
        this.cancellationToken.ThrowIfCancellationRequested();

        foreach (var endpoint in config.Endpoints)
        {
            if (string.IsNullOrEmpty(endpoint.HostName))
            {
                continue;
            }

            // Don't re-test endpoint if it has already been tested for a different targetApp.
            if (this.connEndpointTestResults.TryGetValue(endpoint.HostName, out var previousResult))
            {
                SetHealthState(endpoint, config.TargetApp, previousResult);
                continue;
            }

            bool passed = false;
            this.cancellationToken.ThrowIfCancellationRequested();

            // SQL Azure, other database services that are addressable over direct TCP.
            if (endpoint.Protocol == DirectInternetProtocol.Tcp)
            {
                passed = TcpEndpointDoConnectionTest(endpoint.HostName, endpoint.Port);
            }

            // Default is http.
            else
            {
                // Service REST endpoints, CosmosDB REST endpoint, etc.
                // Http protocol means any endpoint/port pair that is addressable over HTTP/s.
                // E.g., REST endpoints, etc.
                try
                {
                    this.cancellationToken.ThrowIfCancellationRequested();

                    ServicePointManager.SecurityProtocol = SecurityProtocolType.SystemDefault;
                    string prefix = endpoint.Port == 443 ? "https://" : "http://";

                    // The host name may already carry its own scheme.
                    if (endpoint.HostName.Contains("://"))
                    {
                        prefix = string.Empty;
                    }

                    var request = (HttpWebRequest)WebRequest.Create(
                        new Uri($"{prefix}{endpoint.HostName}:{endpoint.Port}"));

                    request.AuthenticationLevel = AuthenticationLevel.MutualAuthRequired;
                    request.ImpersonationLevel = TokenImpersonationLevel.Impersonation;
                    request.Timeout = 60000;
                    request.Method = "GET";

                    using var response = (HttpWebResponse)request.GetResponse();
                    var status = response.StatusCode;

                    // The target server responded with something.
                    // It doesn't really matter what it "said".
                    if (status == HttpStatusCode.OK || response?.Headers?.Count > 0)
                    {
                        passed = true;
                    }
                }
                catch (IOException ie)
                {
                    if (ie.InnerException is ProtocolViolationException)
                    {
                        passed = true;
                    }
                }
                catch (WebException we)
                {
                    // The response attached to the exception is released once it has been inspected.
                    using (we.Response)
                    {
                        if (we.Status == WebExceptionStatus.ProtocolError
                            || we.Status == WebExceptionStatus.TrustFailure
                            || we.Status == WebExceptionStatus.SecureChannelFailure
                            || we.Response?.Headers?.Count > 0)
                        {
                            // Could not establish trust or server doesn't want to hear from you, or...
                            // Either way, the Server *responded*. It's reachable.
                            // You could always add code to grab your app or cluster certs from local store
                            // and apply it to the request. See CertificateObserver for how to get
                            // both your App cert(s) and Cluster cert. The goal of NetworkObserver is
                            // to test availability. Nothing more.
                            passed = true;
                        }
                        else if (we.Status == WebExceptionStatus.SendFailure
                                 && we.InnerException != null
                                 && (we.InnerException.Message.IndexOf("authentication", StringComparison.OrdinalIgnoreCase) >= 0
                                     || we.InnerException.HResult == -2146232800))
                        {
                            // Ordinal, case-insensitive match: lower-casing with the current culture
                            // can fail to match (e.g. Turkish casing rules for 'I').
                            passed = true;
                        }
                    }
                }
                catch (Exception e)
                {
                    HealthReporter.ReportFabricObserverServiceHealth(
                        FabricServiceContext.ServiceName.OriginalString,
                        ObserverName,
                        HealthState.Warning,
                        e.ToString());

                    throw;
                }
            }

            SetHealthState(endpoint, config.TargetApp, passed);

            if (!this.connEndpointTestResults.ContainsKey(endpoint.HostName))
            {
                this.connEndpointTestResults.Add(endpoint.HostName, passed);
            }
        }
    }
}
/// <summary>
/// Prepares the observer for a network monitoring run: optionally writes <c>NetInfo.txt</c> for the
/// web app, reads the observer settings, and loads the endpoint configuration file into
/// <c>userConfig</c>, keeping only entries whose target app is deployed on this node.
/// </summary>
/// <returns>
/// <c>true</c> when this is a test run or when usable configuration is available;
/// <c>false</c> when the configuration file is not specified, does not exist, or yields no deployed targets.
/// </returns>
private async Task<bool> InitializeAsync()
{
    WriteToLogWithLevel(
        ObserverName,
        $"Initializing {ObserverName} for network monitoring. | {NodeName}",
        LogLevel.Information);

    this.cancellationToken.ThrowIfCancellationRequested();

    // This only needs to be logged once.
    // This file is used by the ObserverWebApi application.
    if (ObserverManager.ObserverWebAppDeployed && !this.hasRun)
    {
        var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "NetInfo.txt");

        if (!ObserverLogger.TryWriteLogFile(logPath, GetNetworkInterfaceInfo(this.cancellationToken)))
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.OriginalString,
                ObserverName,
                HealthState.Warning,
                "Unable to create NetInfo.txt file.");
        }
    }

    // Is this a unit test run?
    if (IsTestRun)
    {
        return true;
    }

    var settings = FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(
        ObserverConstants.ObserverConfigurationPackageName)?.Settings;

    this.configSettings.Initialize(
        settings,
        ConfigurationSectionName,
        "NetworkObserverDataFileName");

    // Validate the configured file name itself. Checking the combined path can never detect a
    // missing name, and Path.Combine throws when the name is null.
    string configFileName = this.configSettings.NetworkObserverConfigFileName;

    if (string.IsNullOrWhiteSpace(configFileName))
    {
        ObserverLogger.LogError(
            "Endpoint list file is not specified. " +
            "Please Add file containing endpoints that need to be monitored.");

        return false;
    }

    var networkObserverConfigFileName = Path.Combine(this.dataPackagePath, configFileName);

    if (!File.Exists(networkObserverConfigFileName))
    {
        ObserverLogger.LogError(
            "Endpoint list file is not specified. " +
            "Please Add file containing endpoints that need to be monitored.");

        return false;
    }

    if (this.userConfig.Count == 0)
    {
        using (Stream stream = new FileStream(
            networkObserverConfigFileName,
            FileMode.Open,
            FileAccess.Read,
            FileShare.Read))
        {
            var configs = JsonHelper.ReadFromJsonStream<NetworkObserverConfig[]>(stream);

            // A file that does not deserialize to a list is treated like an empty list;
            // the "missing configuration" warning below then applies.
            if (configs != null)
            {
                foreach (var netConfig in configs)
                {
                    var deployedApps = await FabricClientInstance.QueryManager.GetDeployedApplicationListAsync(
                        NodeName,
                        new Uri(netConfig.TargetApp)).ConfigureAwait(false);

                    // Skip targets that are not deployed on this node.
                    if (deployedApps == null || deployedApps.Count < 1)
                    {
                        continue;
                    }

                    this.userConfig.Add(netConfig);
                }
            }
        }

        if (this.userConfig.Count == 0)
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.ToString(),
                ObserverName,
                HealthState.Warning,
                "Missing required configuration data: endpoints.");

            return false;
        }
    }

    return true;
}
/// <summary>
/// Reloads the app observer configuration file and rebuilds the list of deployed replicas/instances
/// to monitor.
/// </summary>
/// <remarks>
/// This runs each time ObserveAsync is run to ensure that any new app targets and config changes will
/// be up to date across observer loop iterations.
/// </remarks>
/// <returns>
/// <c>true</c> when at least one configured target names an app or app type; <c>false</c> when the
/// configuration file is missing, contains no targets, or none of the targets is valid.
/// </returns>
private async Task<bool> InitializeAsync()
{
    if (ReplicaOrInstanceList == null)
    {
        ReplicaOrInstanceList = new List<ReplicaOrInstanceMonitoringInfo>();
    }

    if (!IsTestRun)
    {
        configSettings.Initialize(
            FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(
                ObserverConstants.ObserverConfigurationPackageName)?.Settings,
            ConfigurationSectionName,
            "AppObserverDataFileName");
    }

    // For unit tests, this path will be an empty string and not generate an exception.
    var appObserverConfigFileName = Path.Combine(
        ConfigPackagePath ?? string.Empty,
        configSettings.AppObserverConfigFileName ?? string.Empty);

    if (!File.Exists(appObserverConfigFileName))
    {
        WriteToLogWithLevel(
            ObserverName,
            $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
            LogLevel.Information);

        return false;
    }

    // This code runs each time ObserveAsync is called,
    // so clear app list and deployed replica/instance list in case a new app has been added to watch list.
    if (this.userTargetList.Count > 0)
    {
        this.userTargetList.Clear();
        ReplicaOrInstanceList.Clear();
    }

    if (this.deployedTargetList.Count > 0)
    {
        this.deployedTargetList.Clear();
    }

    // The file is only needed while parsing; close it before awaiting any cluster queries.
    using (Stream stream = new FileStream(
        appObserverConfigFileName,
        FileMode.Open,
        FileAccess.Read,
        FileShare.Read))
    {
        if (stream.Length > 0
            && JsonHelper.IsJson<List<ApplicationInfo>>(File.ReadAllText(appObserverConfigFileName)))
        {
            this.userTargetList.AddRange(JsonHelper.ReadFromJsonStream<ApplicationInfo[]>(stream));
        }
    }

    // Are any of the config-supplied apps deployed?.
    if (this.userTargetList.Count == 0)
    {
        WriteToLogWithLevel(
            ObserverName,
            $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
            LogLevel.Information);

        return false;
    }

    int settingsFail = 0;

    foreach (var application in this.userTargetList)
    {
        if (string.IsNullOrWhiteSpace(application.TargetApp)
            && string.IsNullOrWhiteSpace(application.TargetAppType))
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.ToString(),
                ObserverName,
                HealthState.Warning,
                $"Initialize() | {application.TargetApp}: Required setting, target, is not set.");

            settingsFail++;

            continue;
        }

        if (!string.IsNullOrEmpty(application.TargetAppType))
        {
            await SetDeployedApplicationReplicaOrInstanceListAsync(
                null,
                application.TargetAppType).ConfigureAwait(false);
        }
        else
        {
            await SetDeployedApplicationReplicaOrInstanceListAsync(new Uri(application.TargetApp))
                .ConfigureAwait(false);
        }
    }

    // No required settings supplied for deployed application(s).
    // This is evaluated after the loop: inside the loop it sat behind the 'continue' above and
    // therefore could never be true.
    if (settingsFail == this.userTargetList.Count)
    {
        return false;
    }

    foreach (var repOrInst in ReplicaOrInstanceList)
    {
        ObserverLogger.LogInfo(
            $"Will observe resource consumption by {repOrInst.ApplicationName?.OriginalString} " +
            $"on Node {NodeName}.");
    }

    return true;
}
/// <summary>
/// Reloads the app observer configuration file into <c>targetList</c>.
/// </summary>
/// <remarks>
/// Initialize() runs each time ObserveAsync is run to ensure
/// that any new app targets and config changes will
/// be up to date across observer loop iterations.
/// </remarks>
/// <returns>
/// <c>true</c> for a test run, or when at least one configured target names an app or app type;
/// <c>false</c> when the configuration file is missing, contains no targets, or none of the targets is valid.
/// </returns>
private bool Initialize()
{
    if (replicaOrInstanceList == null)
    {
        replicaOrInstanceList = new List<ReplicaOrInstanceMonitoringInfo>();
    }

    // Is this a unit test run?
    if (IsTestRun)
    {
        replicaOrInstanceList.Add(new ReplicaOrInstanceMonitoringInfo
        {
            ApplicationName = new Uri("fabric:/TestApp"),
            PartitionId = Guid.NewGuid(),
            HostProcessId = 0,
            ReplicaOrInstanceId = default(long),
        });

        return true;
    }

    ConfigSettings.Initialize(
        FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(
            ObserverConstants.ObserverConfigurationPackageName)?.Settings,
        ObserverConstants.AppObserverConfigurationSectionName,
        "AppObserverDataFileName");

    var appObserverConfigFileName = Path.Combine(configPackagePath, ConfigSettings.AppObserverDataFileName);

    if (!File.Exists(appObserverConfigFileName))
    {
        WriteToLogWithLevel(
            ObserverName,
            $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
            LogLevel.Information);

        return false;
    }

    // this code runs each time ObserveAsync is called,
    // so clear app list and deployed replica/instance list in case a new app has been added to watch list.
    if (targetList.Count > 0)
    {
        targetList.Clear();
        replicaOrInstanceList.Clear();
    }

    using (Stream stream = new FileStream(appObserverConfigFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        // NOTE(review): files of 42 bytes or fewer are skipped; presumably a minimum size for a
        // meaningful config file - confirm the intent of this threshold.
        if (stream.Length > 42
            && JsonHelper.IsJson<List<ApplicationInfo>>(File.ReadAllText(appObserverConfigFileName)))
        {
            targetList.AddRange(JsonHelper.ReadFromJsonStream<ApplicationInfo[]>(stream));
        }
    }

    // Are any of the config-supplied apps deployed?.
    if (targetList.Count == 0)
    {
        WriteToLogWithLevel(
            ObserverName,
            $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
            LogLevel.Information);

        return false;
    }

    int settingsFail = 0;

    foreach (var application in targetList)
    {
        if (string.IsNullOrWhiteSpace(application.Target)
            && string.IsNullOrWhiteSpace(application.TargetType))
        {
            HealthReporter.ReportFabricObserverServiceHealth(
                FabricServiceContext.ServiceName.ToString(),
                ObserverName,
                HealthState.Warning,
                $"Initialize() | {application.Target}: Required setting, target, is not set.");

            settingsFail++;

            continue;
        }

        ObserverLogger.LogInfo(
            $"Will observe resource consumption by {application.Target ?? application.TargetType} " +
            $"on Node {NodeName}.");
    }

    // No required settings supplied for deployed application(s).
    // This is evaluated after the loop: inside the loop it sat behind the 'continue' above and
    // therefore could never be true.
    if (settingsFail == targetList.Count)
    {
        return false;
    }

    return true;
}
/// <summary>
/// Windows process dmp creator. Writes a dump file for the given process into the configured dumps folder.
/// </summary>
/// <param name="processId">Id of the process to dump.</param>
/// <param name="dumpType">How much information the dump should contain.</param>
/// <returns>
/// <c>true</c> when the dump file was written; <c>false</c> when no dumps folder is configured, the
/// process name is unavailable, the target drive is more than 90% used, or a handled error occurred.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="dumpType"/> is not a known value.</exception>
public bool DumpServiceProcess(int processId, DumpType dumpType = DumpType.Full)
{
    if (string.IsNullOrEmpty(this.dumpsPath))
    {
        return false;
    }

    string processName = string.Empty;

    NativeMethods.MINIDUMP_TYPE miniDumpType;

    switch (dumpType)
    {
        case DumpType.Full:
            miniDumpType = NativeMethods.MINIDUMP_TYPE.MiniDumpWithFullMemory |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithFullMemoryInfo |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithHandleData |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithThreadInfo |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithUnloadedModules;
            break;

        case DumpType.MiniPlus:
            miniDumpType = NativeMethods.MINIDUMP_TYPE.MiniDumpWithPrivateReadWriteMemory |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithDataSegs |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithHandleData |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithFullMemoryInfo |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithThreadInfo |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpWithUnloadedModules;
            break;

        case DumpType.Mini:
            miniDumpType = NativeMethods.MINIDUMP_TYPE.MiniDumpWithIndirectlyReferencedMemory |
                           NativeMethods.MINIDUMP_TYPE.MiniDumpScanMemory;
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(dumpType), dumpType, null);
    }

    try
    {
        // One Process instance is used for both the name and the handle and is kept alive until the
        // dump has been written: the raw handle is owned by the Process object, so it must not be
        // released (or finalized) while MiniDumpWriteDump is still using it.
        using (Process process = Process.GetProcessById(processId))
        {
            // This is to ensure friendly-name of resulting dmp file.
            processName = process.ProcessName;

            if (string.IsNullOrEmpty(processName))
            {
                return false;
            }

            IntPtr processHandle = process.Handle;

            processName += "_" + DateTime.Now.ToString("ddMMyyyyHHmmss") + ".dmp";

            // Check disk space availability before writing dump file.
            // This will not work on Linux
            string driveName = this.dumpsPath.Substring(0, 2);

            if (DiskUsage.GetCurrentDiskSpaceUsedPercent(driveName) > 90)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    "Not enough disk space available for dump file creation.");

                return false;
            }

            using (var file = File.Create(Path.Combine(this.dumpsPath, processName)))
            {
                if (!NativeMethods.MiniDumpWriteDump(
                    processHandle,
                    (uint)processId,
                    file.SafeFileHandle,
                    miniDumpType,
                    IntPtr.Zero,
                    IntPtr.Zero,
                    IntPtr.Zero))
                {
                    throw new Win32Exception(Marshal.GetLastWin32Error());
                }
            }

            return true;
        }
    }
    catch (Exception e) when (e is ArgumentException || e is InvalidOperationException || e is Win32Exception)
    {
        ObserverLogger.LogInfo(
            $"Unable to generate dump file {processName} with error{Environment.NewLine}{e}");
    }

    return false;
}
/// <summary>
/// Evaluates the collected disk data against the configured thresholds and, when the observer
/// web app is deployed, writes the accumulated disk information to <c>disks.txt</c>.
/// </summary>
/// <remarks>
/// Disk space available/total data is only processed when ETW is enabled, using thresholds of 0.
/// </remarks>
/// <param name="token">Token checked before each data item is processed.</param>
/// <returns>A completed task.</returns>
public override Task ReportAsync(CancellationToken token)
{
    try
    {
        var timeToLiveWarning = SetHealthReportTimeToLive();

        // User-supplied Disk Space Usage % thresholds from Settings.xml.
        foreach (var data in this.DiskSpaceUsagePercentageData)
        {
            token.ThrowIfCancellationRequested();

            ProcessResourceDataReportHealth(
                data,
                DiskSpacePercentErrorThreshold,
                DiskSpacePercentWarningThreshold,
                timeToLiveWarning);
        }

        // User-supplied Average disk queue length thresholds from Settings.xml.
        foreach (var data in this.DiskAverageQueueLengthData)
        {
            token.ThrowIfCancellationRequested();

            ProcessResourceDataReportHealth(
                data,
                AverageQueueLengthErrorThreshold,
                AverageQueueLengthWarningThreshold,
                timeToLiveWarning);
        }

        /* For ETW Only - These calls will just produce ETW (note the thresholds). */
        if (IsEtwEnabled)
        {
            // Disk Space Available
            foreach (var data in this.DiskSpaceAvailableMbData)
            {
                token.ThrowIfCancellationRequested();

                ProcessResourceDataReportHealth(
                    data,
                    0,
                    0,
                    timeToLiveWarning);
            }

            // Disk Space Total
            foreach (var data in this.DiskSpaceTotalMbData)
            {
                token.ThrowIfCancellationRequested();

                ProcessResourceDataReportHealth(
                    data,
                    0,
                    0,
                    timeToLiveWarning);
            }
        }

        token.ThrowIfCancellationRequested();

        // This section only needs to run if you have the FabricObserverWebApi app installed.
        if (!ObserverManager.ObserverWebAppDeployed)
        {
            return Task.CompletedTask;
        }

        var diskInfoPath = Path.Combine(ObserverLogger.LogFolderBasePath, "disks.txt");

        _ = ObserverLogger.TryWriteLogFile(diskInfoPath, this.diskInfo.ToString());
        _ = this.diskInfo.Clear();

        return Task.CompletedTask;
    }
    catch (AggregateException e) when (e.InnerException is OperationCanceledException
                                       || e.InnerException is TaskCanceledException
                                       || e.InnerException is TimeoutException)
    {
        // NOTE(review): token.ThrowIfCancellationRequested() throws OperationCanceledException
        // directly (not wrapped in an AggregateException), so cancellation raised in this method
        // is handled by the general catch below, not here.
        return Task.CompletedTask;
    }
    catch (Exception e)
    {
        // Name this method in the message (it previously named GetSystemCpuMemoryValuesAsync).
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"Unhandled exception in ReportAsync:{Environment.NewLine}{e}");

        throw;
    }
}
/// <summary>
/// Records the current active/ephemeral port counts and firewall rule count, then
/// samples CPU and memory usage roughly every 250 ms until the observer's stopwatch
/// exceeds the monitoring duration.
/// </summary>
/// <param name="token">Cancellation token; checked on entry, on every sampling iteration and during the inter-sample delay.</param>
/// <returns>A task that completes when sampling has finished.</returns>
/// <exception cref="OperationCanceledException">Cancellation was requested through <paramref name="token"/>.</exception>
private async Task GetSystemCpuMemoryValuesAsync(CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    CpuUtilizationProvider cpuUtilizationProvider = null;

    try
    {
        // Ports.
        int activePortCountTotal = OperatingSystemInfoProvider.Instance.GetActivePortCount();
        int ephemeralPortCountTotal = OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount();
        this.activePortsData.Data.Add(activePortCountTotal);
        this.ephemeralPortsData.Data.Add(ephemeralPortCountTotal);

        // Firewall rules.
        int firewalls = NetworkUsage.GetActiveFirewallRulesCount();
        this.firewallData.Data.Add(firewalls);

        // CPU and Memory.
        // Note: Please make sure you understand the normal state of your nodes
        // with respect to the machine resource use and/or abuse by your service(s).
        // For example, if it is normal for your services to consume 90% of available CPU and memory
        // as part of the work they perform under normal traffic flow, then it doesn't make sense to warn or
        // error on these conditions.
        // TODO: Look into making this a long running background task with signaling.

        // Default sampling window is 10 seconds unless MonitorDuration has been set.
        TimeSpan duration = TimeSpan.FromSeconds(10);

        if (MonitorDuration > TimeSpan.MinValue)
        {
            duration = MonitorDuration;
        }

        cpuUtilizationProvider = CpuUtilizationProvider.Create();

        // Warm up the counters: the first reading is discarded.
        _ = await cpuUtilizationProvider.NextValueAsync();

        // NOTE(review): the stopwatch is not started in this method - presumably the
        // caller starts it before invoking this; confirm.
        while (this.stopwatch.Elapsed <= duration)
        {
            token.ThrowIfCancellationRequested();

            // CPU is only sampled when a valid (1-100) warning threshold is configured.
            if (CpuWarningUsageThresholdPct > 0 && CpuWarningUsageThresholdPct <= 100)
            {
                AllCpuTimeData.Data.Add(await cpuUtilizationProvider.NextValueAsync());
            }

            if (MemWarningUsageThresholdMb > 0)
            {
                // 1048576 = bytes per megabyte.
                float committedMegaBytes = MemoryUsageProvider.Instance.GetCommittedBytes() / 1048576.0f;
                this.allMemDataCommittedBytes.Data.Add(committedMegaBytes);
            }

            if (MemoryWarningLimitPercent > 0)
            {
                this.allMemDataPercentUsed.Data.Add(
                    OperatingSystemInfoProvider.Instance.TupleGetTotalPhysicalMemorySizeAndPercentInUse().PercentInUse);
            }

            // Pass the token so a cancellation request interrupts the wait between samples.
            await Task.Delay(250, token).ConfigureAwait(false);
        }
    }
    catch (AggregateException e) when (
        e.InnerException is OperationCanceledException ||
        e.InnerException is TaskCanceledException ||
        e.InnerException is TimeoutException)
    {
        // Wrapped cancellation/timeout is treated as a normal, quiet exit.
        return;
    }
    catch (Exception e) when (!(e is OperationCanceledException))
    {
        // Plain cancellation (including TaskCanceledException from the delay) is excluded
        // by the filter above: it propagates to the caller without a warning health report.
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"Unhandled exception in GetSystemCpuMemoryValuesAsync:{Environment.NewLine}{e}");

        throw;
    }
    finally
    {
        cpuUtilizationProvider?.Dispose();
    }
}
/// <summary>
/// Optionally logs the collected node-level CPU, memory, port and firewall data to CSV,
/// then reports health for each of those metrics against its error/warning thresholds.
/// </summary>
/// <param name="token">Cancellation token; checked once on entry.</param>
/// <returns>An already-completed task (all work is done synchronously).</returns>
/// <exception cref="OperationCanceledException">Cancellation was requested through <paramref name="token"/>.</exception>
public override Task ReportAsync(CancellationToken token)
{
    try
    {
        token.ThrowIfCancellationRequested();

        if (CsvFileLogger != null && CsvFileLogger.EnableCsvLogging)
        {
            var fileName = "CpuMemFirewallsPorts" + NodeName;

            // Log (csv) system-wide CPU/Mem data.
            CsvFileLogger.LogData(
                fileName,
                NodeName,
                "CPU Time",
                "Average",
                Math.Round(AllCpuTimeData.AverageDataValue));

            CsvFileLogger.LogData(
                fileName,
                NodeName,
                "CPU Time",
                "Peak",
                Math.Round(AllCpuTimeData.MaxDataValue));

            CsvFileLogger.LogData(
                fileName,
                NodeName,
                "Committed Memory (MB)",
                "Average",
                Math.Round(this.allMemDataCommittedBytes.AverageDataValue));

            CsvFileLogger.LogData(
                fileName,
                NodeName,
                "Committed Memory (MB)",
                "Peak",
                Math.Round(this.allMemDataCommittedBytes.MaxDataValue));

            // NOTE(review): the Data[0] reads below assume a port/firewall sample has
            // already been collected before this method runs - confirm against the caller.
            CsvFileLogger.LogData(
                fileName,
                NodeName,
                "All Active Ports",
                "Total",
                this.activePortsData.Data[0]);

            CsvFileLogger.LogData(
                fileName,
                NodeName,
                "Ephemeral Active Ports",
                "Total",
                this.ephemeralPortsData.Data[0]);

            CsvFileLogger.LogData(
                fileName,
                NodeName,
                "Firewall Rules",
                "Total",
                this.firewallData.Data[0]);

            DataTableFileLogger.Flush();
        }

        // Report on the global health state (system-wide (node) metrics).
        // User-configurable in NodeObserver.config.json
        var timeToLiveWarning = SetHealthReportTimeToLive();

        // CPU
        if (AllCpuTimeData.AverageDataValue > 0)
        {
            ProcessResourceDataReportHealth(
                AllCpuTimeData,
                CpuErrorUsageThresholdPct,
                CpuWarningUsageThresholdPct,
                timeToLiveWarning);
        }

        // Memory
        if (this.allMemDataCommittedBytes.AverageDataValue > 0)
        {
            ProcessResourceDataReportHealth(
                this.allMemDataCommittedBytes,
                MemErrorUsageThresholdMb,
                MemWarningUsageThresholdMb,
                timeToLiveWarning);
        }

        if (this.allMemDataPercentUsed.AverageDataValue > 0)
        {
            ProcessResourceDataReportHealth(
                this.allMemDataPercentUsed,
                MemoryErrorLimitPercent,
                MemoryWarningLimitPercent,
                timeToLiveWarning);
        }

        // Firewall rules
        ProcessResourceDataReportHealth(
            this.firewallData,
            FirewallRulesErrorThreshold,
            FirewallRulesWarningThreshold,
            timeToLiveWarning);

        // Ports - Active TCP
        ProcessResourceDataReportHealth(
            this.activePortsData,
            ActivePortsErrorThreshold,
            ActivePortsWarningThreshold,
            timeToLiveWarning);

        // Ports - Active Ephemeral TCP
        ProcessResourceDataReportHealth(
            this.ephemeralPortsData,
            EphemeralPortsErrorThreshold,
            EphemeralPortsWarningThreshold,
            timeToLiveWarning);

        return Task.CompletedTask;
    }
    catch (AggregateException e) when (
        e.InnerException is OperationCanceledException ||
        e.InnerException is TaskCanceledException ||
        e.InnerException is TimeoutException)
    {
        // Wrapped cancellation/timeout is treated as a normal, quiet exit.
        return Task.CompletedTask;
    }
    catch (Exception e) when (!(e is OperationCanceledException))
    {
        // Plain cancellation is excluded by the filter above: it is not a fault, so it
        // propagates to the caller without producing a misleading warning health report.
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            $"Unhandled exception re-thrown:{Environment.NewLine}{e}");

        throw;
    }
}
/// <summary>
/// Assembles a plain-text summary of the captured Service Fabric configuration values
/// plus the deployed-apps information, and writes it to SFInfraInfo.txt in the observer
/// log folder. A warning health report is emitted when the file cannot be written.
/// </summary>
/// <param name="token">Cancellation token; checked on entry and again before the file write.</param>
/// <returns>A task that completes once the summary has been written (or the write has failed).</returns>
public override async Task ReportAsync(CancellationToken token)
{
    token.ThrowIfCancellationRequested();

    var report = new StringBuilder();

    _ = report.AppendLine("\nService Fabric information:\n");

    // Each entry is only emitted when its backing value is present.
    if (!string.IsNullOrEmpty(SFVersion))
    {
        _ = report.AppendLine("Runtime Version: " + SFVersion);
    }

    if (SFBinRoot != null)
    {
        _ = report.AppendLine("Fabric Bin root directory: " + SFBinRoot);
    }

    if (SFCodePath != null)
    {
        _ = report.AppendLine("Fabric Code Path: " + SFCodePath);
    }

    if (!string.IsNullOrEmpty(SFDataRoot))
    {
        _ = report.AppendLine("Data root directory: " + SFDataRoot);
    }

    if (!string.IsNullOrEmpty(SFLogRoot))
    {
        _ = report.AppendLine("Log root directory: " + SFLogRoot);
    }

    if (SFVolumeDiskServiceEnabled != null)
    {
        _ = report.AppendLine("Volume Disk Service Enabled: " + SFVolumeDiskServiceEnabled);
    }

    if (unsupportedPreviewFeaturesEnabled != null)
    {
        _ = report.AppendLine("Unsupported Preview Features Enabled: " + unsupportedPreviewFeaturesEnabled);
    }

    if (SFCompatibilityJsonPath != null)
    {
        _ = report.AppendLine("Compatibility Json path: " + SFCompatibilityJsonPath);
    }

    if (SFEnableCircularTraceSession != null)
    {
        _ = report.AppendLine("Enable Circular trace session: " + SFEnableCircularTraceSession);
    }

    // Deployed application details follow the configuration section, then a blank line.
    var deployedAppsInfo = await GetDeployedAppsInfoAsync(token).ConfigureAwait(true);
    _ = report.Append(deployedAppsInfo).AppendLine();

    token.ThrowIfCancellationRequested();

    // This file is used by the web application (ObserverWebApi).
    var infoFilePath = Path.Combine(ObserverLogger.LogFolderBasePath, "SFInfraInfo.txt");
    bool written = ObserverLogger.TryWriteLogFile(infoFilePath, report.ToString());

    if (!written)
    {
        HealthReporter.ReportFabricObserverServiceHealth(
            FabricServiceContext.ServiceName.OriginalString,
            ObserverName,
            HealthState.Warning,
            "Unable to create SFInfraInfo.txt file.");
    }

    _ = report.Clear();
}