Beispiel #1
0
        public override Task ReportAsync(CancellationToken token)
        {
            Token.ThrowIfCancellationRequested();

            // Informational report. For now, Linux is where we pay close attention to memory use by Fabric system services as there are still a few issues in that realm..
            var timeToLiveWarning     = SetHealthReportTimeToLive();
            var portInformationReport = new HealthReport
            {
                Observer      = ObserverName,
                NodeName      = NodeName,
                HealthMessage = $"Number of ports in use by Fabric services: {TotalActivePortCountAllSystemServices}{Environment.NewLine}" +
                                $"Number of ephemeral ports in use by Fabric services: {TotalActiveEphemeralPortCountAllSystemServices}{Environment.NewLine}" +
                                $"Fabric memory use MB: {this.allMemData.Where(x => x.Id == "Fabric")?.FirstOrDefault()?.AverageDataValue}{Environment.NewLine}" +
                                (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ?
                                 $"FabricGateway memory use MB: {this.allMemData.Where(x => x.Id == "FabricGateway.exe")?.FirstOrDefault()?.AverageDataValue}{Environment.NewLine}" +
                                 $"FabricHost memory use MB: {this.allMemData.Where(x => x.Id == "FabricHost")?.FirstOrDefault()?.AverageDataValue}{Environment.NewLine}" : string.Empty),

                State = HealthState.Ok,
                HealthReportTimeToLive = timeToLiveWarning,
            };

            HealthReporter.ReportHealthToServiceFabric(portInformationReport);

            // Reset ports counters.
            TotalActivePortCountAllSystemServices          = 0;
            TotalActiveEphemeralPortCountAllSystemServices = 0;

            // CPU
            ProcessResourceDataList(
                this.allCpuData,
                CpuErrorUsageThresholdPct,
                CpuWarnUsageThresholdPct);

            // Memory
            ProcessResourceDataList(
                this.allMemData,
                MemErrorUsageThresholdMb,
                MemWarnUsageThresholdMb);

            // Ports - Active TCP
            ProcessResourceDataList(
                this.allActiveTcpPortData,
                ActiveTcpPortCountError,
                ActiveTcpPortCountWarning);

            // Ports - Ephemeral
            ProcessResourceDataList(
                this.allEphemeralTcpPortData,
                ActiveEphemeralPortCountError,
                ActiveEphemeralPortCountWarning);

            // Windows Event Log
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && ObserverManager.ObserverWebAppDeployed &&
                this.monitorWinEventLog)
            {
                // SF Eventlog Errors?
                // Write this out to a new file, for use by the web front end log viewer.
                // Format = HTML.
                int count   = this.evtRecordList.Count();
                var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "EventVwrErrors.txt");

                // Remove existing file.
                if (File.Exists(logPath))
                {
                    try
                    {
                        File.Delete(logPath);
                    }
                    catch (IOException)
                    {
                    }
                    catch (UnauthorizedAccessException)
                    {
                    }
                }

                if (count >= 10)
                {
                    var sb = new StringBuilder();

                    _ = sb.AppendLine("<br/><div><strong>" +
                                      "<a href='javascript:toggle(\"evtContainer\")'>" +
                                      "<div id=\"plus\" style=\"display: inline; font-size: 25px;\">+</div> " + count +
                                      " Error Events in ServiceFabric and System</a> " +
                                      "Event logs</strong>.<br/></div>");

                    _ = sb.AppendLine("<div id='evtContainer' style=\"display: none;\">");

                    foreach (var evt in this.evtRecordList.Distinct())
                    {
                        token.ThrowIfCancellationRequested();

                        try
                        {
                            // Access event properties:
                            _ = sb.AppendLine("<div>" + evt.LogName + "</div>");
                            _ = sb.AppendLine("<div>" + evt.LevelDisplayName + "</div>");
                            if (evt.TimeCreated.HasValue)
                            {
                                _ = sb.AppendLine("<div>" + evt.TimeCreated.Value.ToShortDateString() + "</div>");
                            }

                            foreach (var prop in evt.Properties)
                            {
                                if (prop.Value != null && Convert.ToString(prop.Value).Length > 0)
                                {
                                    _ = sb.AppendLine("<div>" + prop.Value + "</div>");
                                }
                            }
                        }
                        catch (EventLogException)
                        {
                        }
                    }

                    _ = sb.AppendLine("</div>");

                    _ = ObserverLogger.TryWriteLogFile(logPath, sb.ToString());
                    _ = sb.Clear();
                }

                // Clean up.
                if (count > 0)
                {
                    this.evtRecordList.Clear();
                }
            }

            ClearDataContainers();

            return(Task.CompletedTask);
        }
        public override Task ReportAsync(CancellationToken token)
        {
            try
            {
                token.ThrowIfCancellationRequested();

                // OS Health.
                if (this.osStatus != null && !string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
                {
                    string healthMessage = $"OS reporting unhealthy: {this.osStatus}";
                    var    healthReport  = new HealthReport
                    {
                        Observer               = ObserverName,
                        NodeName               = NodeName,
                        HealthMessage          = healthMessage,
                        State                  = HealthState.Error,
                        HealthReportTimeToLive = SetHealthReportTimeToLive(),
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);

                    // This means this observer created a Warning or Error SF Health Report
                    HasActiveFabricErrorOrWarning = true;

                    // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
                    if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                    {
                        _ = TelemetryClient?.ReportHealthAsync(
                            HealthScope.Application,
                            FabricRuntime.GetActivationContext().ApplicationName,
                            HealthState.Error,
                            $"{NodeName} - OS reporting unhealthy: {this.osStatus}",
                            ObserverName,
                            Token);
                    }
                }
                else if (HasActiveFabricErrorOrWarning && string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
                {
                    // Clear Error or Warning with an OK Health Report.
                    string healthMessage = $"OS reporting healthy: {this.osStatus}";

                    var healthReport = new HealthReport
                    {
                        Observer               = ObserverName,
                        NodeName               = NodeName,
                        HealthMessage          = healthMessage,
                        State                  = HealthState.Ok,
                        HealthReportTimeToLive = default(TimeSpan),
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);

                    // Reset internal health state.
                    HasActiveFabricErrorOrWarning = false;
                }

                if (ObserverManager.ObserverWebAppDeployed)
                {
                    var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "SysInfo.txt");

                    // This file is used by the web application (log reader.).
                    if (!ObserverLogger.TryWriteLogFile(logPath, $"Last updated on {DateTime.UtcNow.ToString("M/d/yyyy HH:mm:ss")} UTC<br/>{this.osReport}"))
                    {
                        HealthReporter.ReportFabricObserverServiceHealth(
                            FabricServiceContext.ServiceName.OriginalString,
                            ObserverName,
                            HealthState.Warning,
                            "Unable to create SysInfo.txt file.");
                    }
                }

                var report = new HealthReport
                {
                    Observer               = ObserverName,
                    HealthMessage          = this.osReport,
                    State                  = HealthState.Ok,
                    NodeName               = NodeName,
                    HealthReportTimeToLive = SetHealthReportTimeToLive(),
                };

                HealthReporter.ReportHealthToServiceFabric(report);

                // Windows Update automatic download enabled?
                if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) &&
                    this.isWindowsUpdateAutoDownloadEnabled)
                {
                    string linkText =
                        $"{Environment.NewLine}For clusters of Silver durability or above, " +
                        $"please consider <a href=\"https://docs.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade\" target=\"blank\">" +
                        $"enabling VMSS automatic OS image upgrades</a> to prevent unexpected VM reboots. " +
                        $"For Bronze durability clusters, please consider deploying the " +
                        $"<a href=\"https://docs.microsoft.com/azure/service-fabric/service-fabric-patch-orchestration-application\" target=\"blank\">Patch Orchestration Service</a>.";

                    string auServiceEnabledMessage = $"Windows Update Automatic Download is enabled.{linkText}";

                    report = new HealthReport
                    {
                        Observer               = ObserverName,
                        Property               = "OSConfiguration",
                        HealthMessage          = auServiceEnabledMessage,
                        State                  = HealthState.Warning,
                        NodeName               = NodeName,
                        HealthReportTimeToLive = SetHealthReportTimeToLive(),
                    };

                    HealthReporter.ReportHealthToServiceFabric(report);

                    if (IsTelemetryProviderEnabled &&
                        IsObserverTelemetryEnabled &&
                        RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                    {
                        // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
                        var telemetryData = new TelemetryData(FabricClientInstance, token)
                        {
                            HealthEventDescription = auServiceEnabledMessage,
                            HealthState            = "Warning",
                            Metric       = "WUAutoDownloadEnabled",
                            Value        = this.isWindowsUpdateAutoDownloadEnabled,
                            NodeName     = NodeName,
                            ObserverName = ObserverName,
                            Source       = ObserverConstants.FabricObserverName,
                        };

                        _ = TelemetryClient?.ReportMetricAsync(
                            telemetryData,
                            Token);
                    }

                    // ETW.
                    if (IsEtwEnabled && RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                    {
                        Logger.EtwLogger?.Write(
                            ObserverConstants.FabricObserverETWEventName,
                            new
                        {
                            HealthState            = "Warning",
                            HealthEventDescription = auServiceEnabledMessage,
                            ObserverName,
                            Metric = "WUAutoDownloadEnabled",
                            Value  = this.isWindowsUpdateAutoDownloadEnabled,
                            NodeName,
                        });
                    }
                }

                if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                {
                    // reset au globals for fresh detection during next observer run.
                    this.isWindowsUpdateAutoDownloadEnabled = false;
                    this.auStateUnknown       = false;
                    this.isWUADSettingEnabled = false;
                }

                return(Task.CompletedTask);
            }
            catch (Exception e)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Error,
                    $"Unhandled exception processing OS information:{Environment.NewLine}{e}");

                throw;
            }
        }
Beispiel #3
0
        public override Task ReportAsync(CancellationToken token)
        {
            try
            {
                var timeToLiveWarning = SetHealthReportTimeToLive();

                // User-supplied Disk Space Usage % thresholds from Settings.xml.
                foreach (var data in this.DiskSpaceUsagePercentageData)
                {
                    token.ThrowIfCancellationRequested();
                    ProcessResourceDataReportHealth(
                        data,
                        DiskSpacePercentErrorThreshold,
                        DiskSpacePercentWarningThreshold,
                        timeToLiveWarning);
                }

                // User-supplied Average disk queue length thresholds from Settings.xml.
                foreach (var data in this.DiskAverageQueueLengthData)
                {
                    token.ThrowIfCancellationRequested();
                    ProcessResourceDataReportHealth(
                        data,
                        AverageQueueLengthErrorThreshold,
                        AverageQueueLengthWarningThreshold,
                        timeToLiveWarning);
                }

                /* For ETW Only - These calls will just produce ETW (note the thresholds). */
                if (IsEtwEnabled)
                {
                    // Disk Space Available
                    foreach (var data in this.DiskSpaceAvailableMbData)
                    {
                        token.ThrowIfCancellationRequested();
                        ProcessResourceDataReportHealth(
                            data,
                            0,
                            0,
                            timeToLiveWarning);
                    }

                    // Disk Space Total
                    foreach (var data in this.DiskSpaceTotalMbData)
                    {
                        token.ThrowIfCancellationRequested();
                        ProcessResourceDataReportHealth(
                            data,
                            0,
                            0,
                            timeToLiveWarning);
                    }
                }

                token.ThrowIfCancellationRequested();

                // This section only needs to run if you have the FabricObserverWebApi app installed.
                if (!ObserverManager.ObserverWebAppDeployed)
                {
                    return(Task.CompletedTask);
                }

                var diskInfoPath = Path.Combine(ObserverLogger.LogFolderBasePath, "disks.txt");

                _ = ObserverLogger.TryWriteLogFile(diskInfoPath, this.diskInfo.ToString());

                _ = this.diskInfo.Clear();

                return(Task.CompletedTask);
            }
            catch (AggregateException e) when(e.InnerException is OperationCanceledException || e.InnerException is TaskCanceledException || e.InnerException is TimeoutException)
            {
                return(Task.CompletedTask);
            }
            catch (Exception e)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    $"Unhandled exception in GetSystemCpuMemoryValuesAsync:{Environment.NewLine}{e}");

                throw;
            }
        }
        private async Task <bool> InitializeAsync()
        {
            WriteToLogWithLevel(
                ObserverName,
                $"Initializing {ObserverName} for network monitoring. | {NodeName}",
                LogLevel.Information);

            this.cancellationToken.ThrowIfCancellationRequested();

            // This only needs to be logged once.
            // This file is used by the ObserverWebApi application.
            if (ObserverManager.ObserverWebAppDeployed && !this.hasRun)
            {
                var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "NetInfo.txt");

                Console.WriteLine($"logPath: {logPath}");

                if (!ObserverLogger.TryWriteLogFile(logPath, GetNetworkInterfaceInfo(this.cancellationToken)))
                {
                    HealthReporter.ReportFabricObserverServiceHealth(
                        FabricServiceContext.ServiceName.OriginalString,
                        ObserverName,
                        HealthState.Warning,
                        "Unable to create NetInfo.txt file.");
                }
            }

            // Is this a unit test run?
            if (IsTestRun)
            {
                return(true);
            }

            var settings =
                FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(
                    ObserverConstants.ObserverConfigurationPackageName)?.Settings;

            this.configSettings.Initialize(
                settings,
                ConfigurationSectionName,
                "NetworkObserverDataFileName");

            var networkObserverConfigFileName =
                Path.Combine(this.dataPackagePath, this.configSettings.NetworkObserverConfigFileName);

            if (string.IsNullOrWhiteSpace(networkObserverConfigFileName))
            {
                ObserverLogger.LogError(
                    "Endpoint list file is not specified. " +
                    "Please Add file containing endpoints that need to be monitored.");

                return(false);
            }

            if (!File.Exists(networkObserverConfigFileName))
            {
                ObserverLogger.LogError(
                    "Endpoint list file is not specified. " +
                    "Please Add file containing endpoints that need to be monitored.");

                return(false);
            }

            if (this.userConfig.Count == 0)
            {
                using (Stream stream = new FileStream(
                           networkObserverConfigFileName,
                           FileMode.Open,
                           FileAccess.Read,
                           FileShare.Read))
                {
                    var configs = JsonHelper.ReadFromJsonStream <NetworkObserverConfig[]>(stream);

                    foreach (var netConfig in configs)
                    {
                        var deployedApps = await FabricClientInstance.QueryManager.GetDeployedApplicationListAsync(
                            NodeName,
                            new Uri(netConfig.TargetApp)).ConfigureAwait(false);

                        if (deployedApps == null || deployedApps.Count < 1)
                        {
                            continue;
                        }

                        this.userConfig.Add(netConfig);
                    }
                }

                if (this.userConfig.Count == 0)
                {
                    HealthReporter.ReportFabricObserverServiceHealth(
                        FabricServiceContext.ServiceName.ToString(),
                        ObserverName,
                        HealthState.Warning,
                        "Missing required configuration data: endpoints.");

                    return(false);
                }
            }

            return(true);
        }
        public override async Task ReportAsync(CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            var sb = new StringBuilder();

            _ = sb.AppendLine("\nService Fabric information:\n");

            if (!string.IsNullOrEmpty(this.SFVersion))
            {
                _ = sb.AppendLine("Runtime Version: " + this.SFVersion);
            }

            if (this.SFBinRoot != null)
            {
                _ = sb.AppendLine("Fabric Bin root directory: " + this.SFBinRoot);
            }

            if (this.SFCodePath != null)
            {
                _ = sb.AppendLine("Fabric Code Path: " + this.SFCodePath);
            }

            if (!string.IsNullOrEmpty(this.SFDataRoot))
            {
                _ = sb.AppendLine("Data root directory: " + this.SFDataRoot);
            }

            if (!string.IsNullOrEmpty(this.SFLogRoot))
            {
                _ = sb.AppendLine("Log root directory: " + this.SFLogRoot);
            }

            if (this.SFVolumeDiskServiceEnabled != null)
            {
                _ = sb.AppendLine("Volume Disk Service Enabled: " + this.SFVolumeDiskServiceEnabled);
            }

            if (this.unsupportedPreviewFeaturesEnabled != null)
            {
                _ = sb.AppendLine("Unsupported Preview Features Enabled: " + this.unsupportedPreviewFeaturesEnabled);
            }

            if (this.SFCompatibilityJsonPath != null)
            {
                _ = sb.AppendLine("Compatibility Json path: " + this.SFCompatibilityJsonPath);
            }

            if (this.SFEnableCircularTraceSession != null)
            {
                _ = sb.AppendLine("Enable Circular trace session: " + this.SFEnableCircularTraceSession);
            }

            _ = sb.Append(await GetDeployedAppsInfoAsync(token).ConfigureAwait(true));
            _ = sb.AppendLine();

            token.ThrowIfCancellationRequested();

            var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "SFInfraInfo.txt");

            // This file is used by the web application (ObserverWebApi).
            if (!ObserverLogger.TryWriteLogFile(logPath, sb.ToString()))
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    "Unable to create SFInfraInfo.txt file.");
            }

            _ = sb.Clear();
        }