protected override void Dispose(bool disposing)
        {
            // Health reports are only cleared when Dispose is invoked with disposing == true.
            if (disposing)
            {
                foreach (var conn in this.connectionStatus)
                {
                    // Skip connections that are not currently flagged as Error or Warning.
                    if (conn.Health != HealthState.Error && conn.Health != HealthState.Warning)
                    {
                        continue;
                    }

                    // Replace the outstanding Error/Warning for this endpoint with an Ok report.
                    var clearingReport = new HealthReport
                    {
                        AppName                = new Uri(conn.TargetApp),
                        Code                   = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                        EmitLogEvent           = true,
                        HealthMessage          = $"Clearing NetworkObserver's Health Error/Warning for {conn.TargetApp}/{conn.HostName} connection state since FO is stopping.",
                        HealthReportTimeToLive = default(TimeSpan),
                        State                  = HealthState.Ok,
                        NodeName               = NodeName,
                        Observer               = ObserverName,
                        Property               = $"EndpointUnreachable({conn.HostName})",
                        ReportType             = HealthReportType.Application,
                    };

                    HealthReporter.ReportHealthToServiceFabric(clearingReport);
                }
            }
        }
Example #2
0
        public override Task ReportAsync(CancellationToken token)
        {
            Token.ThrowIfCancellationRequested();

            // Informational report. For now, Linux is where we pay close attention to memory use
            // by Fabric system services, so two extra memory lines are added on that platform.
            var reportTimeToLive = SetHealthReportTimeToLive();
            var fabricMemMb      = this.allMemData.FirstOrDefault(x => x.Id == "Fabric")?.AverageDataValue;

            string summary =
                $"Number of ports in use by Fabric services: {TotalActivePortCountAllSystemServices}{Environment.NewLine}" +
                $"Number of ephemeral ports in use by Fabric services: {TotalActiveEphemeralPortCountAllSystemServices}{Environment.NewLine}" +
                $"Fabric memory use MB: {fabricMemMb}{Environment.NewLine}";

            if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
            {
                var gatewayMemMb = this.allMemData.FirstOrDefault(x => x.Id == "FabricGateway.exe")?.AverageDataValue;
                var hostMemMb    = this.allMemData.FirstOrDefault(x => x.Id == "FabricHost")?.AverageDataValue;

                summary += $"FabricGateway memory use MB: {gatewayMemMb}{Environment.NewLine}" +
                           $"FabricHost memory use MB: {hostMemMb}{Environment.NewLine}";
            }

            HealthReporter.ReportHealthToServiceFabric(
                new HealthReport
                {
                    Observer               = ObserverName,
                    NodeName               = NodeName,
                    HealthMessage          = summary,
                    State                  = HealthState.Ok,
                    HealthReportTimeToLive = reportTimeToLive,
                });

            // The port totals have been reported; start the next run from zero.
            TotalActivePortCountAllSystemServices          = 0;
            TotalActiveEphemeralPortCountAllSystemServices = 0;

            // Evaluate each collected metric against its error/warning thresholds:
            // CPU, memory, active TCP ports, ephemeral TCP ports.
            ProcessResourceDataList(this.allCpuData, CpuErrorUsageThresholdPct, CpuWarnUsageThresholdPct);
            ProcessResourceDataList(this.allMemData, MemErrorUsageThresholdMb, MemWarnUsageThresholdMb);
            ProcessResourceDataList(this.allActiveTcpPortData, ActiveTcpPortCountError, ActiveTcpPortCountWarning);
            ProcessResourceDataList(this.allEphemeralTcpPortData, ActiveEphemeralPortCountError, ActiveEphemeralPortCountWarning);

            // Windows Event Log: only relevant on Windows, with the web app deployed and monitoring enabled.
            bool writeEventLogFile = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) &&
                                     ObserverManager.ObserverWebAppDeployed &&
                                     this.monitorWinEventLog;

            if (writeEventLogFile)
            {
                // The collected error events are written as HTML to a file that the
                // web front end log viewer reads.
                int count      = this.evtRecordList.Count();
                var eventsPath = Path.Combine(ObserverLogger.LogFolderBasePath, "EventVwrErrors.txt");

                // Drop the previous run's file; failure to delete is tolerated.
                if (File.Exists(eventsPath))
                {
                    try
                    {
                        File.Delete(eventsPath);
                    }
                    catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
                    {
                    }
                }

                if (count >= 10)
                {
                    var html = new StringBuilder();

                    // Collapsible header showing the number of collected events.
                    _ = html.Append("<br/><div><strong><a href='javascript:toggle(\"evtContainer\")'>")
                            .Append("<div id=\"plus\" style=\"display: inline; font-size: 25px;\">+</div> ")
                            .Append(count)
                            .Append(" Error Events in ServiceFabric and System</a> Event logs</strong>.<br/></div>")
                            .AppendLine();

                    _ = html.AppendLine("<div id='evtContainer' style=\"display: none;\">");

                    foreach (var record in this.evtRecordList.Distinct())
                    {
                        token.ThrowIfCancellationRequested();

                        try
                        {
                            // One <div> per event field.
                            _ = html.AppendLine($"<div>{record.LogName}</div>");
                            _ = html.AppendLine($"<div>{record.LevelDisplayName}</div>");

                            var created = record.TimeCreated;

                            if (created.HasValue)
                            {
                                _ = html.AppendLine($"<div>{created.Value.ToShortDateString()}</div>");
                            }

                            foreach (var property in record.Properties)
                            {
                                if (property.Value == null || Convert.ToString(property.Value).Length <= 0)
                                {
                                    continue;
                                }

                                _ = html.AppendLine("<div>" + property.Value + "</div>");
                            }
                        }
                        catch (EventLogException)
                        {
                            // A record that cannot be read is skipped.
                        }
                    }

                    _ = html.AppendLine("</div>");

                    _ = ObserverLogger.TryWriteLogFile(eventsPath, html.ToString());
                    _ = html.Clear();
                }

                // Clean up.
                if (count > 0)
                {
                    this.evtRecordList.Clear();
                }
            }

            ClearDataContainers();

            return Task.CompletedTask;
        }
        /// <summary>
        /// Reports the OS status and OS information collected by this observer as Service Fabric
        /// health reports, optionally writes SysInfo.txt for the web app, and raises a Warning when
        /// Windows Update automatic download is enabled.
        /// </summary>
        /// <param name="token">Cancellation token checked at the start of the report.</param>
        /// <returns>A completed task; all work in this method is synchronous.</returns>
        /// <exception cref="OperationCanceledException">
        /// Thrown when <paramref name="token"/> is cancelled. Cancellation is propagated without
        /// emitting an Error health report.
        /// </exception>
        public override Task ReportAsync(CancellationToken token)
        {
            try
            {
                token.ThrowIfCancellationRequested();

                // OS Health.
                if (this.osStatus != null && !string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
                {
                    string healthMessage = $"OS reporting unhealthy: {this.osStatus}";
                    var    healthReport  = new HealthReport
                    {
                        Observer               = ObserverName,
                        NodeName               = NodeName,
                        HealthMessage          = healthMessage,
                        State                  = HealthState.Error,
                        HealthReportTimeToLive = SetHealthReportTimeToLive(),
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);

                    // This means this observer created a Warning or Error SF Health Report
                    HasActiveFabricErrorOrWarning = true;

                    // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
                    if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                    {
                        _ = TelemetryClient?.ReportHealthAsync(
                            HealthScope.Application,
                            FabricRuntime.GetActivationContext().ApplicationName,
                            HealthState.Error,
                            $"{NodeName} - OS reporting unhealthy: {this.osStatus}",
                            ObserverName,
                            Token);
                    }
                }
                else if (HasActiveFabricErrorOrWarning && string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
                {
                    // Clear Error or Warning with an OK Health Report.
                    string healthMessage = $"OS reporting healthy: {this.osStatus}";

                    var healthReport = new HealthReport
                    {
                        Observer               = ObserverName,
                        NodeName               = NodeName,
                        HealthMessage          = healthMessage,
                        State                  = HealthState.Ok,
                        HealthReportTimeToLive = default(TimeSpan),
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);

                    // Reset internal health state.
                    HasActiveFabricErrorOrWarning = false;
                }

                if (ObserverManager.ObserverWebAppDeployed)
                {
                    var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "SysInfo.txt");

                    // The timestamp uses a fixed pattern, so format it with the invariant culture;
                    // otherwise the '/' and ':' separators vary with the machine's current culture.
                    string lastUpdated = DateTime.UtcNow.ToString(
                        "M/d/yyyy HH:mm:ss",
                        System.Globalization.CultureInfo.InvariantCulture);

                    // This file is used by the web application (log reader.).
                    if (!ObserverLogger.TryWriteLogFile(logPath, $"Last updated on {lastUpdated} UTC<br/>{this.osReport}"))
                    {
                        HealthReporter.ReportFabricObserverServiceHealth(
                            FabricServiceContext.ServiceName.OriginalString,
                            ObserverName,
                            HealthState.Warning,
                            "Unable to create SysInfo.txt file.");
                    }
                }

                // Informational report carrying the collected OS information.
                var report = new HealthReport
                {
                    Observer               = ObserverName,
                    HealthMessage          = this.osReport,
                    State                  = HealthState.Ok,
                    NodeName               = NodeName,
                    HealthReportTimeToLive = SetHealthReportTimeToLive(),
                };

                HealthReporter.ReportHealthToServiceFabric(report);

                bool isWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);

                // Windows Update automatic download enabled?
                if (isWindows && this.isWindowsUpdateAutoDownloadEnabled)
                {
                    string linkText =
                        $"{Environment.NewLine}For clusters of Silver durability or above, " +
                        $"please consider <a href=\"https://docs.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade\" target=\"blank\">" +
                        $"enabling VMSS automatic OS image upgrades</a> to prevent unexpected VM reboots. " +
                        $"For Bronze durability clusters, please consider deploying the " +
                        $"<a href=\"https://docs.microsoft.com/azure/service-fabric/service-fabric-patch-orchestration-application\" target=\"blank\">Patch Orchestration Service</a>.";

                    string auServiceEnabledMessage = $"Windows Update Automatic Download is enabled.{linkText}";

                    report = new HealthReport
                    {
                        Observer               = ObserverName,
                        Property               = "OSConfiguration",
                        HealthMessage          = auServiceEnabledMessage,
                        State                  = HealthState.Warning,
                        NodeName               = NodeName,
                        HealthReportTimeToLive = SetHealthReportTimeToLive(),
                    };

                    HealthReporter.ReportHealthToServiceFabric(report);

                    // Already inside the Windows-only branch, so no further platform check is needed.
                    if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                    {
                        // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
                        var telemetryData = new TelemetryData(FabricClientInstance, token)
                        {
                            HealthEventDescription = auServiceEnabledMessage,
                            HealthState            = "Warning",
                            Metric       = "WUAutoDownloadEnabled",
                            Value        = this.isWindowsUpdateAutoDownloadEnabled,
                            NodeName     = NodeName,
                            ObserverName = ObserverName,
                            Source       = ObserverConstants.FabricObserverName,
                        };

                        _ = TelemetryClient?.ReportMetricAsync(
                            telemetryData,
                            Token);
                    }

                    // ETW.
                    if (IsEtwEnabled)
                    {
                        Logger.EtwLogger?.Write(
                            ObserverConstants.FabricObserverETWEventName,
                            new
                        {
                            HealthState            = "Warning",
                            HealthEventDescription = auServiceEnabledMessage,
                            ObserverName,
                            Metric = "WUAutoDownloadEnabled",
                            Value  = this.isWindowsUpdateAutoDownloadEnabled,
                            NodeName,
                        });
                    }
                }

                if (isWindows)
                {
                    // reset au globals for fresh detection during next observer run.
                    this.isWindowsUpdateAutoDownloadEnabled = false;
                    this.auStateUnknown       = false;
                    this.isWUADSettingEnabled = false;
                }

                return Task.CompletedTask;
            }
            catch (Exception e) when (!(e is OperationCanceledException))
            {
                // Cancellation is excluded by the filter above: it is an expected outcome and
                // must not surface as an "unhandled exception" Error health report.
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Error,
                    $"Unhandled exception processing OS information:{Environment.NewLine}{e}");

                throw;
            }
        }
        /// <summary>
        /// Reports the state of each tracked outbound connection for the configured target apps:
        /// a Warning health report (plus optional telemetry/ETW) for endpoints that are not connected,
        /// and an Ok health report that clears a previous Warning/Error for endpoints that are
        /// connected again. Connected entries are then removed from the tracking list.
        /// </summary>
        /// <param name="token">Cancellation token checked on every config and connection iteration.</param>
        /// <returns>A completed task; all work in this method is synchronous.</returns>
        /// <exception cref="OperationCanceledException">Thrown when <paramref name="token"/> is cancelled.</exception>
        public override Task ReportAsync(CancellationToken token)
        {
            var timeToLiveWarning = SetHealthReportTimeToLive();

            // Track what happened in this run so the observer-wide flag does not depend on iteration order.
            bool warningReported = false;
            bool warningCleared  = false;

            // Report on connection state.
            foreach (var config in this.userConfig)
            {
                token.ThrowIfCancellationRequested();

                foreach (var conn in this.connectionStatus.Where(cs => cs.TargetApp == config.TargetApp))
                {
                    token.ThrowIfCancellationRequested();

                    if (!conn.Connected)
                    {
                        this.healthState = HealthState.Warning;
                        var healthMessage = $"Outbound Internet connection failure detected for endpoint {conn.HostName}{Environment.NewLine}";

                        // Send Health Telemetry (perhaps it signals an Alert in AppInsights or LogAnalytics).
                        // This object is also attached to the health report below as HealthData.
                        var telemetryData = new TelemetryData(FabricClientInstance, token)
                        {
                            ApplicationName        = conn.TargetApp,
                            Code                   = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                            HealthState            = "Warning",
                            HealthEventDescription = healthMessage,
                            ObserverName           = ObserverName,
                            Metric                 = ErrorWarningProperty.InternetConnectionFailure,
                            NodeName               = NodeName,
                        };

                        if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                        {
                            _ = TelemetryClient?.ReportMetricAsync(
                                telemetryData,
                                Token);
                        }

                        var report = new HealthReport
                        {
                            AppName                = new Uri(conn.TargetApp),
                            EmitLogEvent           = true,
                            HealthData             = telemetryData,
                            HealthMessage          = healthMessage,
                            HealthReportTimeToLive = timeToLiveWarning,
                            State      = this.healthState,
                            NodeName   = NodeName,
                            Observer   = ObserverName,
                            Property   = $"EndpointUnreachable({conn.HostName})",
                            ReportType = HealthReportType.Application,
                            ResourceUsageDataProperty = $"{ErrorWarningProperty.InternetConnectionFailure}: {conn.HostName}",
                        };

                        // Send health report Warning and log event locally.
                        HealthReporter.ReportHealthToServiceFabric(report);

                        // This means this observer created a Warning or Error SF Health Report
                        HasActiveFabricErrorOrWarning = true;
                        warningReported = true;

                        // ETW.
                        if (IsEtwEnabled)
                        {
                            Logger.EtwLogger?.Write(
                                ObserverConstants.FabricObserverETWEventName,
                                new
                            {
                                ApplicationName        = conn.TargetApp,
                                Code                   = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                                HealthState            = "Warning",
                                HealthEventDescription = healthMessage,
                                ObserverName,
                                Metric = ErrorWarningProperty.InternetConnectionFailure,
                                NodeName,
                            });
                        }
                    }
                    else
                    {
                        // Only a connection whose recorded health is Warning or Error has a report to clear.
                        // (The two inequality tests must be joined with &&: joined with ||, the condition
                        // is always true and the clearing code below can never run.)
                        if (conn.Health != HealthState.Warning &&
                            conn.Health != HealthState.Error)
                        {
                            continue;
                        }

                        this.healthState = HealthState.Ok;
                        var healthMessage = $"Outbound Internet connection successful for {conn.HostName} from node {NodeName}.";

                        // Clear existing Health Warning.
                        var report = new HealthReport
                        {
                            AppName                = new Uri(conn.TargetApp),
                            Code                   = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                            EmitLogEvent           = true,
                            HealthMessage          = healthMessage,
                            HealthReportTimeToLive = default(TimeSpan),
                            State                  = HealthState.Ok,
                            NodeName               = NodeName,
                            Observer               = ObserverName,
                            Property               = $"EndpointUnreachable({conn.HostName})",
                            ReportType             = HealthReportType.Application,
                        };

                        HealthReporter.ReportHealthToServiceFabric(report);
                        warningCleared = true;

                        // Telemetry.
                        if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                        {
                            var telemetryData = new TelemetryData(FabricClientInstance, token)
                            {
                                ApplicationName        = conn.TargetApp,
                                Code                   = FOErrorWarningCodes.Ok,
                                HealthState            = "Ok",
                                HealthEventDescription = healthMessage,
                                ObserverName           = ObserverName,
                                Metric                 = "Internet Connection State",
                                NodeName               = NodeName,
                            };

                            _ = TelemetryClient?.ReportMetricAsync(
                                telemetryData,
                                Token);
                        }

                        // ETW.
                        if (IsEtwEnabled)
                        {
                            Logger.EtwLogger?.Write(
                                ObserverConstants.FabricObserverETWEventName,
                                new
                            {
                                ApplicationName        = conn.TargetApp,
                                Code                   = FOErrorWarningCodes.Ok,
                                HealthState            = "Ok",
                                HealthEventDescription = healthMessage,
                                ObserverName,
                                Metric = "Internet Connection State",
                                NodeName,
                            });
                        }
                    }
                }
            }

            // Reset health state only when this run cleared a report and raised no new warning;
            // resetting per connection would let a recovered endpoint hide another endpoint's
            // warning raised earlier in the same run.
            if (warningCleared && !warningReported)
            {
                HasActiveFabricErrorOrWarning = false;
            }

            // Clear
            _ = this.connectionStatus.RemoveAll(conn => conn.Connected);
            this.connectionStatus.TrimExcess();
            this.connEndpointTestResults.Clear();

            return Task.CompletedTask;
        }
        /// <summary>
        /// For every replica/instance in <c>ReplicaOrInstanceList</c> whose application (by name or type)
        /// is present in <c>deployedTargetList</c>, samples the host process's port counts once and then
        /// its CPU and memory use every 250 ms for the monitoring duration, storing the samples in the
        /// per-process resource usage containers.
        /// </summary>
        /// <param name="token">Cancellation token checked between replicas and on every sample.</param>
        /// <returns>A task that completes when all target processes have been sampled.</returns>
        /// <remarks>
        /// <see cref="Win32Exception"/>, <see cref="ArgumentException"/> and
        /// <see cref="InvalidOperationException"/> are logged at Information level and the replica is
        /// skipped; any other exception (including cancellation) is rethrown.
        /// </remarks>
        private async Task MonitorDeployedAppsAsync(CancellationToken token)
        {
            Process currentProcess = null;

            foreach (var repOrInst in ReplicaOrInstanceList)
            {
                token.ThrowIfCancellationRequested();

                var  timer = new Stopwatch();
                int  processId = (int)repOrInst.HostProcessId;
                var  cpuUsage = new CpuUsage();
                bool checkCpu = false, checkMemMb = false, checkMemPct = false, checkAllPorts = false, checkEphemeralPorts = false;

                // Match the replica to a configured target by app name or app type. Ordinal,
                // case-insensitive comparison avoids the culture-dependent results of ToLower().
                var  application = this.deployedTargetList?.FirstOrDefault(
                    app => string.Equals(app?.TargetApp, repOrInst.ApplicationName?.OriginalString, StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(app?.TargetAppType, repOrInst.ApplicationTypeName, StringComparison.OrdinalIgnoreCase));

                if (application?.TargetApp == null && application?.TargetAppType == null)
                {
                    continue;
                }

                try
                {
                    // App level.
                    currentProcess = Process.GetProcessById(processId);

                    token.ThrowIfCancellationRequested();

                    var    procName      = currentProcess.ProcessName;
                    string appNameOrType = GetAppNameOrType(repOrInst);

                    var id = $"{appNameOrType}:{procName}";

                    // Add new resource data structures for each app service process where the metric is specified in configuration for related observation.
                    if (this.AllAppCpuData.All(list => list.Id != id) && (application.CpuErrorLimitPercent > 0 || application.CpuWarningLimitPercent > 0))
                    {
                        this.AllAppCpuData.Add(new FabricResourceUsageData <double>(ErrorWarningProperty.TotalCpuTime, id, DataCapacity, UseCircularBuffer));
                    }

                    if (this.AllAppCpuData.Any(list => list.Id == id))
                    {
                        checkCpu = true;
                    }

                    if (this.AllAppMemDataMb.All(list => list.Id != id) && (application.MemoryErrorLimitMb > 0 || application.MemoryWarningLimitMb > 0))
                    {
                        this.AllAppMemDataMb.Add(new FabricResourceUsageData <float>(ErrorWarningProperty.TotalMemoryConsumptionMb, id, DataCapacity, UseCircularBuffer));
                    }

                    if (this.AllAppMemDataMb.Any(list => list.Id == id))
                    {
                        checkMemMb = true;
                    }

                    if (this.AllAppMemDataPercent.All(list => list.Id != id) && (application.MemoryErrorLimitPercent > 0 || application.MemoryWarningLimitPercent > 0))
                    {
                        this.AllAppMemDataPercent.Add(new FabricResourceUsageData <double>(ErrorWarningProperty.TotalMemoryConsumptionPct, id, DataCapacity, UseCircularBuffer));
                    }

                    if (this.AllAppMemDataPercent.Any(list => list.Id == id))
                    {
                        checkMemPct = true;
                    }

                    if (this.AllAppTotalActivePortsData.All(list => list.Id != id) && (application.NetworkErrorActivePorts > 0 || application.NetworkWarningActivePorts > 0))
                    {
                        this.AllAppTotalActivePortsData.Add(new FabricResourceUsageData <int>(ErrorWarningProperty.TotalActivePorts, id, 1));
                    }

                    if (this.AllAppTotalActivePortsData.Any(list => list.Id == id))
                    {
                        checkAllPorts = true;
                    }

                    if (this.AllAppEphemeralPortsData.All(list => list.Id != id) && (application.NetworkErrorEphemeralPorts > 0 || application.NetworkWarningEphemeralPorts > 0))
                    {
                        this.AllAppEphemeralPortsData.Add(new FabricResourceUsageData <int>(ErrorWarningProperty.TotalEphemeralPorts, id, 1));
                    }

                    if (this.AllAppEphemeralPortsData.Any(list => list.Id == id))
                    {
                        checkEphemeralPorts = true;
                    }

                    // Measure Total and Ephemeral ports (a single sample each per run).
                    if (checkAllPorts)
                    {
                        this.AllAppTotalActivePortsData.FirstOrDefault(x => x.Id == id).Data.Add(OperatingSystemInfoProvider.Instance.GetActivePortCount(currentProcess.Id, FabricServiceContext));
                    }

                    if (checkEphemeralPorts)
                    {
                        this.AllAppEphemeralPortsData.FirstOrDefault(x => x.Id == id).Data.Add(OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount(currentProcess.Id, FabricServiceContext));
                    }

                    // No need to proceed further if no cpu and mem thresholds are specified in configuration.
                    if (!checkCpu && !checkMemMb && !checkMemPct)
                    {
                        continue;
                    }

                    /* CPU and Memory Usage */

                    // Default sampling window; replaced by MonitorDuration when that has been set
                    // to anything other than TimeSpan.MinValue.
                    TimeSpan duration = TimeSpan.FromSeconds(15);

                    if (MonitorDuration > TimeSpan.MinValue)
                    {
                        duration = MonitorDuration;
                    }

                    // Warm up the counters.
                    if (checkCpu)
                    {
                        _ = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);
                    }

                    if (checkMemMb || checkMemPct)
                    {
                        _ = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);
                    }

                    timer.Start();

                    // Compare whole TimeSpans. TimeSpan.Seconds is only the 0-59 seconds component,
                    // so comparing .Seconds misbehaves for durations of a minute or more
                    // (e.g. a 1 minute duration has Seconds == 0).
                    while (!currentProcess.HasExited && timer.Elapsed <= duration)
                    {
                        token.ThrowIfCancellationRequested();

                        if (checkCpu)
                        {
                            // CPU (all cores).
                            double cpu = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);

                            // Negative readings are dropped; readings above 100 are capped.
                            if (cpu >= 0)
                            {
                                if (cpu > 100)
                                {
                                    cpu = 100;
                                }

                                this.AllAppCpuData.FirstOrDefault(x => x.Id == id).Data.Add(cpu);
                            }
                        }

                        float processMem = 0;

                        if (checkMemMb || checkMemPct)
                        {
                            processMem = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);
                        }

                        if (checkMemMb)
                        {
                            // Memory (private working set (process)).
                            this.AllAppMemDataMb.FirstOrDefault(x => x.Id == id).Data.Add(processMem);
                        }

                        if (checkMemPct)
                        {
                            // Memory (percent in use (total)). Only the total is needed here.
                            var (totalMemory, _) = OperatingSystemInfoProvider.Instance.TupleGetTotalPhysicalMemorySizeAndPercentInUse();
                            long totalMem = totalMemory;

                            if (totalMem > 0)
                            {
                                // NOTE(review): the * 1024 presumably converts a total expressed in GB to MB
                                // to match processMem — confirm against the provider.
                                double usedPct = Math.Round(((double)(processMem * 100)) / (totalMem * 1024), 2);
                                this.AllAppMemDataPercent.FirstOrDefault(x => x.Id == id).Data.Add(Math.Round(usedPct, 1));
                            }
                        }

                        await Task.Delay(250, Token);
                    }

                    timer.Stop();
                    timer.Reset();
                }
                catch (Exception e)
                {
#if DEBUG
                    // DEBUG INFO
                    var healthReport = new Utilities.HealthReport
                    {
                        AppName       = repOrInst.ApplicationName,
                        HealthMessage = $"Error:{Environment.NewLine}{e}{Environment.NewLine}",
                        State         = HealthState.Ok,
                        Code          = FOErrorWarningCodes.Ok,
                        NodeName      = NodeName,
                        Observer      = ObserverName,
                        Property      = $"{e.Source}",
                        ReportType    = HealthReportType.Application,
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);
#endif
                    // These exception types are treated as "process not available": log and move on.
                    if (e is Win32Exception || e is ArgumentException || e is InvalidOperationException)
                    {
                        WriteToLogWithLevel(
                            ObserverName,
                            $"MonitorAsync failed to find current service process for {repOrInst.ApplicationName?.OriginalString ?? repOrInst.ApplicationTypeName}{Environment.NewLine}{e}",
                            LogLevel.Information);
                    }
                    else
                    {
                        // Cancellation is rethrown without a warning log entry.
                        if (!(e is OperationCanceledException || e is TaskCanceledException))
                        {
                            WriteToLogWithLevel(
                                ObserverName,
                                $"Unhandled exception in MonitorAsync:{Environment.NewLine}{e}",
                                LogLevel.Warning);
                        }

                        throw;
                    }
                }
                finally
                {
                    currentProcess?.Dispose();
                    currentProcess = null;
                }
            }
        }
        /// <summary>
        /// Walks <c>ReplicaOrInstanceList</c> and, for each entry's host process, samples CPU and
        /// memory usage for the configured monitoring duration and then records the process's
        /// active and ephemeral port counts. Samples are appended to the per-process usage
        /// containers, which are keyed by "{app name or type}:{process name}".
        /// </summary>
        /// <param name="token">Token checked before each entry and before each sample; cancellation propagates to the caller.</param>
        /// <returns>A task that completes once every entry has been processed.</returns>
        /// <remarks>
        /// <see cref="Win32Exception"/>, <see cref="ArgumentException"/> and <see cref="InvalidOperationException"/>
        /// are logged at Information level and the entry is skipped. Any other exception is rethrown
        /// (and logged as a warning first unless it is a cancellation).
        /// </remarks>
        private async Task MonitorDeployedAppsAsync(CancellationToken token)
        {
            Process currentProcess = null;

            foreach (var repOrInst in ReplicaOrInstanceList)
            {
                token.ThrowIfCancellationRequested();

                var timer     = new Stopwatch();
                int processId = (int)repOrInst.HostProcessId;
                var cpuUsage  = new CpuUsage();

                try
                {
                    // App level.
                    currentProcess = Process.GetProcessById(processId);

                    token.ThrowIfCancellationRequested();

                    var    procName      = currentProcess.ProcessName;
                    string appNameOrType = GetAppNameOrType(repOrInst);

                    var id = $"{appNameOrType}:{procName}";

                    // Add new resource data structures for each app service process.
                    if (this.allAppCpuData.All(list => list.Id != id))
                    {
                        this.allAppCpuData.Add(new FabricResourceUsageData <double>(ErrorWarningProperty.TotalCpuTime, id, DataCapacity, UseCircularBuffer));
                        this.allAppMemDataMb.Add(new FabricResourceUsageData <float>(ErrorWarningProperty.TotalMemoryConsumptionMb, id, DataCapacity, UseCircularBuffer));
                        this.allAppMemDataPercent.Add(new FabricResourceUsageData <double>(ErrorWarningProperty.TotalMemoryConsumptionPct, id, DataCapacity, UseCircularBuffer));
                        this.allAppTotalActivePortsData.Add(new FabricResourceUsageData <int>(ErrorWarningProperty.TotalActivePorts, id, 1));
                        this.allAppEphemeralPortsData.Add(new FabricResourceUsageData <int>(ErrorWarningProperty.TotalEphemeralPorts, id, 1));
                    }

                    // Resolve the containers for this process once instead of on every sample.
                    var cpuData        = this.allAppCpuData.FirstOrDefault(x => x.Id == id);
                    var memMbData      = this.allAppMemDataMb.FirstOrDefault(x => x.Id == id);
                    var memPercentData = this.allAppMemDataPercent.FirstOrDefault(x => x.Id == id);

                    // Default sampling window; overridden when MonitorDuration has been set.
                    TimeSpan duration = TimeSpan.FromSeconds(15);

                    if (MonitorDuration > TimeSpan.MinValue)
                    {
                        duration = MonitorDuration;
                    }

                    // Warm up the counters.
                    _ = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);
                    _ = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);

                    timer.Start();

                    // Compare whole TimeSpans: the Seconds property is only the 0-59 component,
                    // so a duration of one minute or more would end the loop almost immediately.
                    while (!currentProcess.HasExited && timer.Elapsed <= duration)
                    {
                        token.ThrowIfCancellationRequested();

                        // CPU (all cores). Negative readings are dropped; readings above 100 are capped.
                        double cpu = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);

                        if (cpu >= 0)
                        {
                            if (cpu > 100)
                            {
                                cpu = 100;
                            }

                            cpuData.Data.Add(cpu);
                        }

                        // Memory (private working set (process)).
                        var processMem = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);
                        memMbData.Data.Add(processMem);

                        // Memory (percent in use (total)).
                        var (totalMemory, _) = OperatingSystemInfoProvider.Instance.TupleGetTotalPhysicalMemorySizeAndPercentInUse();
                        long totalMem = totalMemory;

                        // Skip the sample when no usable total is available; a zero total would
                        // otherwise produce Infinity/NaN.
                        if (totalMem > 0)
                        {
                            // NOTE(review): the * 1024 suggests totalMem is in GB while processMem is in MB - confirm.
                            double usedPct = Math.Round(((double)(processMem * 100)) / (totalMem * 1024), 2);
                            memPercentData.Data.Add(Math.Round(usedPct, 1));
                        }

                        await Task.Delay(250, Token);
                    }

                    timer.Stop();
                    timer.Reset();

                    // Total and Ephemeral ports..
                    this.allAppTotalActivePortsData.FirstOrDefault(x => x.Id == id)
                    .Data.Add(OperatingSystemInfoProvider.Instance.GetActivePortCount(currentProcess.Id, FabricServiceContext));

                    this.allAppEphemeralPortsData.FirstOrDefault(x => x.Id == id)
                    .Data.Add(OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount(currentProcess.Id, FabricServiceContext));
                }
                catch (Exception e)
                {
#if DEBUG
                    // DEBUG INFO
                    var healthReport = new Utilities.HealthReport
                    {
                        AppName       = repOrInst.ApplicationName,
                        HealthMessage = $"Error: {e}\n\n",
                        State         = HealthState.Ok,
                        Code          = FOErrorWarningCodes.Ok,
                        NodeName      = NodeName,
                        Observer      = ObserverName,
                        Property      = $"{e.Source}",
                        ReportType    = HealthReportType.Application,
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);
#endif
                    if (e is Win32Exception || e is ArgumentException || e is InvalidOperationException)
                    {
                        // Logged for information only; move on to the next entry.
                        WriteToLogWithLevel(
                            ObserverName,
                            $"MonitorAsync failed to find current service process for {repOrInst.ApplicationName?.OriginalString ?? repOrInst.ApplicationTypeName}{Environment.NewLine}{e}",
                            LogLevel.Information);
                    }
                    else
                    {
                        // Cancellation is rethrown without a warning; anything else is logged first.
                        if (!(e is OperationCanceledException || e is TaskCanceledException))
                        {
                            WriteToLogWithLevel(
                                ObserverName,
                                $"Unhandled exception in MonitorAsync: \n {e}",
                                LogLevel.Warning);
                        }

                        throw;
                    }
                }
                finally
                {
                    // Release the process handle whether sampling succeeded or failed.
                    currentProcess?.Dispose();
                    currentProcess = null;
                }
            }
        }
Example #7
0
        /// <summary>
        /// Publishes a node-level health report for certificate state: Ok when none of the
        /// warning collections has entries, otherwise a Warning whose message lists the
        /// expired, not-found and expiring entries (in that order). On Warning, the same
        /// details are also sent to telemetry and ETW when those are enabled.
        /// </summary>
        /// <param name="token">Cancellation token; checked once on entry.</param>
        /// <returns>An already-completed task.</returns>
        public override Task ReportAsync(CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            // Reporting requires a prior ObserveAsync run; if any warning collection
            // is still null there is nothing to report.
            bool collectionsMissing = ExpiringWarnings == null
                                      || ExpiredWarnings == null
                                      || NotFoundWarnings == null;

            if (collectionsMissing)
            {
                return Task.CompletedTask;
            }

            bool noWarnings = ExpiringWarnings.Count == 0
                              && ExpiredWarnings.Count == 0
                              && NotFoundWarnings.Count == 0;

            if (noWarnings)
            {
                var okReport = new HealthReport
                {
                    Observer               = ObserverName,
                    ReportType             = HealthReportType.Node,
                    EmitLogEvent           = true,
                    NodeName               = NodeName,
                    HealthMessage          = "All cluster and monitored app certificates are healthy.",
                    State                  = HealthState.Ok,
                    HealthReportTimeToLive = RunInterval > TimeSpan.MinValue ? RunInterval : HealthReportTimeToLive,
                };

                HasActiveFabricErrorOrWarning = false;
                HealthReporter.ReportHealthToServiceFabric(okReport);

                return Task.CompletedTask;
            }

            // Each non-empty section contributes its entries, every one preceded by a newline.
            // The expired and not-found sections also end with a newline; the expiring section does not.
            string healthMessage = string.Empty;

            if (ExpiredWarnings.Count != 0)
            {
                healthMessage += "\n" + string.Join("\n", ExpiredWarnings) + "\n";
            }

            if (NotFoundWarnings.Count != 0)
            {
                healthMessage += "\n" + string.Join("\n", NotFoundWarnings) + "\n";
            }

            if (ExpiringWarnings.Count != 0)
            {
                healthMessage += "\n" + string.Join("\n", ExpiringWarnings);
            }

            var warningReport = new HealthReport
            {
                Code                   = FOErrorWarningCodes.WarningCertificateExpiration,
                Observer               = ObserverName,
                ReportType             = HealthReportType.Node,
                EmitLogEvent           = true,
                NodeName               = NodeName,
                HealthMessage          = healthMessage,
                State                  = HealthState.Warning,
                HealthReportTimeToLive = RunInterval > TimeSpan.MinValue ? RunInterval : HealthReportTimeToLive,
            };

            HasActiveFabricErrorOrWarning = true;

            const string warningState = "Warning";
            string osName = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? "Windows" : "Linux";

            if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
            {
                var telemetryData = new TelemetryData(FabricClientInstance, token)
                {
                    Code                   = FOErrorWarningCodes.WarningCertificateExpiration,
                    HealthState            = warningState,
                    NodeName               = NodeName,
                    Metric                 = ErrorWarningProperty.CertificateExpiration,
                    HealthEventDescription = healthMessage,
                    ObserverName           = ObserverName,
                    OS                     = osName,
                    Source                 = ObserverConstants.FabricObserverName,
                    Value                  = FOErrorWarningCodes.GetErrorWarningNameFromFOCode(FOErrorWarningCodes.WarningCertificateExpiration),
                };

                // The returned task is intentionally not awaited.
                _ = TelemetryClient?.ReportMetricAsync(telemetryData, Token);
            }

            if (IsEtwEnabled)
            {
                Logger.EtwLogger?.Write(
                    ObserverConstants.FabricObserverETWEventName,
                    new
                    {
                        Code        = FOErrorWarningCodes.WarningCertificateExpiration,
                        HealthState = warningState,
                        NodeName,
                        Metric = ErrorWarningProperty.CertificateExpiration,
                        HealthEventDescription = healthMessage,
                        ObserverName,
                        OS     = osName,
                        Source = ObserverConstants.FabricObserverName,
                        Value  = FOErrorWarningCodes.GetErrorWarningNameFromFOCode(FOErrorWarningCodes.WarningCertificateExpiration),
                    });
            }

            HealthReporter.ReportHealthToServiceFabric(warningReport);

            return Task.CompletedTask;
        }