Пример #1
0
 /// <summary>
 /// Delegates to the base <c>ExecuteFunction</c> while inside the scope returned by
 /// <c>HealthReporter.BeginTrack</c>. The track uses <c>HealthTrackType.CountAndDurationAverage</c>
 /// and a name built from <c>HealthReporter.RESTAPI_FORMAT</c>, the track prefix and the method name.
 /// </summary>
 /// <typeparam name="T">Result type produced by <paramref name="function"/>.</typeparam>
 /// <param name="methodName">Method name appended to the track prefix and forwarded to the base call.</param>
 /// <param name="forceThrow">Forwarded unchanged to the base implementation.</param>
 /// <param name="function">Delegate forwarded to the base implementation.</param>
 /// <param name="parameters">Extra arguments forwarded to the base implementation.</param>
 /// <returns>Whatever the base implementation returns.</returns>
 protected override T ExecuteFunction<T>(string methodName, bool forceThrow, Func<T> function, params object[] parameters)
 {
     string trackName = string.Format(HealthReporter.RESTAPI_FORMAT, this.TrackPrefix + "." + methodName);

     // The tracking scope is disposed as soon as the base call returns or throws.
     using (HealthReporter.BeginTrack(HealthTrackType.CountAndDurationAverage, trackName))
     {
         return base.ExecuteFunction<T>(methodName, forceThrow, function, parameters);
     }
 }
Пример #2
0
 /// <summary>
 /// Delegates to the base <c>ExecuteFunction</c> while inside the scope returned by
 /// <c>HealthReporter.BeginTrack</c>, using a caller-chosen track type and a name built from
 /// <c>HealthReporter.BUSINESS_FORMAT</c>, the track prefix and the method name.
 /// </summary>
 /// <typeparam name="K">Result type produced by <paramref name="function"/>.</typeparam>
 /// <param name="type">Track type handed to <c>HealthReporter.BeginTrack</c>.</param>
 /// <param name="methodName">Method name appended to the track prefix and forwarded to the base call.</param>
 /// <param name="function">Delegate forwarded to the base implementation.</param>
 /// <param name="parameters">Extra arguments forwarded to the base implementation.</param>
 /// <returns>Whatever the base implementation returns.</returns>
 protected virtual K ExecuteFunction<K>(HealthTrackType type, string methodName, Func<K> function, params object[] parameters)
 {
     string trackName = string.Format(HealthReporter.BUSINESS_FORMAT, this.TrackPrefix + "." + methodName);

     // The tracking scope is disposed as soon as the base call returns or throws.
     using (HealthReporter.BeginTrack(type, trackName))
     {
         return base.ExecuteFunction<K>(methodName, function, parameters);
     }
 }
Пример #3
0
 /// <summary>
 /// Delegates to the base <c>ExecuteFunction</c> while inside the scope returned by
 /// <c>HealthReporter.BeginTrack</c>. The track uses <c>HealthTrackType.CountAndDurationAverage</c>
 /// and a name built from <c>HealthReporter.BUSINESS_FORMAT</c>, the track prefix and the method name.
 /// </summary>
 /// <typeparam name="K">Result type produced by <paramref name="function"/>.</typeparam>
 /// <param name="methodName">Method name appended to the track prefix and forwarded to the base call.</param>
 /// <param name="function">Delegate forwarded to the base implementation.</param>
 /// <param name="parameters">Extra arguments forwarded to the base implementation.</param>
 /// <returns>Whatever the base implementation returns.</returns>
 protected override K ExecuteFunction<K>(string methodName, Func<K> function, params object[] parameters)
 {
     string trackName = string.Format(HealthReporter.BUSINESS_FORMAT, this.TrackPrefix + "." + methodName);

     // The tracking scope is disposed as soon as the base call returns or throws.
     using (HealthReporter.BeginTrack(HealthTrackType.CountAndDurationAverage, trackName))
     {
         return base.ExecuteFunction<K>(methodName, function, parameters);
     }
 }
Пример #4
0
 /// <summary>
 /// Delegates to the base <c>ExecuteMethod</c> while inside the scope returned by
 /// <c>HealthReporter.BeginTrack</c>, using a caller-chosen track type and a name built from
 /// <c>HealthReporter.BUSINESS_FORMAT</c>, the track prefix and the method name.
 /// </summary>
 /// <param name="type">Track type handed to <c>HealthReporter.BeginTrack</c>.</param>
 /// <param name="methodName">Method name appended to the track prefix and forwarded to the base call.</param>
 /// <param name="action">Delegate forwarded to the base implementation.</param>
 /// <param name="parameters">Extra arguments forwarded to the base implementation.</param>
 protected virtual void ExecuteMethod(HealthTrackType type, string methodName, Action action, params object[] parameters)
 {
     string trackName = string.Format(HealthReporter.BUSINESS_FORMAT, this.TrackPrefix + "." + methodName);

     // The tracking scope is disposed as soon as the base call returns or throws.
     using (HealthReporter.BeginTrack(type, trackName))
     {
         base.ExecuteMethod(methodName, action, parameters);
     }
 }
        /// <summary>
        /// Tears the pipeline down once; repeated calls return immediately.
        /// Input subscriptions are disposed, the pipeline is polled for drain, the head block is
        /// completed, and the completion tasks are awaited for a bounded time before cancellation
        /// is signalled and the pipeline links are disposed. Inputs, sinks and the health reporter
        /// are disposed only when <c>disposeDependencies</c> is set.
        /// </summary>
        public void Dispose()
        {
            if (this.disposed)
            {
                return;
            }

            this.disposed = true;
            DisposeOf(this.inputSubscriptions);

            TimeSpan drainWait = PollWaitForPipelineDrain();
            pipelineHead.Complete();

            // Whatever is left of the configured timeout after draining, but never less than
            // 100 ms, so the blocks get some time to release their internal resources.
            var remainingMsec = this.pipelineConfiguration.PipelineCompletionTimeoutMsec - drainWait.TotalMilliseconds;
            TimeSpan completionWait = TimeSpan.FromMilliseconds(Math.Max(100, remainingMsec));
            Task.WaitAll(this.pipelineCompletionTasks.ToArray(), completionWait);

            this.cancellationTokenSource.Cancel();
            DisposeOf(this.pipelineLinkDisposables);

            if (!this.disposeDependencies)
            {
                return;
            }

            DisposeOf(this.Inputs);
            DisposeOf(this.Sinks);
            HealthReporter.Dispose();
        }
Пример #6
0
 /// <summary>
 /// Delegates to the base <c>ExecuteMethod</c> while inside the scope returned by
 /// <c>HealthReporter.BeginTrack</c>. The track uses <c>HealthTrackType.CountAndDurationAverage</c>
 /// and a name built from <c>HealthReporter.BUSINESS_FORMAT</c>, the track prefix and the method name.
 /// </summary>
 /// <param name="methodName">Method name appended to the track prefix and forwarded to the base call.</param>
 /// <param name="action">Delegate forwarded to the base implementation.</param>
 /// <param name="parameters">Extra arguments forwarded to the base implementation.</param>
 protected override void ExecuteMethod(string methodName, Action action, params object[] parameters)
 {
     string trackName = string.Format(HealthReporter.BUSINESS_FORMAT, this.TrackPrefix + "." + methodName);

     // The tracking scope is disposed as soon as the base call returns or throws.
     using (HealthReporter.BeginTrack(HealthTrackType.CountAndDurationAverage, trackName))
     {
         base.ExecuteMethod(methodName, action, parameters);
     }
 }
        /// <summary>
        /// Tears the pipeline down once; repeated calls return immediately.
        /// The disposed flag and the batcher timer are handled under <c>batcherTimerDisposalLock</c>.
        /// Afterwards the input subscriptions are disposed, the head block is completed, and the
        /// call blocks until either all output completion tasks finish or the configured timeout
        /// elapses. Inputs, sinks and the health reporter are disposed only when
        /// <c>disposeDependencies</c> is set.
        /// </summary>
        public void Dispose()
        {
            lock (this.batcherTimerDisposalLock)
            {
                if (this.disposed)
                {
                    return;
                }

                this.disposed = true;
                this.batcherTimer.Dispose();
            }

            DisposeOf(this.inputSubscriptions);
            pipelineHead.Complete();

            // Completion should propagate from the head to every output; once all outputs have
            // completed the pipeline is drained. The delay task caps how long we are willing to wait.
            Task allOutputsCompleted = Task.WhenAll(this.outputCompletionTasks.ToArray());
            Task timeout = Task.Delay(this.pipelineConfiguration.PipelineCompletionTimeoutMsec);
            Task.WhenAny(allOutputsCompleted, timeout).GetAwaiter().GetResult();

            this.cancellationTokenSource.Cancel();

            if (!this.disposeDependencies)
            {
                return;
            }

            DisposeOf(this.Inputs);
            DisposeOf(this.Sinks);
            HealthReporter.Dispose();
        }
        /// <summary>
        /// When called with <paramref name="disposing"/> set, sends an <c>HealthState.Ok</c>
        /// application health report for every tracked connection whose health is Error or Warning.
        /// Does nothing when <paramref name="disposing"/> is false.
        /// </summary>
        /// <param name="disposing">True when invoked from an explicit dispose call.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                foreach (var state in this.connectionStatus)
                {
                    bool hasErrorOrWarning = state.Health == HealthState.Error || state.Health == HealthState.Warning;

                    if (!hasErrorOrWarning)
                    {
                        continue;
                    }

                    // Overwrite the outstanding Error/Warning with an Ok report for the same property.
                    var okReport = new HealthReport
                    {
                        AppName = new Uri(state.TargetApp),
                        Code = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                        EmitLogEvent = true,
                        HealthMessage = $"Clearing NetworkObserver's Health Error/Warning for {state.TargetApp}/{state.HostName} connection state since FO is stopping.",
                        HealthReportTimeToLive = default(TimeSpan),
                        State = HealthState.Ok,
                        NodeName = NodeName,
                        Observer = ObserverName,
                        Property = $"EndpointUnreachable({state.HostName})",
                        ReportType = HealthReportType.Application,
                    };

                    HealthReporter.ReportHealthToServiceFabric(okReport);
                }
            }
        }
Пример #9
0
        /// <inheritdoc/>
        public override async Task ObserveAsync(CancellationToken token)
        {
            // A configured RunInterval (see the RunInterval parameter in Settings.xml) throttles
            // this observer: skip the run while the interval since the last run has not elapsed.
            bool ranTooRecently = RunInterval > TimeSpan.MinValue &&
                                  DateTime.Now.Subtract(LastRunDateTime) < RunInterval;

            if (ranTooRecently)
            {
                return;
            }

            bool isReady = Initialize();
            Token = token;

            if (!isReady)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    "This observer was unable to initialize correctly due to missing configuration info.");

                return;
            }

            try
            {
                perfCounters = new WindowsPerfCounters();
                diskUsage = new DiskUsage();

                foreach (var app in targetList)
                {
                    Token.ThrowIfCancellationRequested();

                    // Entries with neither a target nor a target type are skipped.
                    if (!string.IsNullOrWhiteSpace(app.Target) ||
                        !string.IsNullOrWhiteSpace(app.TargetType))
                    {
                        await MonitorAppAsync(app).ConfigureAwait(true);
                    }
                }

                await ReportAsync(token).ConfigureAwait(true);

                LastRunDateTime = DateTime.Now;
            }
            finally
            {
                // Release the per-run helpers whether or not the run succeeded.
                diskUsage?.Dispose();
                diskUsage = null;
                perfCounters?.Dispose();
                perfCounters = null;
            }
        }
        /// <summary>
        /// Copies the current <c>ServiceFabricConfiguration</c> values into this observer's fields
        /// and then calls <c>ReportAsync</c>. The run is skipped when the observer web app is not
        /// deployed or when the configured <c>RunInterval</c> has not elapsed since the last run.
        /// </summary>
        /// <param name="token">Cancellation token checked before and after the configuration read.</param>
        /// <returns>A task that completes when the report has been produced.</returns>
        public override async Task ObserveAsync(CancellationToken token)
        {
            // This observer is only useful if the web api is enabled for producing
            // an html page with a bunch of information that's easy to read in one go.
            // If RunInterval is set (see the RunInterval parameter in Settings.xml), the observer
            // also only runs once per interval.
            if (!ObserverManager.ObserverWebAppDeployed ||
                (RunInterval > TimeSpan.MinValue &&
                 DateTime.Now.Subtract(LastRunDateTime) < RunInterval))
            {
                return;
            }

            token.ThrowIfCancellationRequested();

            try
            {
                ServiceFabricConfiguration config = ServiceFabricConfiguration.Instance;
                this.SFVersion = config.FabricVersion;
                this.SFBinRoot = config.FabricBinRoot;
                this.SFCompatibilityJsonPath = config.CompatibilityJsonPath;
                this.SFCodePath = config.FabricCodePath;
                this.SFDataRoot = config.FabricDataRoot;
                this.SFLogRoot  = config.FabricLogRoot;
                SFRootDir       = config.FabricRoot;
                this.SFEnableCircularTraceSession      = config.EnableCircularTraceSession;
                this.SFVolumeDiskServiceEnabled        = config.IsSFVolumeDiskServiceEnabled;
                this.unsupportedPreviewFeaturesEnabled = config.EnableUnsupportedPreviewFeatures;
                this.SFNodeLastBootTime = config.NodeLastBootUpTime;
            }
            catch (Exception e) when (e is ArgumentException || e is IOException)
            {
                // Expected failure modes: report a warning and carry on with whatever was read.
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    $"{NodeName} | Handled Exception, but failed to read registry value:\n{e}");
            }
            catch (Exception e)
            {
                // Unexpected failure: report it with the actual node name, then let it propagate.
                // (The message previously contained the literal text "this.NodeName" because the
                // interpolation braces were missing.)
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    $"{NodeName} | Unhandled Exception trying to read registry value:\n{e}");

                throw;
            }

            token.ThrowIfCancellationRequested();

            await ReportAsync(token).ConfigureAwait(true);

            LastRunDateTime = DateTime.Now;
        }
Пример #11
0
        /// <summary>
        /// Main entry point of the service instance: starts a <c>HealthReporter</c>, idles until
        /// cancellation is requested or an unsupported configuration change is flagged, then stops
        /// the reporter.
        /// </summary>
        /// <param name="cancellationToken">Canceled when Service Fabric needs to shut down this service instance.</param>
        protected override async Task RunAsync(CancellationToken cancellationToken)
        {
            Guid traceId = Guid.NewGuid();

            using (var reporter = new HealthReporter(
                traceId,
                this.logger,
                ComponentName,
                this.healthReporterId,
                this.HealthReportCallback,
                this.Context,
                HealthReporter.ReportTypes.Instance,
                this.configurationProvider.Config.HealthReportInterval))
            {
                await reporter.StartAsync(traceId, cancellationToken).ConfigureAwait(false);

                this.logger.RunAsyncInvoked(traceId, ComponentName, this.GetType().FullName);

                // Poll every 250 ms until either stop condition is raised.
                while (!(cancellationToken.IsCancellationRequested || this.unsupportedConfigurationChangeOccurred))
                {
                    await Task.Delay(250, cancellationToken).ConfigureAwait(false);
                }

                await reporter.StopAsync(traceId, cancellationToken).ConfigureAwait(false);
            }

            this.logger.Informational(traceId, ComponentName, "RunAsync completed.");
        }
        /// <summary>
        /// Runs one observation pass: initializes, monitors the deployed apps, records how long
        /// that took in <c>RunDuration</c>, and then calls <c>ReportAsync</c>. The pass is skipped
        /// when the configured <c>RunInterval</c> has not elapsed since the last run.
        /// </summary>
        /// <param name="token">Cancellation token forwarded to monitoring and reporting.</param>
        /// <returns>A task that completes when the pass has finished.</returns>
        public override async Task ObserveAsync(CancellationToken token)
        {
            // If set, this observer will only run during the supplied interval.
            // See the RunInterval parameter in Settings.xml for an example.
            if (RunInterval > TimeSpan.MinValue &&
                DateTime.Now.Subtract(LastRunDateTime) < RunInterval)
            {
                return;
            }

            // Restart (reset + start) rather than Start: if a previous pass threw, for example on
            // cancellation inside MonitorDeployedAppsAsync, the stopwatch was left running and a
            // plain Start() would keep accumulating the idle time between passes into RunDuration.
            this.stopwatch.Restart();
            bool initialized = await InitializeAsync();

            Token = token;

            if (!initialized)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    "This observer was unable to initialize correctly due to missing configuration info.");

                this.stopwatch.Stop();
                this.stopwatch.Reset();

                return;
            }

            await MonitorDeployedAppsAsync(token).ConfigureAwait(false);

            // The time it took to get to ReportAsync.
            // For use in computing actual HealthReport TTL.
            this.stopwatch.Stop();
            RunDuration = this.stopwatch.Elapsed;
            this.stopwatch.Reset();

            await ReportAsync(token).ConfigureAwait(true);

            LastRunDateTime = DateTime.Now;
        }
Пример #13
0
        /// <summary>
        /// Tears the pipeline down once; repeated calls return immediately.
        /// Input subscriptions are disposed, the head block is completed and its completion is
        /// awaited for at most the configured timeout, then cancellation is signalled and the
        /// pipeline disposables are released. Inputs, sinks and the health reporter are disposed
        /// only when <c>disposeDependencies</c> is set.
        /// </summary>
        public void Dispose()
        {
            if (this.disposed)
            {
                return;
            }

            this.disposed = true;
            DisposeOf(this.inputSubscriptions);

            this.pipelineHead.Complete();

            // Bounded wait for the head block to finish.
            var headCompletion = this.pipelineHead.Completion;
            TimeSpan completionTimeout = TimeSpan.FromMilliseconds(this.pipelineConfiguration.PipelineCompletionTimeoutMsec);
            headCompletion.Wait(completionTimeout);

            this.cancellationTokenSource.Cancel();
            DisposeOf(this.pipelineDisposables);

            if (!this.disposeDependencies)
            {
                return;
            }

            DisposeOf(this.Inputs);
            DisposeOf(this.Sinks);
            HealthReporter.Dispose();
        }
Пример #14
0
        /// <summary>
        /// Finds the <c>ISynchronizer</c> types in the assemblies returned by the type finder,
        /// resolves them from the container, and runs them in ascending <c>Priority</c> order.
        /// Synchronizers that share a priority run in parallel; the next priority group starts
        /// only after the whole current group has finished.
        /// </summary>
        /// <param name="specificTable">
        /// Optional filter. When null or empty every synchronizer type is considered; otherwise
        /// only types whose name contains this text.
        /// </param>
        protected virtual void PerformProcessSync(string specificTable)
        {
            base.ExecuteMethod("PerformProcessSync", delegate()
            {
                IFindClassTypes finder                  = this.IFoundation.Resolve <IFindClassTypes>();
                IEnumerable <Type> synchronizers        = FindInterfacesOfType(typeof(ISynchronizer), finder.GetAssemblies(null));
                List <ISynchronizer> synchronizersToRun = new List <ISynchronizer>();
                foreach (Type item in synchronizers)
                {
                    if (string.IsNullOrEmpty(specificTable) || item.Name.Contains(specificTable)) // not perfect.. but should be good enough, its just for dev ease anyway
                    {
                        base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} Loading", item.ToString()));
                        if (!item.IsGenericTypeDefinition && item != typeof(ISynchronizer))
                        {
                            try
                            {
                                ISynchronizer synchronizer = this.IFoundation.Container.Resolve(item, string.Empty) as ISynchronizer;
                                base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} Running", item.ToString()));
                                if (synchronizer != null)
                                {
                                    synchronizersToRun.Add(synchronizer);
                                }
                            }
                            catch (Exception ex)
                            {
                                // Resolution stays best-effort (the type is skipped), but the
                                // failure is now logged instead of being swallowed silently.
                                base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} could not be resolved: {1}", item.ToString(), ex.Message));
                            }
                        }
                        else
                        {
                            // Added the space that was missing between the type name and the text.
                            base.IFoundation.LogWarning("ElasticSearchDaemon: " + item.ToString() + " is a generic or the base interface");
                        }
                    }
                }

                // order them
                synchronizersToRun = synchronizersToRun.OrderBy(x => x.Priority).ToList();

                // process them by bulk-priority
                while (synchronizersToRun.Count > 0)
                {
                    // The list is sorted, so the first entry carries the lowest outstanding priority.
                    int priority = synchronizersToRun[0].Priority;
                    List <ISynchronizer> itemsWithPriority = synchronizersToRun.Where(x => x.Priority == priority).ToList();
                    List <Task> tasks = new List <Task>();

                    foreach (var synchronizer in itemsWithPriority)
                    {
                        synchronizersToRun.Remove(synchronizer);

                        tasks.Add(Task.Run(delegate()
                        {
                            try
                            {
                                using (var scope = HealthReporter.BeginTrack(HealthTrackType.DurationAverage, string.Format(HealthReporter.INDEXER_QUEUE_TIME_FORMAT, synchronizer.EntityName)))
                                {
#pragma warning disable 612, 618
                                    int count = synchronizer.PerformSynchronization(this.AgentName);
#pragma warning restore 612, 618
                                    if (count > 0)
                                    {
                                        HealthReporter.Current.UpdateMetric(HealthTrackType.Count, string.Format(HealthReporter.INDEXER_QUEUE_SIZE_FORMAT, synchronizer.EntityName), 0, count);
                                    }
                                }

                                base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} Complete", synchronizer.ToString()));
                            }
                            catch (Exception ex)
                            {
                                // A failing synchronizer must not prevent the rest of its group from finishing.
                                base.IFoundation.LogError(ex, "PerformProcessSync" + synchronizer.GetType().ToString());
                                base.IFoundation.LogWarning(string.Format("ElasticSearchDaemon.{0} Error", synchronizer.ToString()));
                            }
                        }));
                    }

                    base.IFoundation.LogWarning("ElasticSearchDaemon.Waiting");
                    Task.WaitAll(tasks.ToArray());
                    base.IFoundation.LogWarning("ElasticSearchDaemon.Done");
                }
            });
        }
        /// <summary>
        /// Reports the state of every tracked connection that belongs to a configured target app.
        /// A disconnected endpoint produces a Warning health report (plus optional telemetry and ETW).
        /// A connected endpoint that is currently in Warning or Error produces an Ok report that
        /// clears it. Afterwards connected entries are removed from the tracked list and the
        /// endpoint test results are cleared.
        /// </summary>
        /// <param name="token">Cancellation token checked before each configuration and connection.</param>
        /// <returns>A completed task; the method itself runs synchronously.</returns>
        public override Task ReportAsync(CancellationToken token)
        {
            var timeToLiveWarning = SetHealthReportTimeToLive();

            // Report on connection state.
            foreach (var config in this.userConfig)
            {
                token.ThrowIfCancellationRequested();

                foreach (var conn in this.connectionStatus.Where(cs => cs.TargetApp == config.TargetApp))
                {
                    token.ThrowIfCancellationRequested();

                    if (!conn.Connected)
                    {
                        this.healthState = HealthState.Warning;
                        var healthMessage = $"Outbound Internet connection failure detected for endpoint {conn.HostName}{Environment.NewLine}";

                        // Health telemetry (it may signal an alert in AppInsights or LogAnalytics).
                        // The same data is also attached to the health report below as HealthData.
                        var telemetryData = new TelemetryData(FabricClientInstance, token)
                        {
                            ApplicationName        = conn.TargetApp,
                            Code                   = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                            HealthState            = "Warning",
                            HealthEventDescription = healthMessage,
                            ObserverName           = ObserverName,
                            Metric                 = ErrorWarningProperty.InternetConnectionFailure,
                            NodeName               = NodeName,
                        };

                        if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                        {
                            _ = TelemetryClient?.ReportMetricAsync(
                                telemetryData,
                                Token);
                        }

                        var report = new HealthReport
                        {
                            AppName                = new Uri(conn.TargetApp),
                            EmitLogEvent           = true,
                            HealthData             = telemetryData,
                            HealthMessage          = healthMessage,
                            HealthReportTimeToLive = timeToLiveWarning,
                            State      = this.healthState,
                            NodeName   = NodeName,
                            Observer   = ObserverName,
                            Property   = $"EndpointUnreachable({conn.HostName})",
                            ReportType = HealthReportType.Application,
                            ResourceUsageDataProperty = $"{ErrorWarningProperty.InternetConnectionFailure}: {conn.HostName}",
                        };

                        // Send health report Warning and log event locally.
                        HealthReporter.ReportHealthToServiceFabric(report);

                        // This means this observer created a Warning or Error SF Health Report
                        HasActiveFabricErrorOrWarning = true;

                        // ETW.
                        if (IsEtwEnabled)
                        {
                            Logger.EtwLogger?.Write(
                                ObserverConstants.FabricObserverETWEventName,
                                new
                                {
                                    ApplicationName        = conn.TargetApp,
                                    Code                   = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                                    HealthState            = "Warning",
                                    HealthEventDescription = healthMessage,
                                    ObserverName,
                                    Metric = ErrorWarningProperty.InternetConnectionFailure,
                                    NodeName,
                                });
                        }
                    }
                    else
                    {
                        // Only a connection that is currently flagged Warning or Error needs clearing.
                        // The original test used "||", which is true for every value, so this
                        // branch always skipped and the Ok report below was never sent.
                        if (conn.Health != HealthState.Warning &&
                            conn.Health != HealthState.Error)
                        {
                            continue;
                        }

                        this.healthState = HealthState.Ok;
                        var healthMessage = $"Outbound Internet connection successful for {conn.HostName} from node {NodeName}.";

                        // Clear existing Health Warning.
                        var report = new HealthReport
                        {
                            AppName                = new Uri(conn.TargetApp),
                            Code                   = FOErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                            EmitLogEvent           = true,
                            HealthMessage          = healthMessage,
                            HealthReportTimeToLive = default(TimeSpan),
                            State                  = HealthState.Ok,
                            NodeName               = NodeName,
                            Observer               = ObserverName,
                            Property               = $"EndpointUnreachable({conn.HostName})",
                            ReportType             = HealthReportType.Application,
                        };

                        HealthReporter.ReportHealthToServiceFabric(report);

                        // Telemetry.
                        if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                        {
                            var telemetryData = new TelemetryData(FabricClientInstance, token)
                            {
                                ApplicationName        = conn.TargetApp,
                                Code                   = FOErrorWarningCodes.Ok,
                                HealthState            = "Ok",
                                HealthEventDescription = healthMessage,
                                ObserverName           = ObserverName,
                                Metric                 = "Internet Connection State",
                                NodeName               = NodeName,
                            };

                            _ = TelemetryClient?.ReportMetricAsync(
                                telemetryData,
                                Token);
                        }

                        // ETW.
                        if (IsEtwEnabled)
                        {
                            Logger.EtwLogger?.Write(
                                ObserverConstants.FabricObserverETWEventName,
                                new
                                {
                                    ApplicationName        = conn.TargetApp,
                                    Code                   = FOErrorWarningCodes.Ok,
                                    HealthState            = "Ok",
                                    HealthEventDescription = healthMessage,
                                    ObserverName,
                                    Metric = "Internet Connection State",
                                    NodeName,
                                });
                        }

                        // Reset health state.
                        HasActiveFabricErrorOrWarning = false;
                    }
                }
            }

            // Drop the entries that are connected; disconnected ones stay tracked.
            _ = this.connectionStatus.RemoveAll(conn => conn.Connected);
            this.connectionStatus.TrimExcess();
            this.connEndpointTestResults.Clear();

            return Task.CompletedTask;
        }
        /// <summary>
        /// Samples resource usage for the host process of every replica or instance in
        /// <c>ReplicaOrInstanceList</c> that matches a configured target app or app type.
        /// Port counts are recorded once per process; CPU and memory are sampled every 250 ms for
        /// the monitoring duration (<c>MonitorDuration</c> when set, otherwise 15 seconds).
        /// Only metrics with a warning or error threshold in the app's configuration are collected.
        /// </summary>
        /// <param name="token">Cancellation token checked between processes and between samples.</param>
        /// <returns>A task that completes when all matching processes have been sampled.</returns>
        private async Task MonitorDeployedAppsAsync(CancellationToken token)
        {
            Process currentProcess = null;

            foreach (var repOrInst in ReplicaOrInstanceList)
            {
                token.ThrowIfCancellationRequested();

                var  timer = new Stopwatch();
                int  processId = (int)repOrInst.HostProcessId;
                var  cpuUsage = new CpuUsage();
                bool checkCpu = false, checkMemMb = false, checkMemPct = false, checkAllPorts = false, checkEphemeralPorts = false;

                // Case-insensitive ordinal match on app name or app type (these are identifiers,
                // not linguistic text, so no culture-sensitive lower-casing).
                var  application = this.deployedTargetList?.FirstOrDefault(
                    app => string.Equals(app?.TargetApp, repOrInst.ApplicationName?.OriginalString, StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(app?.TargetAppType, repOrInst.ApplicationTypeName, StringComparison.OrdinalIgnoreCase));

                if (application?.TargetApp == null && application?.TargetAppType == null)
                {
                    continue;
                }

                try
                {
                    // App level.
                    currentProcess = Process.GetProcessById(processId);

                    token.ThrowIfCancellationRequested();

                    var    procName      = currentProcess.ProcessName;
                    string appNameOrType = GetAppNameOrType(repOrInst);

                    var id = $"{appNameOrType}:{procName}";

                    // Add new resource data structures for each app service process where the metric is specified in configuration for related observation.
                    if (this.AllAppCpuData.All(list => list.Id != id) && (application.CpuErrorLimitPercent > 0 || application.CpuWarningLimitPercent > 0))
                    {
                        this.AllAppCpuData.Add(new FabricResourceUsageData <double>(ErrorWarningProperty.TotalCpuTime, id, DataCapacity, UseCircularBuffer));
                    }

                    if (this.AllAppCpuData.Any(list => list.Id == id))
                    {
                        checkCpu = true;
                    }

                    if (this.AllAppMemDataMb.All(list => list.Id != id) && (application.MemoryErrorLimitMb > 0 || application.MemoryWarningLimitMb > 0))
                    {
                        this.AllAppMemDataMb.Add(new FabricResourceUsageData <float>(ErrorWarningProperty.TotalMemoryConsumptionMb, id, DataCapacity, UseCircularBuffer));
                    }

                    if (this.AllAppMemDataMb.Any(list => list.Id == id))
                    {
                        checkMemMb = true;
                    }

                    if (this.AllAppMemDataPercent.All(list => list.Id != id) && (application.MemoryErrorLimitPercent > 0 || application.MemoryWarningLimitPercent > 0))
                    {
                        this.AllAppMemDataPercent.Add(new FabricResourceUsageData <double>(ErrorWarningProperty.TotalMemoryConsumptionPct, id, DataCapacity, UseCircularBuffer));
                    }

                    if (this.AllAppMemDataPercent.Any(list => list.Id == id))
                    {
                        checkMemPct = true;
                    }

                    if (this.AllAppTotalActivePortsData.All(list => list.Id != id) && (application.NetworkErrorActivePorts > 0 || application.NetworkWarningActivePorts > 0))
                    {
                        this.AllAppTotalActivePortsData.Add(new FabricResourceUsageData <int>(ErrorWarningProperty.TotalActivePorts, id, 1));
                    }

                    if (this.AllAppTotalActivePortsData.Any(list => list.Id == id))
                    {
                        checkAllPorts = true;
                    }

                    if (this.AllAppEphemeralPortsData.All(list => list.Id != id) && (application.NetworkErrorEphemeralPorts > 0 || application.NetworkWarningEphemeralPorts > 0))
                    {
                        this.AllAppEphemeralPortsData.Add(new FabricResourceUsageData <int>(ErrorWarningProperty.TotalEphemeralPorts, id, 1));
                    }

                    if (this.AllAppEphemeralPortsData.Any(list => list.Id == id))
                    {
                        checkEphemeralPorts = true;
                    }

                    // Measure Total and Ephemeral ports.
                    if (checkAllPorts)
                    {
                        this.AllAppTotalActivePortsData.FirstOrDefault(x => x.Id == id).Data.Add(OperatingSystemInfoProvider.Instance.GetActivePortCount(currentProcess.Id, FabricServiceContext));
                    }

                    if (checkEphemeralPorts)
                    {
                        this.AllAppEphemeralPortsData.FirstOrDefault(x => x.Id == id).Data.Add(OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount(currentProcess.Id, FabricServiceContext));
                    }

                    // No need to proceed further if no cpu and mem thresholds are specified in configuration.
                    if (!checkCpu && !checkMemMb && !checkMemPct)
                    {
                        continue;
                    }

                    /* CPU and Memory Usage */

                    TimeSpan duration = TimeSpan.FromSeconds(15);

                    if (MonitorDuration > TimeSpan.MinValue)
                    {
                        duration = MonitorDuration;
                    }

                    // Warm up the counters.
                    if (checkCpu)
                    {
                        _ = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);
                    }

                    if (checkMemMb || checkMemPct)
                    {
                        _ = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);
                    }

                    timer.Start();

                    // Compare whole TimeSpans. The previous code compared the .Seconds components
                    // (0-59), so a duration of exactly one minute sampled for about one second and
                    // a duration of 90 seconds sampled for about 30.
                    while (!currentProcess.HasExited && timer.Elapsed <= duration)
                    {
                        token.ThrowIfCancellationRequested();

                        if (checkCpu)
                        {
                            // CPU (all cores).
                            double cpu = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);

                            if (cpu >= 0)
                            {
                                if (cpu > 100)
                                {
                                    cpu = 100;
                                }

                                this.AllAppCpuData.FirstOrDefault(x => x.Id == id).Data.Add(cpu);
                            }
                        }

                        float processMem = 0;

                        if (checkMemMb || checkMemPct)
                        {
                            processMem = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);
                        }

                        if (checkMemMb)
                        {
                            // Memory (private working set (process)).
                            this.AllAppMemDataMb.FirstOrDefault(x => x.Id == id).Data.Add(processMem);
                        }

                        if (checkMemPct)
                        {
                            // Memory (percent in use (total)). Only the total is needed here.
                            var (totalMemory, _) = OperatingSystemInfoProvider.Instance.TupleGetTotalPhysicalMemorySizeAndPercentInUse();
                            long totalMem = totalMemory;

                            if (totalMem > 0)
                            {
                                // NOTE(review): the "* 1024" presumably converts a total reported in GB to MB - confirm against the provider.
                                double usedPct = Math.Round(((double)(processMem * 100)) / (totalMem * 1024), 2);
                                this.AllAppMemDataPercent.FirstOrDefault(x => x.Id == id).Data.Add(Math.Round(usedPct, 1));
                            }
                        }

                        // Honor the token this method was called with.
                        await Task.Delay(250, token);
                    }

                    timer.Stop();
                    timer.Reset();
                }
                catch (Exception e)
                {
#if DEBUG
                    // DEBUG INFO
                    var healthReport = new Utilities.HealthReport
                    {
                        AppName       = repOrInst.ApplicationName,
                        HealthMessage = $"Error:{Environment.NewLine}{e}{Environment.NewLine}",
                        State         = HealthState.Ok,
                        Code          = FOErrorWarningCodes.Ok,
                        NodeName      = NodeName,
                        Observer      = ObserverName,
                        Property      = $"{e.Source}",
                        ReportType    = HealthReportType.Application,
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);
#endif
                    // These exception types are logged at Information level and the loop moves
                    // on to the next replica; anything else is rethrown below.
                    if (e is Win32Exception || e is ArgumentException || e is InvalidOperationException)
                    {
                        WriteToLogWithLevel(
                            ObserverName,
                            $"MonitorAsync failed to find current service process for {repOrInst.ApplicationName?.OriginalString ?? repOrInst.ApplicationTypeName}{Environment.NewLine}{e}",
                            LogLevel.Information);
                    }
                    else
                    {
                        // Cancellation is not worth a warning; everything else is.
                        if (!(e is OperationCanceledException || e is TaskCanceledException))
                        {
                            WriteToLogWithLevel(
                                ObserverName,
                                $"Unhandled exception in MonitorAsync:{Environment.NewLine}{e}",
                                LogLevel.Warning);
                        }

                        throw;
                    }
                }
                finally
                {
                    currentProcess?.Dispose();
                    currentProcess = null;
                }
            }
        }
        /// <summary>
        /// Assembles a plain-text summary of the Service Fabric settings held by this observer,
        /// appends the deployed application information, and writes the result to
        /// SFInfraInfo.txt in the observer log folder.
        /// </summary>
        /// <param name="token">Cancellation token; checked on entry and again just before the file is written.</param>
        /// <returns>A task that completes once the file write has been attempted.</returns>
        public override async Task ReportAsync(CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            var info = new StringBuilder();
            info.AppendLine("\nService Fabric information:\n");

            // Every setting below is emitted only when a value is available for it.
            if (!string.IsNullOrEmpty(SFVersion))
            {
                info.AppendLine("Runtime Version: " + SFVersion);
            }

            if (SFBinRoot != null)
            {
                info.AppendLine("Fabric Bin root directory: " + SFBinRoot);
            }

            if (SFCodePath != null)
            {
                info.AppendLine("Fabric Code Path: " + SFCodePath);
            }

            if (!string.IsNullOrEmpty(SFDataRoot))
            {
                info.AppendLine("Data root directory: " + SFDataRoot);
            }

            if (!string.IsNullOrEmpty(SFLogRoot))
            {
                info.AppendLine("Log root directory: " + SFLogRoot);
            }

            if (SFVolumeDiskServiceEnabled != null)
            {
                info.AppendLine("Volume Disk Service Enabled: " + SFVolumeDiskServiceEnabled);
            }

            if (unsupportedPreviewFeaturesEnabled != null)
            {
                info.AppendLine("Unsupported Preview Features Enabled: " + unsupportedPreviewFeaturesEnabled);
            }

            if (SFCompatibilityJsonPath != null)
            {
                info.AppendLine("Compatibility Json path: " + SFCompatibilityJsonPath);
            }

            if (SFEnableCircularTraceSession != null)
            {
                info.AppendLine("Enable Circular trace session: " + SFEnableCircularTraceSession);
            }

            // Deployed application details follow the settings, terminated by a blank line.
            var deployedAppsInfo = await GetDeployedAppsInfoAsync(token).ConfigureAwait(true);
            info.Append(deployedAppsInfo);
            info.AppendLine();

            token.ThrowIfCancellationRequested();

            // This file is consumed by the web application (ObserverWebApi).
            string infoFilePath = Path.Combine(ObserverLogger.LogFolderBasePath, "SFInfraInfo.txt");
            bool fileWritten = ObserverLogger.TryWriteLogFile(infoFilePath, info.ToString());

            if (!fileWritten)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    "Unable to create SFInfraInfo.txt file.");
            }

            info.Clear();
        }
Пример #18
0
        /// <summary>
        /// Writes the collected node-level resource data to the CSV log (when CSV logging is enabled)
        /// and submits each data set, together with its error and warning thresholds, for health evaluation.
        /// </summary>
        /// <param name="token">Cancellation token; checked once on entry.</param>
        /// <returns>An already-completed task; all work is performed synchronously.</returns>
        /// <exception cref="OperationCanceledException">
        /// The token was cancelled. Cancellation propagates to the caller without raising a health warning.
        /// </exception>
        public override Task ReportAsync(CancellationToken token)
        {
            try
            {
                token.ThrowIfCancellationRequested();

                if (CsvFileLogger != null && CsvFileLogger.EnableCsvLogging)
                {
                    var fileName = "CpuMemFirewallsPorts" + NodeName;

                    // Log (csv) system-wide CPU/Mem data.
                    CsvFileLogger.LogData(
                        fileName,
                        NodeName,
                        "CPU Time",
                        "Average",
                        Math.Round(AllCpuTimeData.AverageDataValue));

                    CsvFileLogger.LogData(
                        fileName,
                        NodeName,
                        "CPU Time",
                        "Peak",
                        Math.Round(AllCpuTimeData.MaxDataValue));

                    CsvFileLogger.LogData(
                        fileName,
                        NodeName,
                        "Committed Memory (MB)",
                        "Average",
                        Math.Round(this.allMemDataCommittedBytes.AverageDataValue));

                    CsvFileLogger.LogData(
                        fileName,
                        NodeName,
                        "Committed Memory (MB)",
                        "Peak",
                        Math.Round(this.allMemDataCommittedBytes.MaxDataValue));

                    // NOTE(review): the three entries below index element 0 without checking that the
                    // collection is non-empty - confirm data is always collected before this method runs.
                    CsvFileLogger.LogData(
                        fileName,
                        NodeName,
                        "All Active Ports",
                        "Total",
                        this.activePortsData.Data[0]);

                    CsvFileLogger.LogData(
                        fileName,
                        NodeName,
                        "Ephemeral Active Ports",
                        "Total",
                        this.ephemeralPortsData.Data[0]);

                    CsvFileLogger.LogData(
                        fileName,
                        NodeName,
                        "Firewall Rules",
                        "Total",
                        this.firewallData.Data[0]);

                    DataTableFileLogger.Flush();
                }

                // Report on the global health state (system-wide (node) metrics).
                // User-configurable in NodeObserver.config.json
                var timeToLiveWarning = SetHealthReportTimeToLive();

                // CPU: evaluated only when a positive average was recorded.
                if (AllCpuTimeData.AverageDataValue > 0)
                {
                    ProcessResourceDataReportHealth(
                        AllCpuTimeData,
                        CpuErrorUsageThresholdPct,
                        CpuWarningUsageThresholdPct,
                        timeToLiveWarning);
                }

                // Memory (committed, MB).
                if (this.allMemDataCommittedBytes.AverageDataValue > 0)
                {
                    ProcessResourceDataReportHealth(
                        this.allMemDataCommittedBytes,
                        MemErrorUsageThresholdMb,
                        MemWarningUsageThresholdMb,
                        timeToLiveWarning);
                }

                // Memory (percent in use).
                if (this.allMemDataPercentUsed.AverageDataValue > 0)
                {
                    ProcessResourceDataReportHealth(
                        this.allMemDataPercentUsed,
                        MemoryErrorLimitPercent,
                        MemoryWarningLimitPercent,
                        timeToLiveWarning);
                }

                // Firewall rules
                ProcessResourceDataReportHealth(
                    this.firewallData,
                    FirewallRulesErrorThreshold,
                    FirewallRulesWarningThreshold,
                    timeToLiveWarning);

                // Ports - Active TCP
                ProcessResourceDataReportHealth(
                    this.activePortsData,
                    ActivePortsErrorThreshold,
                    ActivePortsWarningThreshold,
                    timeToLiveWarning);

                // Ports - Active Ephemeral TCP
                ProcessResourceDataReportHealth(
                    this.ephemeralPortsData,
                    EphemeralPortsErrorThreshold,
                    EphemeralPortsWarningThreshold,
                    timeToLiveWarning);

                return Task.CompletedTask;
            }
            catch (AggregateException e) when (e.InnerException is OperationCanceledException || e.InnerException is TaskCanceledException || e.InnerException is TimeoutException)
            {
                return Task.CompletedTask;
            }
            catch (Exception e) when (!(e is OperationCanceledException))
            {
                // Cancellation (OperationCanceledException and its subclass TaskCanceledException) is
                // filtered out above: it is thrown directly by ThrowIfCancellationRequested and is not
                // an error condition, so it must not surface as an "unhandled exception" health warning.
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    $"Unhandled exception re-thrown:{Environment.NewLine}{e}");

                throw;
            }
        }
        // This runs each time ObserveAsync is run to ensure that any new app targets and config changes will
        // be up to date across observer loop iterations.

        /// <summary>
        /// Reloads the application target list from the observer's JSON configuration file and
        /// rebuilds the deployed replica/instance list for every valid target.
        /// </summary>
        /// <returns>
        /// <c>true</c> when at least one configured target names an application or application type;
        /// <c>false</c> when the configuration file is missing, yields no targets, or every target
        /// lacks both settings.
        /// </returns>
        private async Task<bool> InitializeAsync()
        {
            if (ReplicaOrInstanceList == null)
            {
                ReplicaOrInstanceList = new List<ReplicaOrInstanceMonitoringInfo>();
            }

            if (!IsTestRun)
            {
                configSettings.Initialize(
                    FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(
                        ObserverConstants.ObserverConfigurationPackageName)?.Settings,
                    ConfigurationSectionName,
                    "AppObserverDataFileName");
            }

            // For unit tests, this path will be an empty string and not generate an exception.
            var appObserverConfigFileName = Path.Combine(
                ConfigPackagePath ?? string.Empty,
                configSettings.AppObserverConfigFileName ?? string.Empty);

            if (!File.Exists(appObserverConfigFileName))
            {
                WriteToLogWithLevel(
                    ObserverName,
                    $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
                    LogLevel.Information);

                return false;
            }

            // This code runs each time ObserveAsync is called,
            // so clear app list and deployed replica/instance list in case a new app has been added to watch list.
            if (this.userTargetList.Count > 0)
            {
                this.userTargetList.Clear();
                ReplicaOrInstanceList.Clear();
            }

            if (this.deployedTargetList.Count > 0)
            {
                this.deployedTargetList.Clear();
            }

            using Stream stream = new FileStream(
                      appObserverConfigFileName,
                      FileMode.Open,
                      FileAccess.Read,
                      FileShare.Read);

            // The file text is validated as JSON through a separate read before the open stream is deserialized.
            if (stream.Length > 0 &&
                JsonHelper.IsJson<List<ApplicationInfo>>(File.ReadAllText(appObserverConfigFileName)))
            {
                this.userTargetList.AddRange(JsonHelper.ReadFromJsonStream<ApplicationInfo[]>(stream));
            }

            // Are any of the config-supplied apps deployed?.
            if (this.userTargetList.Count == 0)
            {
                WriteToLogWithLevel(
                    ObserverName,
                    $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
                    LogLevel.Information);

                return false;
            }

            int settingsFail = 0;

            foreach (var application in this.userTargetList)
            {
                // A target must name either an application or an application type.
                if (string.IsNullOrWhiteSpace(application.TargetApp) &&
                    string.IsNullOrWhiteSpace(application.TargetAppType))
                {
                    HealthReporter.ReportFabricObserverServiceHealth(
                        FabricServiceContext.ServiceName.ToString(),
                        ObserverName,
                        HealthState.Warning,
                        $"Initialize() | {application.TargetApp}: Required setting, target, is not set.");

                    settingsFail++;

                    continue;
                }

                if (!string.IsNullOrEmpty(application.TargetAppType))
                {
                    await SetDeployedApplicationReplicaOrInstanceListAsync(
                        null,
                        application.TargetAppType).ConfigureAwait(false);
                }
                else
                {
                    await SetDeployedApplicationReplicaOrInstanceListAsync(new Uri(application.TargetApp))
                    .ConfigureAwait(false);
                }
            }

            // No required settings supplied for any configured application.
            // This check has to run after the loop: inside it, invalid entries skip the rest of the
            // body via 'continue', so the failure count could never equal the list size there.
            if (settingsFail == this.userTargetList.Count)
            {
                return false;
            }

            foreach (var repOrInst in ReplicaOrInstanceList)
            {
                ObserverLogger.LogInfo(
                    $"Will observe resource consumption by {repOrInst.ApplicationName?.OriginalString} " +
                    $"on Node {NodeName}.");
            }

            return true;
        }
Пример #20
0
        // Initialize() runs each time ObserveAsync is run to ensure
        // that any new app targets and config changes will
        // be up to date across observer loop iterations.

        /// <summary>
        /// Reloads the application target list from the observer's JSON configuration file and
        /// validates that the configured targets carry the required settings.
        /// </summary>
        /// <returns>
        /// <c>true</c> for a unit test run, or when at least one configured target names an application
        /// or application type; <c>false</c> when the configuration file is missing, yields no targets,
        /// or every target lacks both settings.
        /// </returns>
        private bool Initialize()
        {
            if (replicaOrInstanceList == null)
            {
                replicaOrInstanceList = new List<ReplicaOrInstanceMonitoringInfo>();
            }

            // Is this a unit test run? If so, register a single fake target and skip configuration loading.
            if (IsTestRun)
            {
                replicaOrInstanceList.Add(new ReplicaOrInstanceMonitoringInfo
                {
                    ApplicationName     = new Uri("fabric:/TestApp"),
                    PartitionId         = Guid.NewGuid(),
                    HostProcessId       = 0,
                    ReplicaOrInstanceId = default(long),
                });

                return true;
            }

            ConfigSettings.Initialize(FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(ObserverConstants.ObserverConfigurationPackageName)?.Settings, ObserverConstants.AppObserverConfigurationSectionName, "AppObserverDataFileName");
            var appObserverConfigFileName = Path.Combine(configPackagePath, ConfigSettings.AppObserverDataFileName);

            if (!File.Exists(appObserverConfigFileName))
            {
                WriteToLogWithLevel(
                    ObserverName,
                    $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
                    LogLevel.Information);

                return false;
            }

            // this code runs each time ObserveAsync is called,
            // so clear app list and deployed replica/instance list in case a new app has been added to watch list.
            if (targetList.Count > 0)
            {
                targetList.Clear();
                replicaOrInstanceList.Clear();
            }

            using (Stream stream = new FileStream(appObserverConfigFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                // Any non-empty file is handed to the JSON validity check. A fixed byte-length threshold
                // would silently discard short but well-formed configurations.
                if (stream.Length > 0 &&
                    JsonHelper.IsJson<List<ApplicationInfo>>(File.ReadAllText(appObserverConfigFileName)))
                {
                    targetList.AddRange(JsonHelper.ReadFromJsonStream<ApplicationInfo[]>(stream));
                }
            }

            // Are any of the config-supplied apps deployed?.
            if (targetList.Count == 0)
            {
                WriteToLogWithLevel(
                    ObserverName,
                    $"Will not observe resource consumption as no configuration parameters have been supplied. | {NodeName}",
                    LogLevel.Information);

                return false;
            }

            int settingsFail = 0;

            foreach (var application in targetList)
            {
                // A target must name either an application or an application type.
                if (string.IsNullOrWhiteSpace(application.Target) &&
                    string.IsNullOrWhiteSpace(application.TargetType))
                {
                    HealthReporter.ReportFabricObserverServiceHealth(
                        FabricServiceContext.ServiceName.ToString(),
                        ObserverName,
                        HealthState.Warning,
                        $"Initialize() | {application.Target}: Required setting, target, is not set.");

                    settingsFail++;

                    continue;
                }

                ObserverLogger.LogInfo(
                    $"Will observe resource consumption by {application.Target ?? application.TargetType} " +
                    $"on Node {NodeName}.");
            }

            // No required settings supplied for any configured application.
            // This check has to run after the loop: inside it, invalid entries skip the rest of the
            // body via 'continue', so the failure count could never equal the list size there.
            if (settingsFail == targetList.Count)
            {
                return false;
            }

            return true;
        }
 /// <summary>
 /// Creates a <see cref="LastExceptionHealthCheck"/> bound to the given reporter.
 /// </summary>
 /// <param name="healthReporter">Reporter instance stored in the <c>healthReporter</c> field.</param>
 public LastExceptionHealthCheck(HealthReporter healthReporter) => this.healthReporter = healthReporter;
Пример #22
0
        /// <summary>
        /// Records one sample each of active port, ephemeral port and firewall rule counts, then
        /// samples CPU and memory usage roughly every 250 ms until the observer stopwatch passes
        /// the monitoring duration.
        /// </summary>
        /// <param name="token">Cancellation token; checked on entry and at the top of every sampling iteration.</param>
        /// <returns>A task that completes when sampling has finished.</returns>
        /// <exception cref="OperationCanceledException">
        /// The token was cancelled. Cancellation propagates to the caller without raising a health warning.
        /// </exception>
        private async Task GetSystemCpuMemoryValuesAsync(CancellationToken token)
        {
            token.ThrowIfCancellationRequested();

            CpuUtilizationProvider cpuUtilizationProvider = null;

            try
            {
                // Ports.
                int activePortCountTotal    = OperatingSystemInfoProvider.Instance.GetActivePortCount();
                int ephemeralPortCountTotal = OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount();
                this.activePortsData.Data.Add(activePortCountTotal);
                this.ephemeralPortsData.Data.Add(ephemeralPortCountTotal);

                // Firewall rules.
                int firewalls = NetworkUsage.GetActiveFirewallRulesCount();
                this.firewallData.Data.Add(firewalls);

                // CPU and Memory.
                // Note: Please make sure you understand the normal state of your nodes
                // with respect to the machine resource use and/or abuse by your service(s).
                // For example, if it is normal for your services to consume 90% of available CPU and memory
                // as part of the work they perform under normal traffic flow, then it doesn't make sense to warn or
                // error on these conditions.
                // TODO: Look into making this a long running background task with signaling.
                TimeSpan duration = TimeSpan.FromSeconds(10);

                // NOTE(review): TimeSpan.MinValue presumably acts as the "not configured" sentinel
                // for MonitorDuration - confirm against where the property is declared.
                if (MonitorDuration > TimeSpan.MinValue)
                {
                    duration = MonitorDuration;
                }

                cpuUtilizationProvider = CpuUtilizationProvider.Create();

                // Warm up the counters.
                _ = await cpuUtilizationProvider.NextValueAsync();

                // NOTE(review): this.stopwatch is not started in this method; presumably the caller starts it.
                while (this.stopwatch.Elapsed <= duration)
                {
                    token.ThrowIfCancellationRequested();

                    // Each metric is sampled only when its warning threshold is configured.
                    if (CpuWarningUsageThresholdPct > 0 &&
                        CpuWarningUsageThresholdPct <= 100)
                    {
                        AllCpuTimeData.Data.Add(await cpuUtilizationProvider.NextValueAsync());
                    }

                    if (MemWarningUsageThresholdMb > 0)
                    {
                        // 1048576 = 1024 * 1024: converts the committed byte count to megabytes.
                        float committedMegaBytes = MemoryUsageProvider.Instance.GetCommittedBytes() / 1048576.0f;
                        this.allMemDataCommittedBytes.Data.Add(committedMegaBytes);
                    }

                    if (MemoryWarningLimitPercent > 0)
                    {
                        this.allMemDataPercentUsed.Data.Add(
                            OperatingSystemInfoProvider.Instance.TupleGetTotalPhysicalMemorySizeAndPercentInUse().PercentInUse);
                    }

                    await Task.Delay(250).ConfigureAwait(false);
                }
            }
            catch (AggregateException e) when (e.InnerException is OperationCanceledException || e.InnerException is TaskCanceledException || e.InnerException is TimeoutException)
            {
                return;
            }
            catch (Exception e) when (!(e is OperationCanceledException))
            {
                // Cancellation (OperationCanceledException and its subclass TaskCanceledException) is
                // filtered out above: it is thrown directly by ThrowIfCancellationRequested and is not
                // an error condition, so it must not surface as an "unhandled exception" health warning.
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    $"Unhandled exception in GetSystemCpuMemoryValuesAsync:{Environment.NewLine}{e}");

                throw;
            }
            finally
            {
                cpuUtilizationProvider?.Dispose();
            }
        }
        /// <summary>
        /// Reports OS health to Service Fabric: raises an Error report when the OS status is not "OK",
        /// clears a previously raised report once the status is "OK" again, publishes the OS report text,
        /// optionally writes SysInfo.txt for the web application, and on Windows warns when
        /// Windows Update automatic download is enabled.
        /// </summary>
        /// <param name="token">Cancellation token; checked once on entry and passed to the telemetry data object.</param>
        /// <returns>An already-completed task; all work is performed synchronously.</returns>
        /// <exception cref="OperationCanceledException">
        /// The token was cancelled. Cancellation propagates to the caller without raising a health error.
        /// </exception>
        public override Task ReportAsync(CancellationToken token)
        {
            try
            {
                token.ThrowIfCancellationRequested();

                // OS Health.
                if (this.osStatus != null && !string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
                {
                    string healthMessage = $"OS reporting unhealthy: {this.osStatus}";
                    var healthReport = new HealthReport
                    {
                        Observer               = ObserverName,
                        NodeName               = NodeName,
                        HealthMessage          = healthMessage,
                        State                  = HealthState.Error,
                        HealthReportTimeToLive = SetHealthReportTimeToLive(),
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);

                    // This means this observer created a Warning or Error SF Health Report
                    HasActiveFabricErrorOrWarning = true;

                    // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
                    if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                    {
                        _ = TelemetryClient?.ReportHealthAsync(
                            HealthScope.Application,
                            FabricRuntime.GetActivationContext().ApplicationName,
                            HealthState.Error,
                            $"{NodeName} - OS reporting unhealthy: {this.osStatus}",
                            ObserverName,
                            Token);
                    }
                }
                else if (HasActiveFabricErrorOrWarning && string.Equals(this.osStatus, "OK", StringComparison.OrdinalIgnoreCase))
                {
                    // Clear Error or Warning with an OK Health Report.
                    string healthMessage = $"OS reporting healthy: {this.osStatus}";

                    var healthReport = new HealthReport
                    {
                        Observer               = ObserverName,
                        NodeName               = NodeName,
                        HealthMessage          = healthMessage,
                        State                  = HealthState.Ok,
                        HealthReportTimeToLive = default(TimeSpan),
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);

                    // Reset internal health state.
                    HasActiveFabricErrorOrWarning = false;
                }

                if (ObserverManager.ObserverWebAppDeployed)
                {
                    var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "SysInfo.txt");

                    // "/" and ":" in a custom format string are culture-sensitive separators, so the
                    // invariant culture is used to keep the timestamp layout stable on every host.
                    string lastUpdated = DateTime.UtcNow.ToString(
                        "M/d/yyyy HH:mm:ss",
                        System.Globalization.CultureInfo.InvariantCulture);

                    // This file is used by the web application (log reader.).
                    if (!ObserverLogger.TryWriteLogFile(logPath, $"Last updated on {lastUpdated} UTC<br/>{this.osReport}"))
                    {
                        HealthReporter.ReportFabricObserverServiceHealth(
                            FabricServiceContext.ServiceName.OriginalString,
                            ObserverName,
                            HealthState.Warning,
                            "Unable to create SysInfo.txt file.");
                    }
                }

                // The OS report text is always published as an Ok health report.
                var report = new HealthReport
                {
                    Observer               = ObserverName,
                    HealthMessage          = this.osReport,
                    State                  = HealthState.Ok,
                    NodeName               = NodeName,
                    HealthReportTimeToLive = SetHealthReportTimeToLive(),
                };

                HealthReporter.ReportHealthToServiceFabric(report);

                bool isWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);

                // Windows Update automatic download enabled?
                if (isWindows && this.isWindowsUpdateAutoDownloadEnabled)
                {
                    string linkText =
                        $"{Environment.NewLine}For clusters of Silver durability or above, " +
                        $"please consider <a href=\"https://docs.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade\" target=\"blank\">" +
                        $"enabling VMSS automatic OS image upgrades</a> to prevent unexpected VM reboots. " +
                        $"For Bronze durability clusters, please consider deploying the " +
                        $"<a href=\"https://docs.microsoft.com/azure/service-fabric/service-fabric-patch-orchestration-application\" target=\"blank\">Patch Orchestration Service</a>.";

                    string auServiceEnabledMessage = $"Windows Update Automatic Download is enabled.{linkText}";

                    report = new HealthReport
                    {
                        Observer               = ObserverName,
                        Property               = "OSConfiguration",
                        HealthMessage          = auServiceEnabledMessage,
                        State                  = HealthState.Warning,
                        NodeName               = NodeName,
                        HealthReportTimeToLive = SetHealthReportTimeToLive(),
                    };

                    HealthReporter.ReportHealthToServiceFabric(report);

                    // The platform is already known to be Windows inside this branch, so the
                    // telemetry and ETW conditions do not need to test it again.
                    if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                    {
                        // Send Health Report as Telemetry (perhaps it signals an Alert from App Insights, for example.).
                        var telemetryData = new TelemetryData(FabricClientInstance, token)
                        {
                            HealthEventDescription = auServiceEnabledMessage,
                            HealthState            = "Warning",
                            Metric       = "WUAutoDownloadEnabled",
                            Value        = this.isWindowsUpdateAutoDownloadEnabled,
                            NodeName     = NodeName,
                            ObserverName = ObserverName,
                            Source       = ObserverConstants.FabricObserverName,
                        };

                        _ = TelemetryClient?.ReportMetricAsync(
                            telemetryData,
                            Token);
                    }

                    // ETW.
                    if (IsEtwEnabled)
                    {
                        Logger.EtwLogger?.Write(
                            ObserverConstants.FabricObserverETWEventName,
                            new
                            {
                                HealthState            = "Warning",
                                HealthEventDescription = auServiceEnabledMessage,
                                ObserverName,
                                Metric = "WUAutoDownloadEnabled",
                                Value  = this.isWindowsUpdateAutoDownloadEnabled,
                                NodeName,
                            });
                    }
                }

                if (isWindows)
                {
                    // reset au globals for fresh detection during next observer run.
                    this.isWindowsUpdateAutoDownloadEnabled = false;
                    this.auStateUnknown       = false;
                    this.isWUADSettingEnabled = false;
                }

                return Task.CompletedTask;
            }
            catch (Exception e) when (!(e is OperationCanceledException))
            {
                // Cancellation (OperationCanceledException and its subclass TaskCanceledException) is
                // filtered out above: it is an expected signal from ThrowIfCancellationRequested and
                // must not be reported as an Error-level unhandled exception.
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Error,
                    $"Unhandled exception processing OS information:{Environment.NewLine}{e}");

                throw;
            }
        }
        /// <summary>
        /// Prepares the observer for network monitoring: writes NetInfo.txt for the web application
        /// on the first run, loads the endpoint configuration file, and keeps the configuration
        /// entries whose target application is deployed on this node.
        /// </summary>
        /// <returns>
        /// <c>true</c> for a unit test run or when at least one usable configuration entry is available;
        /// <c>false</c> when the configuration file name is not set, the file does not exist, or no entry
        /// targets an application deployed on this node.
        /// </returns>
        private async Task<bool> InitializeAsync()
        {
            WriteToLogWithLevel(
                ObserverName,
                $"Initializing {ObserverName} for network monitoring. | {NodeName}",
                LogLevel.Information);

            this.cancellationToken.ThrowIfCancellationRequested();

            // This only needs to be logged once.
            // This file is used by the ObserverWebApi application.
            if (ObserverManager.ObserverWebAppDeployed && !this.hasRun)
            {
                var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "NetInfo.txt");

                // NOTE(review): looks like leftover diagnostic output - confirm whether it is still wanted.
                Console.WriteLine($"logPath: {logPath}");

                if (!ObserverLogger.TryWriteLogFile(logPath, GetNetworkInterfaceInfo(this.cancellationToken)))
                {
                    HealthReporter.ReportFabricObserverServiceHealth(
                        FabricServiceContext.ServiceName.OriginalString,
                        ObserverName,
                        HealthState.Warning,
                        "Unable to create NetInfo.txt file.");
                }
            }

            // Is this a unit test run?
            if (IsTestRun)
            {
                return true;
            }

            var settings =
                FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject(
                    ObserverConstants.ObserverConfigurationPackageName)?.Settings;

            this.configSettings.Initialize(
                settings,
                ConfigurationSectionName,
                "NetworkObserverDataFileName");

            // Validate the configured file name BEFORE combining it with the package path:
            // Path.Combine throws on a null segment, and once a non-empty package path is
            // prepended the combined value is no longer blank, so a check on the combined
            // path cannot detect a missing file name.
            string configuredFileName = this.configSettings.NetworkObserverConfigFileName;

            if (string.IsNullOrWhiteSpace(configuredFileName))
            {
                ObserverLogger.LogError(
                    "Endpoint list file is not specified. " +
                    "Please Add file containing endpoints that need to be monitored.");

                return false;
            }

            var networkObserverConfigFileName = Path.Combine(this.dataPackagePath, configuredFileName);

            if (!File.Exists(networkObserverConfigFileName))
            {
                ObserverLogger.LogError(
                    "Endpoint list file is not specified. " +
                    "Please Add file containing endpoints that need to be monitored.");

                return false;
            }

            // The configuration is loaded only while no entries have been accepted yet.
            if (this.userConfig.Count == 0)
            {
                using (Stream stream = new FileStream(
                           networkObserverConfigFileName,
                           FileMode.Open,
                           FileAccess.Read,
                           FileShare.Read))
                {
                    var configs = JsonHelper.ReadFromJsonStream<NetworkObserverConfig[]>(stream);

                    foreach (var netConfig in configs)
                    {
                        var deployedApps = await FabricClientInstance.QueryManager.GetDeployedApplicationListAsync(
                            NodeName,
                            new Uri(netConfig.TargetApp)).ConfigureAwait(false);

                        // Skip entries whose target application is not deployed on this node.
                        if (deployedApps == null || deployedApps.Count < 1)
                        {
                            continue;
                        }

                        this.userConfig.Add(netConfig);
                    }
                }

                if (this.userConfig.Count == 0)
                {
                    HealthReporter.ReportFabricObserverServiceHealth(
                        FabricServiceContext.ServiceName.ToString(),
                        ObserverName,
                        HealthState.Warning,
                        "Missing required configuration data: endpoints.");

                    return false;
                }
            }

            return true;
        }
        /// <summary>
        /// Probes every endpoint in the active configuration list (the user-supplied
        /// configuration when it has entries, otherwise the default list) and passes the
        /// pass/fail result to <c>SetHealthState</c>. TCP endpoints are tested with
        /// <c>TcpEndpointDoConnectionTest</c>; everything else is tested with an HTTP GET,
        /// where any response from the server counts as reachable.
        /// </summary>
        /// <exception cref="OperationCanceledException">
        /// Thrown when the observer's cancellation token is signaled.
        /// </exception>
        private void InternetConnectionStateIsConnected()
        {
            var configList = this.defaultConfig;

            if (this.userConfig.Count > 0)
            {
                configList = this.userConfig;
            }

            foreach (var config in configList)
            {
                this.cancellationToken.ThrowIfCancellationRequested();

                foreach (var endpoint in config.Endpoints)
                {
                    if (string.IsNullOrEmpty(endpoint.HostName))
                    {
                        continue;
                    }

                    // Don't re-test endpoint if it has already been tested for a different targetApp.
                    // NOTE(review): the cache key is the host name only, so endpoints that share a
                    // host but use different ports also share one result - confirm this is intended.
                    if (this.connEndpointTestResults.TryGetValue(endpoint.HostName, out var cachedResult))
                    {
                        SetHealthState(endpoint, config.TargetApp, cachedResult);
                        continue;
                    }

                    bool passed = false;
                    this.cancellationToken.ThrowIfCancellationRequested();

                    // SQL Azure, other database services that are addressable over direct TCP.
                    if (endpoint.Protocol == DirectInternetProtocol.Tcp)
                    {
                        passed = TcpEndpointDoConnectionTest(endpoint.HostName, endpoint.Port);
                    }

                    // Default is http.
                    else
                    {
                        // Service REST endpoints, CosmosDB REST endpoint, etc.
                        // Http protocol means any endpoint/port pair that is addressable over HTTP/s.
                        // E.g., REST endpoints, etc.
                        try
                        {
                            this.cancellationToken.ThrowIfCancellationRequested();

                            ServicePointManager.SecurityProtocol = SecurityProtocolType.SystemDefault;
                            string prefix =
                                endpoint.Port == 443 ? "https://" : "http://";

                            // The host name already carries its own scheme.
                            if (endpoint.HostName.Contains("://"))
                            {
                                prefix = string.Empty;
                            }

                            var request = (HttpWebRequest)WebRequest.Create(
                                new Uri($"{prefix}{endpoint.HostName}:{endpoint.Port}"));

                            request.AuthenticationLevel = AuthenticationLevel.MutualAuthRequired;
                            request.ImpersonationLevel  = TokenImpersonationLevel.Impersonation;
                            request.Timeout             = 60000;
                            request.Method = "GET";

                            using var response = (HttpWebResponse)request.GetResponse();
                            var status = response.StatusCode;

                            // The target server responded with something.
                            // It doesn't really matter what it "said".
                            if (status == HttpStatusCode.OK || response?.Headers?.Count > 0)
                            {
                                passed = true;
                            }
                        }
                        catch (IOException ie)
                        {
                            // A protocol violation still means the remote side answered.
                            if (ie.InnerException is ProtocolViolationException)
                            {
                                passed = true;
                            }
                        }
                        catch (WebException we)
                        {
                            if (we.Status == WebExceptionStatus.ProtocolError ||
                                we.Status == WebExceptionStatus.TrustFailure ||
                                we.Status == WebExceptionStatus.SecureChannelFailure ||
                                we.Response?.Headers?.Count > 0)
                            {
                                // Could not establish trust or server doesn't want to hear from you, or...
                                // Either way, the Server *responded*. It's reachable.
                                // You could always add code to grab your app or cluster certs from local store
                                // and apply it to the request. See CertificateObserver for how to get
                                // both your App cert(s) and Cluster cert. The goal of NetworkObserver is
                                // to test availability. Nothing more.
                                passed = true;
                            }
                            else if (we.Status == WebExceptionStatus.SendFailure &&
                                     we.InnerException != null &&
                                     (we.InnerException.Message.IndexOf("authentication", StringComparison.OrdinalIgnoreCase) >= 0 ||
                                      we.InnerException.HResult == -2146232800))
                            {
                                // Ordinal, case-insensitive search: lower-casing with the current
                                // culture can change letters (e.g. Turkish 'I') and miss the match.
                                passed = true;
                            }
                        }
                        catch (Exception e) when (!(e is OperationCanceledException))
                        {
                            // Cancellation is expected and propagates without a health warning;
                            // anything else is reported and rethrown.
                            HealthReporter.ReportFabricObserverServiceHealth(
                                FabricServiceContext.ServiceName.OriginalString,
                                ObserverName,
                                HealthState.Warning,
                                e.ToString());

                            throw;
                        }
                    }

                    SetHealthState(endpoint, config.TargetApp, passed);

                    if (!this.connEndpointTestResults.ContainsKey(endpoint.HostName))
                    {
                        this.connEndpointTestResults.Add(endpoint.HostName, passed);
                    }
                }
            }
        }
Пример #26
0
        /// <summary>
        /// Processes the collected disk metrics against their configured thresholds via
        /// <c>ProcessResourceDataReportHealth</c> and, when the observer web app is deployed,
        /// writes the accumulated disk information to "disks.txt" and clears the buffer.
        /// </summary>
        /// <param name="token">Cancellation token checked before each data item is processed.</param>
        /// <returns>A completed task; all work is performed synchronously.</returns>
        public override Task ReportAsync(CancellationToken token)
        {
            try
            {
                var timeToLiveWarning = SetHealthReportTimeToLive();

                // User-supplied Disk Space Usage % thresholds from Settings.xml.
                foreach (var data in this.DiskSpaceUsagePercentageData)
                {
                    token.ThrowIfCancellationRequested();
                    ProcessResourceDataReportHealth(
                        data,
                        DiskSpacePercentErrorThreshold,
                        DiskSpacePercentWarningThreshold,
                        timeToLiveWarning);
                }

                // User-supplied Average disk queue length thresholds from Settings.xml.
                foreach (var data in this.DiskAverageQueueLengthData)
                {
                    token.ThrowIfCancellationRequested();
                    ProcessResourceDataReportHealth(
                        data,
                        AverageQueueLengthErrorThreshold,
                        AverageQueueLengthWarningThreshold,
                        timeToLiveWarning);
                }

                /* For ETW Only - These calls will just produce ETW (note the thresholds). */
                if (IsEtwEnabled)
                {
                    // Disk Space Available
                    foreach (var data in this.DiskSpaceAvailableMbData)
                    {
                        token.ThrowIfCancellationRequested();
                        ProcessResourceDataReportHealth(
                            data,
                            0,
                            0,
                            timeToLiveWarning);
                    }

                    // Disk Space Total
                    foreach (var data in this.DiskSpaceTotalMbData)
                    {
                        token.ThrowIfCancellationRequested();
                        ProcessResourceDataReportHealth(
                            data,
                            0,
                            0,
                            timeToLiveWarning);
                    }
                }

                token.ThrowIfCancellationRequested();

                // This section only needs to run if you have the FabricObserverWebApi app installed.
                if (!ObserverManager.ObserverWebAppDeployed)
                {
                    return Task.CompletedTask;
                }

                var diskInfoPath = Path.Combine(ObserverLogger.LogFolderBasePath, "disks.txt");

                _ = ObserverLogger.TryWriteLogFile(diskInfoPath, this.diskInfo.ToString());

                _ = this.diskInfo.Clear();

                return Task.CompletedTask;
            }
            catch (AggregateException e) when (e.InnerException is OperationCanceledException || e.InnerException is TaskCanceledException || e.InnerException is TimeoutException)
            {
                // Cancellation/timeout surfaced through an AggregateException is not an error.
                return Task.CompletedTask;
            }
            catch (Exception e)
            {
                // The message names this method so the health report points at the right place.
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    $"Unhandled exception in ReportAsync:{Environment.NewLine}{e}");

                throw;
            }
        }
        /// <summary>
        /// Gathers operating system, TCP port, firewall, memory and drive information,
        /// formats it into a text report stored in <c>osReport</c>, and, when enabled,
        /// emits the same data as an ETW event and as telemetry.
        /// </summary>
        /// <param name="token">Cancellation token passed to the OS info and hot fix queries.</param>
        /// <remarks>
        /// FabricException, OperationCanceledException, TaskCanceledException and
        /// InvalidComObjectException are reported as a Warning and swallowed; any other
        /// exception is reported as an Error and rethrown.
        /// </remarks>
        private async Task GetComputerInfoAsync(CancellationToken token)
        {
            var sb = new StringBuilder();
            int logicalProcessorCount = Environment.ProcessorCount;

            try
            {
                OSInfo osInfo = await OperatingSystemInfoProvider.Instance.GetOSInfoAsync(token).ConfigureAwait(false);

                this.osStatus = osInfo.Status;

                // Active, bound ports.
                int activePorts = OperatingSystemInfoProvider.Instance.GetActivePortCount();

                // Active, ephemeral ports.
                int activeEphemeralPorts = OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount();
                (int lowPortOS, int highPortOS) = OperatingSystemInfoProvider.Instance.TupleGetDynamicPortRange();
                string osEphemeralPortRange = string.Empty;
                string fabricAppPortRange   = string.Empty;

                // Test runs read the manifest from disk instead of querying the cluster.
                string clusterManifestXml = IsTestRun ? File.ReadAllText(
                    TestManifestPath) : await FabricClientInstance.ClusterManager.GetClusterManifestAsync(
                    AsyncClusterOperationTimeoutSeconds, Token).ConfigureAwait(false);

                (int lowPortApp, int highPortApp) =
                    NetworkUsage.TupleGetFabricApplicationPortRangeForNodeType(
                        FabricServiceContext.NodeContext.NodeType,
                        clusterManifestXml);

                int firewalls = NetworkUsage.GetActiveFirewallRulesCount();

                // OS info.
                _ = sb.AppendLine("OS Information:\r\n");
                _ = sb.AppendLine($"Name: {osInfo.Name}");
                _ = sb.AppendLine($"Version: {osInfo.Version}");

                // Only emit the install date when one is actually available.
                if (!string.IsNullOrEmpty(osInfo.InstallDate))
                {
                    _ = sb.AppendLine($"InstallDate: {osInfo.InstallDate}");
                }

                _ = sb.AppendLine($"LastBootUpTime*: {osInfo.LastBootUpTime}");

                if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                {
                    // WU AutoUpdate - Download enabled.
                    // If the config setting EnableWindowsAutoUpdateCheck is set to false, then don't add this info to sb.
                    if (this.isWUADSettingEnabled)
                    {
                        string auMessage = "WindowsUpdateAutoDownloadEnabled: ";

                        if (this.auStateUnknown)
                        {
                            auMessage += "Unknown";
                        }
                        else
                        {
                            auMessage += this.isWindowsUpdateAutoDownloadEnabled;
                        }

                        _ = sb.AppendLine(auMessage);
                    }

                    // Not supported for Linux.
                    _ = sb.AppendLine($"OSLanguage: {osInfo.Language}");
                    _ = sb.AppendLine($"OSHealthStatus*: {osInfo.Status}");
                }

                _ = sb.AppendLine($"NumberOfProcesses*: {osInfo.NumberOfProcesses}");

                // Values below zero mean the corresponding figure is unavailable and is skipped.
                if (lowPortOS > -1)
                {
                    osEphemeralPortRange = $"{lowPortOS} - {highPortOS}";
                    _ = sb.AppendLine($"OSEphemeralTCPPortRange: {osEphemeralPortRange} (Active*: {activeEphemeralPorts})");
                }

                if (lowPortApp > -1)
                {
                    fabricAppPortRange = $"{lowPortApp} - {highPortApp}";
                    _ = sb.AppendLine($"FabricApplicationTCPPortRange: {fabricAppPortRange}");
                }

                if (firewalls > -1)
                {
                    _ = sb.AppendLine($"ActiveFirewallRules*: {firewalls}");
                }

                if (activePorts > -1)
                {
                    _ = sb.AppendLine($"TotalActiveTCPPorts*: {activePorts}");
                }

                // Hardware info.
                // Proc/Mem
                _ = sb.AppendLine($"{Environment.NewLine}Hardware Information:{Environment.NewLine}");
                _ = sb.AppendLine($"LogicalProcessorCount: {logicalProcessorCount}");

                // 1048576 KB == 1 GB.
                if (osInfo.TotalVirtualMemorySizeKB > 0)
                {
                    _ = sb.AppendLine($"TotalVirtualMemorySize: {osInfo.TotalVirtualMemorySizeKB / 1048576} GB");
                }

                if (osInfo.TotalVisibleMemorySizeKB > 0)
                {
                    _ = sb.AppendLine($"TotalVisibleMemorySize: {osInfo.TotalVisibleMemorySizeKB / 1048576} GB");
                }

                _ = sb.AppendLine($"FreePhysicalMemory*: {Math.Round(osInfo.AvailableMemoryKB / 1048576.0, 2)} GB");
                _ = sb.AppendLine($"FreeVirtualMemory*: {Math.Round(osInfo.FreeVirtualMemoryKB / 1048576.0, 2)} GB");

                // Disk
                var    drivesInformationTuple = DiskUsage.GetCurrentDiskSpaceTotalAndUsedPercentAllDrives(SizeUnit.Gigabytes);
                var    logicalDriveCount      = drivesInformationTuple.Count;
                string driveInfo = string.Empty;

                _ = sb.AppendLine($"LogicalDriveCount: {logicalDriveCount}");

                foreach (var (driveName, diskSize, percentConsumed) in drivesInformationTuple)
                {
                    string drvSize;

                    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                    {
                        string systemDrv = "Data";

                        // A drive whose letter matches the system directory's is labeled "System".
                        if (string.Equals(Environment.SystemDirectory.Substring(0, 1), driveName.Substring(0, 1), StringComparison.OrdinalIgnoreCase))
                        {
                            systemDrv = "System";
                        }

                        drvSize = $"Drive {driveName} ({systemDrv}) Size: {diskSize} GB, Consumed*: {percentConsumed}%";
                    }
                    else
                    {
                        drvSize = $"Mount point: {driveName}, Size: {diskSize} GB, Consumed*: {percentConsumed}%";
                    }

                    _ = sb.AppendLine(drvSize);

                    driveInfo += $"{drvSize}{Environment.NewLine}";
                }

                string osHotFixes = string.Empty;

                if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                {
                    osHotFixes = GetWindowsHotFixes(token);
                }

                if (!string.IsNullOrEmpty(osHotFixes))
                {
                    _ = sb.AppendLine($"\nWindows Patches/Hot Fixes*:\n\n{osHotFixes}");
                }

                // Dynamic info qualifier (*)
                _ = sb.AppendLine($"\n* Dynamic data.");

                this.osReport = sb.ToString();

                string hotFixes = string.Empty;

                // ETW.
                if (IsEtwEnabled)
                {
                    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                    {
                        // Line breaks become ", "; trim both characters so no dangling separator remains.
                        hotFixes = GetWindowsHotFixes(token, generateUrl: false).Replace("\r\n", ", ").TrimEnd(',', ' ');
                    }

                    Logger.EtwLogger?.Write(
                        ObserverConstants.FabricObserverETWEventName,
                        new
                        {
                            HealthState       = "Ok",
                            Node              = NodeName,
                            Observer          = ObserverName,
                            OS                = osInfo.Name,
                            OSVersion         = osInfo.Version,
                            OSInstallDate     = osInfo.InstallDate,
                            AutoUpdateEnabled = this.auStateUnknown ? "Unknown" : this.isWindowsUpdateAutoDownloadEnabled.ToString(),
                            osInfo.LastBootUpTime,
                            WindowsAutoUpdateEnabled  = this.isWindowsUpdateAutoDownloadEnabled,
                            TotalMemorySizeGB         = (int)(osInfo.TotalVisibleMemorySizeKB / 1048576),
                            AvailablePhysicalMemoryGB = Math.Round(osInfo.FreePhysicalMemoryKB / 1048576.0, 2),
                            AvailableVirtualMemoryGB  = Math.Round(osInfo.FreeVirtualMemoryKB / 1048576.0, 2),
                            LogicalProcessorCount     = logicalProcessorCount,
                            LogicalDriveCount         = logicalDriveCount,
                            DriveInfo = driveInfo,
                            NumberOfRunningProcesses = osInfo.NumberOfProcesses,
                            ActiveFirewallRules      = firewalls,
                            ActivePorts             = activePorts,
                            ActiveEphemeralPorts    = activeEphemeralPorts,
                            WindowsDynamicPortRange = osEphemeralPortRange,
                            FabricAppPortRange      = fabricAppPortRange,
                            HotFixes = hotFixes,
                        });
                }

                // Telemetry
                if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled)
                {
                    // Reuse the hot fix list computed for ETW when it is already available.
                    if (string.IsNullOrEmpty(hotFixes) && RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                    {
                        hotFixes = GetWindowsHotFixes(token, generateUrl: false).Replace("\r\n", ", ").TrimEnd(',', ' ');
                    }

                    // NOTE(review): the returned task is not awaited, so a failure here is not
                    // observed by the catch blocks below - confirm fire-and-forget is intended.
                    TelemetryClient?.ReportMetricAsync(
                        new MachineTelemetryData
                        {
                            HealthState    = "Ok",
                            Node           = NodeName,
                            Observer       = ObserverName,
                            OS             = osInfo.Name,
                            OSVersion      = osInfo.Version,
                            OSInstallDate  = osInfo.InstallDate,
                            LastBootUpTime = osInfo.LastBootUpTime,
                            WindowsUpdateAutoDownloadEnabled = this.isWindowsUpdateAutoDownloadEnabled,

                            // Divide first, then narrow: casting the KB value to int before the
                            // division truncates large values and disagrees with the ETW payload.
                            TotalMemorySizeGB         = (int)(osInfo.TotalVisibleMemorySizeKB / 1048576),
                            AvailablePhysicalMemoryGB = Math.Round(osInfo.FreePhysicalMemoryKB / 1048576.0, 2),
                            AvailableVirtualMemoryGB  = Math.Round(osInfo.FreeVirtualMemoryKB / 1048576.0, 2),
                            LogicalProcessorCount     = logicalProcessorCount,
                            LogicalDriveCount         = logicalDriveCount,
                            DriveInfo = driveInfo,
                            NumberOfRunningProcesses = osInfo.NumberOfProcesses,
                            ActiveFirewallRules      = firewalls,
                            ActivePorts             = activePorts,
                            ActiveEphemeralPorts    = activeEphemeralPorts,
                            WindowsDynamicPortRange = osEphemeralPortRange,
                            FabricAppPortRange      = fabricAppPortRange,
                            HotFixes = hotFixes,
                        }, Token);
                }
            }
            catch (Exception e) when (e is FabricException || e is OperationCanceledException || e is TaskCanceledException || e is InvalidComObjectException)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Warning,
                    $"Handled Exception processing OS information:{Environment.NewLine}{e}");
            }
            catch (Exception e)
            {
                HealthReporter.ReportFabricObserverServiceHealth(
                    FabricServiceContext.ServiceName.OriginalString,
                    ObserverName,
                    HealthState.Error,
                    $"Unhandled Exception processing OS information:{Environment.NewLine}{e}");

                throw;
            }
        }
        /// <summary>
        /// Samples CPU and memory use of the host process of every entry in
        /// <c>ReplicaOrInstanceList</c> for the monitoring duration (one sample every 250 ms),
        /// then records the process's active and ephemeral port counts. Samples are appended
        /// to the per-process resource data lists, keyed by "appNameOrType:processName".
        /// </summary>
        /// <param name="token">Cancellation token checked between processes and between samples.</param>
        /// <remarks>
        /// Win32Exception, ArgumentException and InvalidOperationException are logged and the
        /// entry is skipped; any other exception is rethrown.
        /// </remarks>
        private async Task MonitorDeployedAppsAsync(CancellationToken token)
        {
            Process currentProcess = null;

            foreach (var repOrInst in ReplicaOrInstanceList)
            {
                token.ThrowIfCancellationRequested();

                var timer     = new Stopwatch();
                int processId = (int)repOrInst.HostProcessId;
                var cpuUsage  = new CpuUsage();

                try
                {
                    // App level.
                    currentProcess = Process.GetProcessById(processId);

                    token.ThrowIfCancellationRequested();

                    var    procName      = currentProcess.ProcessName;
                    string appNameOrType = GetAppNameOrType(repOrInst);

                    var id = $"{appNameOrType}:{procName}";

                    // Add new resource data structures for each app service process.
                    if (this.allAppCpuData.All(list => list.Id != id))
                    {
                        this.allAppCpuData.Add(new FabricResourceUsageData<double>(ErrorWarningProperty.TotalCpuTime, id, DataCapacity, UseCircularBuffer));
                        this.allAppMemDataMb.Add(new FabricResourceUsageData<float>(ErrorWarningProperty.TotalMemoryConsumptionMb, id, DataCapacity, UseCircularBuffer));
                        this.allAppMemDataPercent.Add(new FabricResourceUsageData<double>(ErrorWarningProperty.TotalMemoryConsumptionPct, id, DataCapacity, UseCircularBuffer));
                        this.allAppTotalActivePortsData.Add(new FabricResourceUsageData<int>(ErrorWarningProperty.TotalActivePorts, id, 1));
                        this.allAppEphemeralPortsData.Add(new FabricResourceUsageData<int>(ErrorWarningProperty.TotalEphemeralPorts, id, 1));
                    }

                    // Default sampling window; only a positive MonitorDuration overrides it, so an
                    // unset (TimeSpan.MinValue), zero or negative value cannot disable sampling.
                    TimeSpan duration = TimeSpan.FromSeconds(15);

                    if (MonitorDuration > TimeSpan.Zero)
                    {
                        duration = MonitorDuration;
                    }

                    // Warm up the counters.
                    _ = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);
                    _ = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);

                    timer.Start();

                    // Compare whole TimeSpans: the Seconds property is only the 0-59 component,
                    // which breaks for durations of a minute or more.
                    while (!currentProcess.HasExited && timer.Elapsed <= duration)
                    {
                        token.ThrowIfCancellationRequested();

                        // CPU (all cores).
                        double cpu = cpuUsage.GetCpuUsagePercentageProcess(currentProcess);

                        if (cpu >= 0)
                        {
                            if (cpu > 100)
                            {
                                cpu = 100;
                            }

                            this.allAppCpuData.FirstOrDefault(x => x.Id == id).Data.Add(cpu);
                        }

                        // Memory (private working set (process)).
                        var processMem = ProcessInfoProvider.Instance.GetProcessPrivateWorkingSetInMB(currentProcess.Id);
                        this.allAppMemDataMb.FirstOrDefault(x => x.Id == id).Data.Add(processMem);

                        // Memory (percent in use (total)).
                        var (totalMemory, _) = OperatingSystemInfoProvider.Instance.TupleGetTotalPhysicalMemorySizeAndPercentInUse();
                        long totalMem = totalMemory;

                        // Require a positive total so the percentage never divides by zero.
                        if (totalMem > 0)
                        {
                            double usedPct = Math.Round(((double)(processMem * 100)) / (totalMem * 1024), 2);
                            this.allAppMemDataPercent.FirstOrDefault(x => x.Id == id).Data.Add(Math.Round(usedPct, 1));
                        }

                        // Honor the token this method was called with.
                        await Task.Delay(250, token);
                    }

                    timer.Stop();
                    timer.Reset();

                    // Total and Ephemeral ports..
                    this.allAppTotalActivePortsData.FirstOrDefault(x => x.Id == id)
                        .Data.Add(OperatingSystemInfoProvider.Instance.GetActivePortCount(currentProcess.Id, FabricServiceContext));

                    this.allAppEphemeralPortsData.FirstOrDefault(x => x.Id == id)
                        .Data.Add(OperatingSystemInfoProvider.Instance.GetActiveEphemeralPortCount(currentProcess.Id, FabricServiceContext));
                }
                catch (Exception e)
                {
#if DEBUG
                    // DEBUG INFO
                    var healthReport = new Utilities.HealthReport
                    {
                        AppName       = repOrInst.ApplicationName,
                        HealthMessage = $"Error: {e}\n\n",
                        State         = HealthState.Ok,
                        Code          = FOErrorWarningCodes.Ok,
                        NodeName      = NodeName,
                        Observer      = ObserverName,
                        Property      = $"{e.Source}",
                        ReportType    = HealthReportType.Application,
                    };

                    HealthReporter.ReportHealthToServiceFabric(healthReport);
#endif
                    if (e is Win32Exception || e is ArgumentException || e is InvalidOperationException)
                    {
                        // The process could not be found or inspected: log it and move on to the next entry.
                        WriteToLogWithLevel(
                            ObserverName,
                            $"MonitorAsync failed to find current service process for {repOrInst.ApplicationName?.OriginalString ?? repOrInst.ApplicationTypeName}\n{e}",
                            LogLevel.Information);
                    }
                    else
                    {
                        // Cancellation is rethrown without a warning entry in the log.
                        if (!(e is OperationCanceledException || e is TaskCanceledException))
                        {
                            WriteToLogWithLevel(
                                ObserverName,
                                $"Unhandled exception in MonitorAsync: \n {e}",
                                LogLevel.Warning);
                        }

                        throw;
                    }
                }
                finally
                {
                    currentProcess?.Dispose();
                    currentProcess = null;
                }
            }
        }
Пример #29
0
        /// <summary>
        /// Reports the data gathered for Fabric system services: sends an informational (Ok) health report
        /// with port counts and average memory use, resets the port counters, runs threshold processing
        /// for CPU, memory, active TCP ports and ephemeral ports, and - on Windows, when the observer web app
        /// is deployed and event log monitoring is enabled - writes the collected event records as HTML
        /// to "EventVwrErrors.txt" in the observer log folder. All data containers are cleared at the end.
        /// </summary>
        /// <param name="token">
        /// Cancellation token checked while the event records are written. The initial cancellation
        /// check uses the <c>Token</c> member rather than this parameter.
        /// </param>
        /// <returns>A completed task; all work is done synchronously.</returns>
        /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
        public override Task ReportAsync(CancellationToken token)
        {
            Token.ThrowIfCancellationRequested();

            // Average memory value recorded for the given process id, formatted for the report
            // (empty when no entry exists for that id).
            string AverageMemoryMb(string id) => $"{this.allMemData.FirstOrDefault(x => x.Id == id)?.AverageDataValue}";

            // Event log text is arbitrary, so it must be encoded before being embedded in HTML;
            // otherwise any '<', '>' or '&' in a message corrupts (or injects) markup.
            string HtmlDiv(string text) => "<div>" + System.Net.WebUtility.HtmlEncode(text) + "</div>";

            // Informational report. For now, Linux is where we pay close attention to memory use by Fabric system services as there are still a few issues in that realm..
            var timeToLiveWarning     = SetHealthReportTimeToLive();
            var portInformationReport = new HealthReport
            {
                Observer      = ObserverName,
                NodeName      = NodeName,
                HealthMessage = $"Number of ports in use by Fabric services: {TotalActivePortCountAllSystemServices}{Environment.NewLine}" +
                                $"Number of ephemeral ports in use by Fabric services: {TotalActiveEphemeralPortCountAllSystemServices}{Environment.NewLine}" +
                                $"Fabric memory use MB: {AverageMemoryMb("Fabric")}{Environment.NewLine}" +
                                (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ?
                                 $"FabricGateway memory use MB: {AverageMemoryMb("FabricGateway.exe")}{Environment.NewLine}" +
                                 $"FabricHost memory use MB: {AverageMemoryMb("FabricHost")}{Environment.NewLine}" : string.Empty),

                State = HealthState.Ok,
                HealthReportTimeToLive = timeToLiveWarning,
            };

            HealthReporter.ReportHealthToServiceFabric(portInformationReport);

            // Reset ports counters.
            TotalActivePortCountAllSystemServices          = 0;
            TotalActiveEphemeralPortCountAllSystemServices = 0;

            // CPU
            ProcessResourceDataList(
                this.allCpuData,
                CpuErrorUsageThresholdPct,
                CpuWarnUsageThresholdPct);

            // Memory
            ProcessResourceDataList(
                this.allMemData,
                MemErrorUsageThresholdMb,
                MemWarnUsageThresholdMb);

            // Ports - Active TCP
            ProcessResourceDataList(
                this.allActiveTcpPortData,
                ActiveTcpPortCountError,
                ActiveTcpPortCountWarning);

            // Ports - Ephemeral
            ProcessResourceDataList(
                this.allEphemeralTcpPortData,
                ActiveEphemeralPortCountError,
                ActiveEphemeralPortCountWarning);

            // Windows Event Log
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && ObserverManager.ObserverWebAppDeployed &&
                this.monitorWinEventLog)
            {
                // SF Eventlog Errors?
                // Write this out to a new file, for use by the web front end log viewer.
                // Format = HTML.
                int count   = this.evtRecordList.Count();
                var logPath = Path.Combine(ObserverLogger.LogFolderBasePath, "EventVwrErrors.txt");

                // Remove existing file.
                if (File.Exists(logPath))
                {
                    try
                    {
                        File.Delete(logPath);
                    }
                    catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
                    {
                        // Best effort: a stale file that cannot be removed must not fail the report.
                    }
                }

                // The file is only produced once at least 10 records have been collected.
                if (count >= 10)
                {
                    var sb = new StringBuilder();

                    _ = sb.AppendLine("<br/><div><strong>" +
                                      "<a href='javascript:toggle(\"evtContainer\")'>" +
                                      "<div id=\"plus\" style=\"display: inline; font-size: 25px;\">+</div> " + count +
                                      " Error Events in ServiceFabric and System</a> " +
                                      "Event logs</strong>.<br/></div>");

                    _ = sb.AppendLine("<div id='evtContainer' style=\"display: none;\">");

                    foreach (var evt in this.evtRecordList.Distinct())
                    {
                        token.ThrowIfCancellationRequested();

                        try
                        {
                            // Access event properties:
                            _ = sb.AppendLine(HtmlDiv(evt.LogName));
                            _ = sb.AppendLine(HtmlDiv(evt.LevelDisplayName));

                            if (evt.TimeCreated.HasValue)
                            {
                                _ = sb.AppendLine("<div>" + evt.TimeCreated.Value.ToShortDateString() + "</div>");
                            }

                            foreach (var prop in evt.Properties)
                            {
                                // Convert once; null and empty values are skipped.
                                string propText = prop.Value != null ? Convert.ToString(prop.Value) : null;

                                if (!string.IsNullOrEmpty(propText))
                                {
                                    _ = sb.AppendLine(HtmlDiv(propText));
                                }
                            }
                        }
                        catch (EventLogException)
                        {
                            // Reading a record can fail; skip the rest of that record and keep going.
                        }
                    }

                    _ = sb.AppendLine("</div>");

                    _ = ObserverLogger.TryWriteLogFile(logPath, sb.ToString());
                }

                // Clean up.
                if (count > 0)
                {
                    this.evtRecordList.Clear();
                }
            }

            ClearDataContainers();

            return Task.CompletedTask;
        }
        // Windows process dmp creator.
        /// <summary>
        /// Writes a dump (.dmp) file for the process with the given id into <c>this.dumpsPath</c>
        /// by calling the native <c>MiniDumpWriteDump</c> function. The file is named
        /// "&lt;process name&gt;_&lt;ddMMyyyyHHmmss&gt;.dmp".
        /// </summary>
        /// <param name="processId">Id of the process to dump.</param>
        /// <param name="dumpType">Selects the set of MINIDUMP_TYPE flags passed to the native call.</param>
        /// <returns>
        /// <c>true</c> when the dump file was written. <c>false</c> when no dumps path is set, the process
        /// name is empty, the target drive is more than 90 percent used (a Warning health report is also
        /// sent in that case), or an <see cref="ArgumentException"/>, <see cref="InvalidOperationException"/>
        /// or <see cref="Win32Exception"/> occurred (the failure is logged).
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dumpType"/> is not a handled value.</exception>
        public bool DumpServiceProcess(int processId, DumpType dumpType = DumpType.Full)
        {
            if (string.IsNullOrEmpty(this.dumpsPath))
            {
                return false;
            }

            string processName = string.Empty;

            NativeMethods.MINIDUMP_TYPE miniDumpType;

            switch (dumpType)
            {
                case DumpType.Full:
                    miniDumpType = NativeMethods.MINIDUMP_TYPE.MiniDumpWithFullMemory |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpWithFullMemoryInfo |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpWithHandleData |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpWithThreadInfo |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpWithUnloadedModules;
                    break;

                case DumpType.MiniPlus:
                    miniDumpType = NativeMethods.MINIDUMP_TYPE.MiniDumpWithPrivateReadWriteMemory |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpWithDataSegs |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpWithHandleData |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpWithFullMemoryInfo |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpWithThreadInfo |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpWithUnloadedModules;
                    break;

                case DumpType.Mini:
                    miniDumpType = NativeMethods.MINIDUMP_TYPE.MiniDumpWithIndirectlyReferencedMemory |
                                   NativeMethods.MINIDUMP_TYPE.MiniDumpScanMemory;
                    break;

                default:
                    throw new ArgumentOutOfRangeException(nameof(dumpType), dumpType, null);
            }

            try
            {
                // Look the process up once and dispose it when done: Process wraps an OS handle, and the
                // handle must stay open for the whole duration of the native dump call below.
                using (Process process = Process.GetProcessById(processId))
                {
                    // This is to ensure friendly-name of resulting dmp file.
                    processName = process.ProcessName;

                    if (string.IsNullOrEmpty(processName))
                    {
                        return false;
                    }

                    IntPtr processHandle = process.Handle;

                    processName += "_" + DateTime.Now.ToString("ddMMyyyyHHmmss") + ".dmp";

                    // Check disk space availability before writing dump file.

                    // This will not work on Linux
                    string driveName = this.dumpsPath.Substring(0, 2);
                    if (DiskUsage.GetCurrentDiskSpaceUsedPercent(driveName) > 90)
                    {
                        HealthReporter.ReportFabricObserverServiceHealth(
                            FabricServiceContext.ServiceName.OriginalString,
                            ObserverName,
                            HealthState.Warning,
                            "Not enough disk space available for dump file creation.");
                        return false;
                    }

                    using (var file = File.Create(Path.Combine(this.dumpsPath, processName)))
                    {
                        if (!NativeMethods.MiniDumpWriteDump(
                                processHandle,
                                (uint)processId,
                                file.SafeFileHandle,
                                miniDumpType,
                                IntPtr.Zero,
                                IntPtr.Zero,
                                IntPtr.Zero))
                        {
                            // Surface the native error; it is handled by the catch below.
                            throw new Win32Exception(Marshal.GetLastWin32Error());
                        }
                    }

                    return true;
                }
            }
            catch (Exception e) when (e is ArgumentException || e is InvalidOperationException || e is Win32Exception)
            {
                ObserverLogger.LogInfo(
                    $"Unable to generate dump file {processName} with error{Environment.NewLine}{e}");
            }

            return false;
        }