/// <summary>
/// Queries the replicas deployed on this node for the given application and builds a
/// monitoring record for each one that belongs to one of the supplied services.
/// Stateful replicas are kept only when their role is Primary; stateless instances
/// are kept whenever their service name matches.
/// </summary>
/// <param name="appName">Name of the application whose deployed replicas are queried.</param>
/// <param name="services">Services whose names are used to filter the deployed replicas.</param>
/// <returns>One <see cref="ReplicaMonitoringInfo"/> per matching replica or instance, in query order.</returns>
private async Task<List<ReplicaMonitoringInfo>> GetDeployedPrimaryReplicaAsync(Uri appName, ServiceList services)
        {
            var monitoredReplicas = new List<ReplicaMonitoringInfo>();

            var replicasOnNode = await this.FabricClientInstance.QueryManager
                                           .GetDeployedReplicaListAsync(this.NodeName, appName)
                                           .ConfigureAwait(true);

            foreach (var candidate in replicasOnNode)
            {
                switch (candidate)
                {
                    // Stateful: only the Primary replica of a targeted service is monitored.
                    case DeployedStatefulServiceReplica stateful
                        when stateful.ReplicaRole == ReplicaRole.Primary
                             && services.Any(svc => svc.ServiceName == stateful.ServiceName):

                        monitoredReplicas.Add(new ReplicaMonitoringInfo
                        {
                            ApplicationName      = appName,
                            ReplicaHostProcessId = stateful.HostProcessId,
                            ReplicaOrInstanceId  = stateful.ReplicaId,
                            Partitionid          = stateful.Partitionid,
                        });
                        break;

                    // Stateless: every instance of a targeted service is monitored.
                    case DeployedStatelessServiceInstance stateless
                        when services.Any(svc => svc.ServiceName == stateless.ServiceName):

                        monitoredReplicas.Add(new ReplicaMonitoringInfo
                        {
                            ApplicationName      = appName,
                            ReplicaHostProcessId = stateless.HostProcessId,
                            ReplicaOrInstanceId  = stateless.InstanceId,
                            Partitionid          = stateless.Partitionid,
                        });
                        break;
                }
            }

            return monitoredReplicas;
        }
        // Example no. 2
        /// <summary>
        /// Evaluates collected resource usage data against an error and a warning threshold and
        /// reports the outcome. The raw average is always emitted as telemetry and as an ETW event
        /// when those channels are enabled. If the data breaches the error threshold (checked first)
        /// or the warning threshold, a health report with a matching error/warning code is sent to
        /// Service Fabric, and optionally to telemetry and ETW. If neither threshold is breached and
        /// the data previously had an active error or warning, an Ok health report is sent to clear it.
        /// The collected samples in <paramref name="data"/> are cleared before returning.
        /// </summary>
        /// <typeparam name="T">Type parameter shared by the usage data and both thresholds.</typeparam>
        /// <param name="data">Resource usage data to evaluate. Must not be null.</param>
        /// <param name="thresholdError">Threshold passed to <c>data.IsUnhealthy</c> to detect an Error state.</param>
        /// <param name="thresholdWarning">Threshold passed to <c>data.IsUnhealthy</c> to detect a Warning state.</param>
        /// <param name="healthReportTtl">Time to live assigned to an Error or Warning health report.</param>
        /// <param name="healthReportType">Report type assigned to every health report created here.</param>
        /// <param name="app">
        /// Optional application name. When supplied it is parsed as a <see cref="Uri"/>, and the
        /// text with "fabric:/" removed is used in the health message and the telemetry id.
        /// </param>
        /// <param name="replicaOrInstance">
        /// Optional replica or instance whose partition id, replica id and host process name are
        /// included in the health message.
        /// </param>
        /// <param name="dumpOnError">
        /// When true, and the Error threshold is breached for a replica whose host process name
        /// could be resolved, a process dump is attempted (limited per process name by <c>maxDumps</c>).
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        public void ProcessResourceDataReportHealth<T>(
            FabricResourceUsageData<T> data,
            T thresholdError,
            T thresholdWarning,
            TimeSpan healthReportTtl,
            HealthReportType healthReportType = HealthReportType.Node,
            string app = null,
            ReplicaMonitoringInfo replicaOrInstance = null,
            bool dumpOnError = false)
        {
            if (data == null)
            {
                // ArgumentNullException derives from ArgumentException, so existing handlers still match.
                throw new ArgumentNullException(nameof(data), "Supply all required parameters with non-null value...");
            }

            var    thresholdName = "Minimum";
            bool   warningOrError = false;
            string repPartitionId = null, repOrInstanceId = null, name = null, id = null, procName = null;
            T      threshold   = thresholdWarning;
            var    healthState = HealthState.Ok;
            Uri    appName     = null;

            if (replicaOrInstance != null)
            {
                repPartitionId  = $"Partition: {replicaOrInstance.Partitionid}";
                repOrInstanceId = $"Replica: {replicaOrInstance.ReplicaOrInstanceId}";

                // The host process may already have exited; a failed lookup must not
                // prevent the health report from being produced.
                procName = this.GetHostProcessNameOrNull((int)replicaOrInstance.ReplicaHostProcessId);
            }

            // Create a unique id which may be used in the case of warnings or OK clears...
            if (app != null)
            {
                appName = new Uri(app);
                name    = app.Replace("fabric:/", string.Empty);
                id      = name + "_" + data.Property.Replace(" ", string.Empty);
            }

            // Telemetry...
            if (this.IsTelemetryEnabled)
            {
                _ = this.ObserverTelemetryClient?.ReportMetricAsync($"{this.NodeName}-{app}-{data.Id}-{data.Property}", data.AverageDataValue, this.Token);
            }

            // ETW...
            if (this.IsEtwEnabled)
            {
                Logger.EtwLogger?.Write(
                    "FabricObserverDataEvent",
                    new
                    {
                        Level    = 0,  // Info
                        Node     = this.NodeName,
                        Observer = this.ObserverName,
                        Property = data.Property,
                        Id       = data.Id,
                        Value    = $"{Math.Round(data.AverageDataValue)}",
                        Unit     = data.Units,
                    });
            }

            // Health Error
            if (data.IsUnhealthy(thresholdError))
            {
                thresholdName  = "Maximum";
                threshold      = thresholdError;
                warningOrError = true;
                healthState    = HealthState.Error;

                // This is primarily useful for AppObserver, but makes sense to be
                // part of the base class for future use, like for FSO...
                if (replicaOrInstance != null && procName != null && dumpOnError)
                {
                    try
                    {
                        int procId = (int)replicaOrInstance.ReplicaHostProcessId;

                        if (!this.serviceDumpCountDictionary.ContainsKey(procName))
                        {
                            this.serviceDumpCountDictionary.Add(procName, 0);
                        }

                        if (this.serviceDumpCountDictionary[procName] < this.maxDumps)
                        {
                            // DumpServiceProcess defaults to a Full dump with
                            // process memory, handles and thread data...
                            bool success = this.DumpServiceProcess(procId);

                            if (success)
                            {
                                this.serviceDumpCountDictionary[procName]++;
                            }
                        }
                    }

                    // Ignore these, it just means no dmp will be created. This is not
                    // critical to FO... Log as info, not warning...
                    catch (ArgumentException ae)
                    {
                        this.ObserverLogger.LogInfo($"Unable to generate dmp file:\n{ae.ToString()}");
                    }
                    catch (InvalidOperationException ioe)
                    {
                        this.ObserverLogger.LogInfo($"Unable to generate dmp file:\n{ioe.ToString()}");
                    }
                }
            }

            // Health Warning (only when the Error threshold was not breached)
            if (!warningOrError && data.IsUnhealthy(thresholdWarning))
            {
                warningOrError = true;
                healthState    = HealthState.Warning;
            }

            if (warningOrError)
            {
                string errorWarningKind = ResolveErrorWarningCode(data.Property, healthState == HealthState.Error);

                var healthMessage = new StringBuilder();

                if (name != null)
                {
                    healthMessage.Append($"{name} (Service Process: {procName}, {repPartitionId}, {repOrInstanceId}): ");
                }

                healthMessage.Append($"{data.Property} is at or above the specified {thresholdName} limit ({threshold}{data.Units})");
                healthMessage.AppendLine($" - Average {data.Property}: {Math.Round(data.AverageDataValue)}{data.Units}");

                // Materialize once; the same text goes to the health report and to ETW.
                string healthMessageText = healthMessage.ToString();

                // Set internal fabric health states...
                data.ActiveErrorOrWarning = true;

                // This means this observer created a Warning or Error SF Health Report
                this.HasActiveFabricErrorOrWarning = true;

                var healthReport = new Utilities.HealthReport
                {
                    AppName                = appName,
                    Code                   = errorWarningKind,
                    EmitLogEvent           = true,
                    HealthMessage          = healthMessageText,
                    HealthReportTimeToLive = healthReportTtl,
                    ReportType             = healthReportType,
                    State                  = healthState,
                    NodeName               = this.NodeName,
                    Observer               = this.ObserverName,
                };

                // Emit a Fabric Health Report and optionally a local log write...
                this.HealthReporter.ReportHealthToServiceFabric(healthReport);

                // Send Health Report as Telemetry event (perhaps it signals an Alert from App Insights, for example...)...
                if (this.IsTelemetryEnabled)
                {
                    _ = this.ObserverTelemetryClient?.ReportHealthAsync(
                        id,
                        this.FabricServiceContext.ServiceName.OriginalString,
                        "FabricObserver",
                        this.ObserverName,
                        $"{this.NodeName}/{errorWarningKind}/{data.Property}/{Math.Round(data.AverageDataValue)}",
                        healthState,
                        this.Token);
                }

                // ETW...
                if (this.IsEtwEnabled)
                {
                    Logger.EtwLogger?.Write(
                        "FabricObserverDataEvent",
                        new
                        {
                            Level                  = (healthState == HealthState.Warning) ? 1 : 2,
                            Node                   = this.NodeName,
                            Observer               = this.ObserverName,
                            HealthEventErrorCode   = errorWarningKind,
                            HealthEventDescription = healthMessageText,
                            Property               = data.Property,
                            Id                     = data.Id,
                            Value                  = $"{Math.Round(data.AverageDataValue)}",
                            Unit                   = data.Units,
                        });
                }
            }
            else if (data.ActiveErrorOrWarning)
            {
                var report = new Utilities.HealthReport
                {
                    AppName                = appName,
                    EmitLogEvent           = true,
                    HealthMessage          = $"{data.Id}: {data.Property} is now within normal/expected range.",
                    HealthReportTimeToLive = default(TimeSpan),
                    ReportType             = healthReportType,
                    State                  = HealthState.Ok,
                    NodeName               = this.NodeName,
                    Observer               = this.ObserverName,
                };

                // Emit an Ok Health Report to clear Fabric Health warning...
                this.HealthReporter.ReportHealthToServiceFabric(report);

                // Reset health states...
                data.ActiveErrorOrWarning          = false;
                this.HasActiveFabricErrorOrWarning = false;
            }

            // No need to keep data in memory...
            data.Data.Clear();
            data.Data.TrimExcess();
        }

        /// <summary>
        /// Maps a monitored property name to the matching error or warning code.
        /// </summary>
        /// <param name="property">Property name taken from the usage data.</param>
        /// <param name="isError">True to select the Error code, false to select the Warning code.</param>
        /// <returns>The matching code, or null when the property name matches none of the known kinds.</returns>
        private static string ResolveErrorWarningCode(string property, bool isError)
        {
            // Ordinal, case-insensitive matching keeps the result independent of the current culture.
            if (property.IndexOf("cpu", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return isError ? ErrorWarningCode.ErrorCpuTime : ErrorWarningCode.WarningCpuTime;
            }

            if (property.IndexOf("disk space", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return isError ? ErrorWarningCode.ErrorDiskSpace : ErrorWarningCode.WarningDiskSpace;
            }

            if (property == "Memory Consumption MB")
            {
                return isError ? ErrorWarningCode.ErrorMemoryCommitted : ErrorWarningCode.WarningMemoryCommitted;
            }

            if (property == "Memory Consumption %")
            {
                return isError ? ErrorWarningCode.ErrorMemoryPercentUsed : ErrorWarningCode.WarningMemoryPercentUsed;
            }

            if (property.Contains("Read"))
            {
                return isError ? ErrorWarningCode.ErrorDiskIoReads : ErrorWarningCode.WarningDiskIoReads;
            }

            if (property.Contains("Write"))
            {
                return isError ? ErrorWarningCode.ErrorDiskIoWrites : ErrorWarningCode.WarningDiskIoWrites;
            }

            if (property.Contains("Queue"))
            {
                return isError ? ErrorWarningCode.ErrorDiskAverageQueueLength : ErrorWarningCode.WarningDiskAverageQueueLength;
            }

            if (property.Contains("Firewall"))
            {
                return isError ? ErrorWarningCode.ErrorTooManyFirewallRules : ErrorWarningCode.WarningTooManyFirewallRules;
            }

            if (property.Contains("Ports"))
            {
                return isError ? ErrorWarningCode.ErrorTooManyActivePorts : ErrorWarningCode.WarningTooManyActivePorts;
            }

            return null;
        }

        /// <summary>
        /// Resolves the name of the process with the given id, disposing the process handle
        /// afterwards. A failed lookup is logged as info and reported as null instead of throwing.
        /// </summary>
        /// <param name="processId">Id of the process to look up.</param>
        /// <returns>The process name, or null when the process could not be inspected.</returns>
        private string GetHostProcessNameOrNull(int processId)
        {
            try
            {
                using (var process = Process.GetProcessById(processId))
                {
                    return process.ProcessName;
                }
            }
            catch (Exception e) when (e is ArgumentException || e is InvalidOperationException)
            {
                // ArgumentException: no running process has this id.
                // InvalidOperationException: the process exited before its name could be read.
                this.ObserverLogger.LogInfo($"Unable to resolve process name for process id {processId}:\n{e}");
                return null;
            }
        }