/// <summary>
/// Scans <c>Folders.appConfigRelativeDir</c> and registers every file found there as an
/// application definition. The definition name is the part of the file name that precedes
/// the first '.', and it is mapped to the path returned by <see cref="Directory.GetFiles(string)"/>.
/// After loading, the first entry of <c>AppNames</c> (if any) becomes the selected application
/// and a property-change notification is raised for <c>AppNames</c>.
/// </summary>
private void LoadAppDefinitions()
{
    // Accept both separator styles: the configured directory may use '/' even on Windows.
    char[] splitters = new char[] { '\\', '/' };

    foreach (string app in Directory.GetFiles(Folders.appConfigRelativeDir))
    {
        // Last path segment (LastIndexOfAny returns -1 when there is no separator,
        // in which case the whole string is the file name).
        string fileName = app.Substring(app.LastIndexOfAny(splitters) + 1);

        // Keep only the text before the FIRST dot, so "my.app.json" is named "my".
        int firstDot = fileName.IndexOf('.');
        string name = firstDot < 0 ? fileName : fileName.Substring(0, firstDot);

        // Two files can collapse to the same name (e.g. "foo.json" and "foo.json.bak"),
        // and the method may be invoked more than once. Add() would throw on the second
        // occurrence and abort the load half-way; keep the first definition instead.
        if (appDefinitions.ContainsKey(name))
        {
            continue;
        }

        appDefinitions.Add(name, app);
        AppNames.Add(name);
    }

    // An empty configuration directory must not crash the loader: only pick a default
    // selection when there is something to select.
    if (AppNames.Count > 0)
    {
        selectedAppName = AppNames[0];
    }

    NotifyOfPropertyChange(() => AppNames);
}
/// <summary> /// This function processes numeric data held in FRUD instances and generates Application or Node level Health Reports depending on supplied thresholds. /// </summary> /// <typeparam name="T">This represents the numeric type of data this function will operate on.</typeparam> /// <param name="data">FabricResourceUsageData instance.</param> /// <param name="thresholdError">Error threshold (numeric)</param> /// <param name="thresholdWarning">Warning threshold (numeric)</param> /// <param name="healthReportTtl">Health report Time to Live (TimeSpan)</param> /// <param name="healthReportType">HealthReport type. Note, only Application and Node health report types are supported.</param> /// <param name="replicaOrInstance">Replica or Instance information contained in a type.</param> /// <param name="dumpOnError">Wheter or not to dump process if Error threshold has been reached.</param> public void ProcessResourceDataReportHealth <T>( FabricResourceUsageData <T> data, T thresholdError, T thresholdWarning, TimeSpan healthReportTtl, HealthReportType healthReportType = HealthReportType.Node, ReplicaOrInstanceMonitoringInfo replicaOrInstance = null, bool dumpOnError = false) where T : struct { if (data == null) { throw new ArgumentException("Supply all required parameters with non-null value."); } if (healthReportType != HealthReportType.Application && healthReportType != HealthReportType.Node) { this.ObserverLogger.LogWarning($"ProcessResourceDataReportHealth: Unsupported HealthReport type -> {Enum.GetName(typeof(HealthReportType), healthReportType)}"); return; } var thresholdName = "Minimum"; bool warningOrError = false; string repPartitionId = null, repOrInstanceId = null, name = null, id = null, procName = null; T threshold = thresholdWarning; var healthState = HealthState.Ok; Uri appName = null; Uri serviceName = null; TelemetryData telemetryData = null; if (healthReportType == HealthReportType.Application) { if (replicaOrInstance != null) { repPartitionId = 
$"Partition: {replicaOrInstance.PartitionId}"; repOrInstanceId = $"Replica: {replicaOrInstance.ReplicaOrInstanceId}"; // Create a unique id which will be used for health Warnings and OKs (clears). appName = replicaOrInstance.ApplicationName; serviceName = replicaOrInstance.ServiceName; name = appName.OriginalString.Replace("fabric:/", string.Empty); } else { appName = new Uri("fabric:/System"); name = data.Id; } id = name + "_" + data.Property.Replace(" ", string.Empty); // The health event description will be a serialized instance of telemetryData, // so it should be completely constructed (filled with data) regardless // of user telemetry settings. telemetryData = new TelemetryData(FabricClientInstance, Token) { ApplicationName = appName?.OriginalString ?? string.Empty, Code = FOErrorWarningCodes.Ok, HealthState = Enum.GetName(typeof(HealthState), HealthState.Ok), NodeName = NodeName, ObserverName = ObserverName, Metric = data.Property, Value = Math.Round(data.AverageDataValue, 1), PartitionId = replicaOrInstance?.PartitionId.ToString(), ReplicaId = replicaOrInstance?.ReplicaOrInstanceId.ToString(), ServiceName = serviceName?.OriginalString ?? string.Empty, Source = ObserverConstants.FabricObserverName, }; try { if (replicaOrInstance != null && replicaOrInstance.HostProcessId > 0) { procName = Process.GetProcessById((int)replicaOrInstance.HostProcessId).ProcessName; } else { // The name of the target service process is always the id for data containers coming from FSO. procName = data.Id; } telemetryData.ServiceName = procName; if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled) { _ = TelemetryClient?.ReportMetricAsync( telemetryData, Token).ConfigureAwait(false); } if (IsEtwEnabled) { Logger.EtwLogger?.Write( ObserverConstants.FabricObserverETWEventName, new { ApplicationName = appName?.OriginalString ?? 
string.Empty, Code = FOErrorWarningCodes.Ok, HealthState = Enum.GetName(typeof(HealthState), HealthState.Ok), NodeName, ObserverName, Metric = data.Property, Value = Math.Round(data.AverageDataValue, 1), PartitionId = replicaOrInstance?.PartitionId.ToString(), ReplicaId = replicaOrInstance?.ReplicaOrInstanceId.ToString(), ServiceName = procName, Source = ObserverConstants.FabricObserverName, }); } } catch (ArgumentException) { return; } catch (InvalidOperationException) { return; } } else { string drive = string.Empty; if (ObserverName == ObserverConstants.DiskObserverName) { drive = $"{data.Id}: "; if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { drive = $"{data.Id.Remove(1, 2)}: "; } } // The health event description will be a serialized instance of telemetryData, // so it should be completely constructed (filled with data) regardless // of user telemetry settings. telemetryData = new TelemetryData(FabricClientInstance, Token) { Code = FOErrorWarningCodes.Ok, HealthState = Enum.GetName(typeof(HealthState), HealthState.Ok), NodeName = NodeName, ObserverName = ObserverName, Metric = $"{drive}{data.Property}", Source = ObserverConstants.FabricObserverName, Value = Math.Round(data.AverageDataValue, 1), }; if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled) { _ = TelemetryClient?.ReportMetricAsync( telemetryData, Token); } if (IsEtwEnabled) { Logger.EtwLogger?.Write( ObserverConstants.FabricObserverETWEventName, new { Code = FOErrorWarningCodes.Ok, HealthState = Enum.GetName(typeof(HealthState), HealthState.Ok), NodeName, ObserverName, Metric = $"{drive}{data.Property}", Source = ObserverConstants.FabricObserverName, Value = Math.Round(data.AverageDataValue, 1), }); } } // Health Error if (data.IsUnhealthy(thresholdError)) { thresholdName = "Maximum"; threshold = thresholdError; warningOrError = true; healthState = HealthState.Error; // This is primarily useful for AppObserver, but makes sense to be // part of the base class for future use, like 
for FSO. if (replicaOrInstance != null && dumpOnError) { try { int procId = (int)replicaOrInstance.HostProcessId; if (!this.serviceDumpCountDictionary.ContainsKey(procName)) { this.serviceDumpCountDictionary.Add(procName, 0); } if (this.serviceDumpCountDictionary[procName] < this.maxDumps) { // DumpServiceProcess defaults to a Full dump with // process memory, handles and thread data. bool success = DumpServiceProcess(procId); if (success) { this.serviceDumpCountDictionary[procName]++; } } } // Ignore these, it just means no dmp will be created.This is not // critical to FO. Log as info, not warning. catch (Exception e) when(e is ArgumentException || e is InvalidOperationException) { ObserverLogger.LogInfo($"Unable to generate dmp file:{Environment.NewLine}{e}"); } } } // Health Warning if (!warningOrError && data.IsUnhealthy(thresholdWarning)) { warningOrError = true; healthState = HealthState.Warning; } if (warningOrError) { string errorWarningCode = null; switch (data.Property) { case ErrorWarningProperty.TotalCpuTime when healthReportType == HealthReportType.Application: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.AppErrorCpuPercent : FOErrorWarningCodes.AppWarningCpuPercent; break; case ErrorWarningProperty.TotalCpuTime: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.NodeErrorCpuPercent : FOErrorWarningCodes.NodeWarningCpuPercent; break; case ErrorWarningProperty.DiskSpaceUsagePercentage: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.NodeErrorDiskSpacePercent : FOErrorWarningCodes.NodeWarningDiskSpacePercent; break; case ErrorWarningProperty.DiskSpaceUsageMb: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.NodeErrorDiskSpaceMB : FOErrorWarningCodes.NodeWarningDiskSpaceMB; break; case ErrorWarningProperty.TotalMemoryConsumptionMb when healthReportType == HealthReportType.Application: errorWarningCode = (healthState == HealthState.Error) ? 
FOErrorWarningCodes.AppErrorMemoryMB : FOErrorWarningCodes.AppWarningMemoryMB; break; case ErrorWarningProperty.TotalMemoryConsumptionMb: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.NodeErrorMemoryMB : FOErrorWarningCodes.NodeWarningMemoryMB; break; case ErrorWarningProperty.TotalMemoryConsumptionPct when replicaOrInstance != null: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.AppErrorMemoryPercent : FOErrorWarningCodes.AppWarningMemoryPercent; break; case ErrorWarningProperty.TotalMemoryConsumptionPct: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.NodeErrorMemoryPercent : FOErrorWarningCodes.NodeWarningMemoryPercent; break; case ErrorWarningProperty.DiskAverageQueueLength: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.NodeErrorDiskAverageQueueLength : FOErrorWarningCodes.NodeWarningDiskAverageQueueLength; break; case ErrorWarningProperty.TotalActiveFirewallRules: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.ErrorTooManyFirewallRules : FOErrorWarningCodes.WarningTooManyFirewallRules; break; case ErrorWarningProperty.TotalActivePorts when healthReportType == HealthReportType.Application: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.AppErrorTooManyActiveTcpPorts : FOErrorWarningCodes.AppWarningTooManyActiveTcpPorts; break; case ErrorWarningProperty.TotalActivePorts: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.NodeErrorTooManyActiveTcpPorts : FOErrorWarningCodes.NodeWarningTooManyActiveTcpPorts; break; case ErrorWarningProperty.TotalEphemeralPorts when healthReportType == HealthReportType.Application: errorWarningCode = (healthState == HealthState.Error) ? 
FOErrorWarningCodes.AppErrorTooManyActiveEphemeralPorts : FOErrorWarningCodes.AppWarningTooManyActiveEphemeralPorts; break; case ErrorWarningProperty.TotalEphemeralPorts: errorWarningCode = (healthState == HealthState.Error) ? FOErrorWarningCodes.NodeErrorTooManyActiveEphemeralPorts : FOErrorWarningCodes.NodeWarningTooManyActiveEphemeralPorts; break; } var healthMessage = new StringBuilder(); string drive = string.Empty; if (ObserverName == ObserverConstants.DiskObserverName) { drive = $"{data.Id}: "; if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { drive = $"{data.Id.Remove(1, 2)}: "; } } _ = healthMessage.Append($"{drive}{data.Property} is at or above the specified {thresholdName} limit ({threshold}{data.Units})"); _ = healthMessage.AppendLine($" - {data.Property}: {Math.Round(data.AverageDataValue)}{data.Units}"); // The health event description will be a serialized instance of telemetryData, // so it should be completely constructed (filled with data) regardless // of user telemetry settings. telemetryData.ApplicationName = appName?.OriginalString ?? string.Empty; telemetryData.Code = errorWarningCode; if (replicaOrInstance != null && !string.IsNullOrEmpty(replicaOrInstance.ContainerId)) { telemetryData.ContainerId = replicaOrInstance.ContainerId; } telemetryData.HealthState = Enum.GetName(typeof(HealthState), healthState); telemetryData.HealthEventDescription = healthMessage.ToString(); telemetryData.Metric = $"{drive}{data.Property}"; telemetryData.ServiceName = serviceName?.OriginalString ?? string.Empty; telemetryData.Source = ObserverConstants.FabricObserverName; telemetryData.Value = Math.Round(data.AverageDataValue, 1); // Send Health Report as Telemetry event (perhaps it signals an Alert from App Insights, for example.). if (IsTelemetryProviderEnabled && IsObserverTelemetryEnabled) { _ = TelemetryClient?.ReportHealthAsync( telemetryData, Token); } // ETW. 
if (IsEtwEnabled) { Logger.EtwLogger?.Write( ObserverConstants.FabricObserverETWEventName, new { ApplicationName = appName?.OriginalString ?? string.Empty, Code = errorWarningCode, ContainerId = replicaOrInstance != null ? replicaOrInstance.ContainerId ?? string.Empty : string.Empty, HealthState = Enum.GetName(typeof(HealthState), healthState), HealthEventDescription = healthMessage.ToString(), Metric = $"{drive}{data.Property}", Node = NodeName, ServiceName = serviceName?.OriginalString ?? string.Empty, Source = ObserverConstants.FabricObserverName, Value = Math.Round(data.AverageDataValue, 1), }); } var healthReport = new HealthReport { AppName = appName, Code = errorWarningCode, EmitLogEvent = true, HealthData = telemetryData, HealthMessage = healthMessage.ToString(), HealthReportTimeToLive = healthReportTtl, ReportType = healthReportType, State = healthState, NodeName = NodeName, Observer = ObserverName, ResourceUsageDataProperty = data.Property, }; if (!AppNames.Any(a => a == appName?.OriginalString)) { AppNames.Add(appName?.OriginalString); } // From FSO. if (replicaOrInstance == null && healthReportType == HealthReportType.Application) { HealthReportProperties.Add(id); } else { if (HealthReportProperties.Count == 0) { HealthReportProperties.Add(ObserverName switch { ObserverConstants.AppObserverName => "ApplicationHealth", ObserverConstants.CertificateObserverName => "SecurityHealth", ObserverConstants.DiskObserverName => "DiskHealth", ObserverConstants.FabricSystemObserverName => "FabricSystemServiceHealth", ObserverConstants.NetworkObserverName => "NetworkHealth", ObserverConstants.OSObserverName => "MachineInformation", ObserverConstants.NodeObserverName => "MachineResourceHealth", _ => $"{data.Property}", });