/// <summary>
/// Builds a <see cref="SystemCheckResult"/> for a checker, combining the supplied health
/// state and message with the checker's own <c>IsVital</c> and <c>SystemName</c> values.
/// </summary>
/// <param name="systemChecker">Checker whose <c>IsVital</c> and <c>SystemName</c> are copied onto the result.</param>
/// <param name="state">Health state stored in the result's <c>Health</c> property.</param>
/// <param name="message">Text stored in the result's <c>Message</c> property.</param>
/// <returns>The newly created result.</returns>
public static SystemCheckResult CreateResult(this ISystemChecker systemChecker, HealthState state, string message)
{
    var checkResult = new SystemCheckResult
    {
        Health = state,
        IsVital = systemChecker.IsVital,
        SystemName = systemChecker.SystemName,
        Message = message
    };

    return checkResult;
}
/// <summary>
/// Reports the "Connectivity" health property for the deployed service package identified by
/// this reporter's application name, service manifest name and node name.
/// </summary>
/// <param name="healthState">Health state to report.</param>
/// <param name="problemDescription">Text placed in the health information's description.</param>
private void ReportHealth(HealthState healthState, string problemDescription)
{
    var connectivityInfo = new HealthInformation(this.entityIdentifier, "Connectivity", healthState)
    {
        Description = problemDescription
    };

    var packageReport = new DeployedServicePackageHealthReport(
        this.applicatioName,
        this.serviceManifestName,
        this.nodeName,
        connectivityInfo);

    // Hand the report to the fabric client's health manager.
    this.fabricClient.HealthManager.ReportHealth(packageReport);
}
/// <summary>
/// Creates a substitute <see cref="ISystemChecker"/> whose <c>CheckSystem()</c> call returns a
/// canned <see cref="SystemCheckResult"/> built from the arguments.
/// </summary>
/// <param name="state">Health state placed on the canned result.</param>
/// <param name="isVital">Value of <c>IsVital</c> on the canned result.</param>
/// <param name="isIntrusive">Value returned by the substitute's <c>IsIntrusive</c>.</param>
/// <param name="name">System name returned by the substitute and stamped on the canned result.</param>
/// <returns>The configured substitute.</returns>
protected ISystemChecker CreateChecker(HealthState state, bool isVital = true, bool isIntrusive = false, string name = "")
{
    var checker = Substitute.For<ISystemChecker>();

    var cannedResult = new SystemCheckResult
    {
        SystemName = name,
        Health = state,
        IsVital = isVital
    };

    checker.CheckSystem().Returns(cannedResult);
    checker.SystemName.Returns(name);
    checker.IsIntrusive.Returns(isIntrusive);

    // NOTE(review): isVital is applied only to the canned result; the substitute's own
    // IsVital member is not configured here - confirm that is intended.
    return checker;
}
/// <summary>
/// Initializes the reporter: validates the identifier, creates a <see cref="FabricClient"/> with a
/// five-second health report send interval, and captures the application name, service manifest
/// name and node name from the Service Fabric runtime.
/// </summary>
/// <param name="entityIdentifier">Identifier stored for later use as the first argument of the health information.</param>
/// <param name="problemHealthState">Health state stored for problem reports; defaults to <c>HealthState.Warning</c>.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="entityIdentifier"/> is null, empty or whitespace.</exception>
public FabricHealthReporter(string entityIdentifier, HealthState problemHealthState = HealthState.Warning)
{
    if (string.IsNullOrWhiteSpace(entityIdentifier))
    {
        throw new ArgumentException("entityIdentifier cannot be null or empty", "entityIdentifier");
    }

    this.entityIdentifier = entityIdentifier;
    this.problemHealthState = problemHealthState;

    var clientSettings = new FabricClientSettings();
    clientSettings.HealthReportSendInterval = TimeSpan.FromSeconds(5);
    this.fabricClient = new FabricClient(clientSettings);

    // Identity of the hosting code package, read from the runtime.
    var activation = FabricRuntime.GetActivationContext();
    this.applicatioName = new Uri(activation.ApplicationName);
    this.serviceManifestName = activation.GetServiceManifestName();

    this.nodeName = FabricRuntime.GetNodeContext().NodeName;
}
/// <summary>
/// Creates a player at the given position, stopped horizontally, falling vertically,
/// with normal health and a looping animation sized from the player sprite slice.
/// </summary>
/// <param name="x">Initial X position.</param>
/// <param name="y">Initial Y position.</param>
public Player(float x, float y)
{
    Rectangle slice = Sprites.GetSlice(Element.Player);

    // Placement and motion.
    Position = new Vector2(x, y);
    Velocity = Vector2.Zero;
    Dst = new Rectangle((int)x, (int)y, slice.Width, slice.Height);

    // Identity and initial state.
    ElementType = Element.Player;
    Visible = true;
    XState = HorizontalState.Stopped;
    YState = VerticalState.Falling;
    HState = HealthState.Normal;

    // Frame count is the texture width divided by the width of one slice.
    TimeSpan frameDuration = TimeSpan.FromMilliseconds(200f);
    int frameCount = Sprites.GetTexture(Element.Player).Width / slice.Width;
    PlayerAnimation = new Animation(frameDuration, slice.Width, slice.Height, frameCount, true);

    Direction = 1;
}
/// <summary>
/// Per-frame update: counts down the invulnerability timer, classifies the health state from
/// the ratio of current to maximum health, runs the overlay/indicator/death handlers, and
/// rotates the damager bookkeeping.
/// </summary>
public override void Update()
{
    base.Update();

    // Invulnerability lasts while the countdown is still above zero.
    timeUntilVulnerable = Mathf.Max(0.0f, timeUntilVulnerable - Time.deltaTime);
    invulnerable = timeUntilVulnerable > 0.0f;

    float healthFraction = currentHealth / maxHealth;

    HealthState classified;
    if (healthFraction <= healthRatioToBeCritical)
    {
        classified = HealthState.CRITICAL;
    }
    else if (healthFraction <= healthRatioToBeInjured)
    {
        classified = HealthState.INJURED;
    }
    else
    {
        classified = HealthState.HEALTHY;
    }

    this.state = classified;

    HandleDamageOverlay();
    HandleDamageElectricField();
    HandleEnemyIndicators();
    HandleDeath();

    // Remember this frame's damager and clear the slot for the next frame.
    lastDamager = currentDamager;
    currentDamager = null;
}
/// <summary>
/// Creates an application health state for the given application URI.
/// </summary>
/// <param name="appUri">Application URI; passed through <c>Guard.IsNotNull</c> before being stored.</param>
/// <param name="state">Health state forwarded to the base constructor.</param>
public ApplicationHealthState(Uri appUri, HealthState state)
    : base(state)
{
    var checkedUri = Guard.IsNotNull(appUri, nameof(appUri));
    this.ApplicationName = checkedUri;
}
/// <summary>
/// Creates a cluster health state; the only work done is forwarding
/// <paramref name="state"/> to the base constructor.
/// </summary>
/// <param name="state">Health state forwarded to the base constructor.</param>
public ClusterHealthState(HealthState state)
    : base(state)
{
}
/// <summary>
/// Utility to report health for the NodeAgentService. Typical use cases:
/// if a Windows Update operation is not successful after exhausting all retries, post a warning-level health report;
/// if a Windows Update operation is successful, post an Ok-level health report.
/// </summary>
/// <param name="applicationName">Application name, forwarded to <c>HealthManagerHelper.PostServiceHealthReport</c>.</param>
/// <param name="healthProperty">Title for the health report. Once the health report is set, any future updates should be done using the same healthProperty.</param>
/// <param name="healthDescription">Description of the health. In case of failure a good description is very helpful for quick mitigation.</param>
/// <param name="healthState"><see cref="HealthState"/> indicating the severity of the health report; cast to <c>System.Fabric.Health.HealthState</c> before being forwarded.</param>
/// <param name="timeToLiveInMinutes">Time to live for the health report in minutes; forwarded unchanged to the helper.</param>
/// <param name="timeout">Not used by this implementation.</param>
/// <param name="cancellationToken">Not used by this implementation.</param>
/// <returns>Operation result in <see cref="NodeAgentSfUtilityExitCodes"/>.</returns>
public NodeAgentSfUtilityExitCodes ReportHealth(Uri applicationName, String healthProperty, String healthDescription, HealthState healthState, long timeToLiveInMinutes, TimeSpan timeout, CancellationToken cancellationToken)
{
    var fabricHealthState = (System.Fabric.Health.HealthState)healthState;

    NodeAgentSfUtilityExitCodes exitCode = HealthManagerHelper.PostServiceHealthReport(
        this.fabricClient,
        applicationName,
        healthProperty,
        healthDescription,
        fabricHealthState,
        timeToLiveInMinutes);

    ServiceEventSource.Current.InfoMessage("CommandProcessor.ReportHealth returned {0}", exitCode);
    return exitCode;
}
/// <summary>
/// Tells whether a health state counts as wounded, i.e. equals
/// <c>HealthState.BASHING</c> or <c>HealthState.LETHAL</c>.
/// </summary>
/// <param name="hs">Health state to test.</param>
/// <returns><c>true</c> for BASHING or LETHAL; otherwise <c>false</c>.</returns>
public static Boolean IsWoundedHealthLevel(HealthState hs)
{
    if (hs.Equals(HealthState.BASHING))
    {
        return true;
    }

    return hs.Equals(HealthState.LETHAL);
}
/// <summary>
/// Creates a replica health entry for the given partition and replica.
/// </summary>
/// <param name="partitionId">Stored in <c>PartitionId</c>.</param>
/// <param name="replicaId">Stored in <c>ReplicaId</c>.</param>
/// <param name="aggregatedHealthState">Forwarded to the base constructor.</param>
internal ReplicaHealth(Guid partitionId, long replicaId, HealthState aggregatedHealthState)
    : base(aggregatedHealthState)
{
    this.ReplicaId = replicaId;
    this.PartitionId = partitionId;
}
/// <summary>
/// Advances the player by one step: applies gravity and air friction, runs the X/Y collision
/// resolvers, derives the next vertical/horizontal/health states, and follows the tile the
/// player is locked onto (if any).
/// </summary>
/// <param name="delta">Step factor used when projecting movement and when cancelling gravity on a locked tile.</param>
/// <param name="tiles">Tiles tested for ground contact and for pushable obstacles.</param>
internal void Update(float delta, List<Tile> tiles)
{
    TryingToMove = false;
    bool pushing = false;
    bool carrying = XState.Equals(HorizontalState.Carrying);

    // Trying to move
    if (Math.Abs(Velocity.X) == 0)
    {
        TryingToMove = true;
    }

    // Gravity
    Velocity.Y += WorldSettings.GRAVITY;

    // Update position/velocity and resolve collisions
    ResolveXCollision(delta, tiles);
    ResolveYCollision(delta, tiles);

    // Air friction
    Velocity.X *= WorldSettings.AIR_FRICTION;

    // Rounding: snap very small horizontal speeds to zero
    if (Math.Abs(Velocity.X) < WorldSettings.SLIDING_CUTOFF)
    {
        Velocity.X = 0;
    }

    UpdateDst();

    // Next vertical state: project the rectangle downwards and look for a solid tile.
    Rectangle probe = Dst;
    bool onGround = false;
    probe.Y += (int)Math.Ceiling(Velocity.Y + WorldSettings.GRAVITY * delta);
    foreach (Tile tile in tiles)
    {
        if (tile.Solid && probe.Intersects(tile.Dst))
        {
            onGround = true;
        }
    }

    // Next horizontal state: project sideways and remember the last pushable solid tile hit.
    probe = Dst;
    probe.X += (int)Math.Round((Velocity.X + MoveSpeed * Direction) * delta);
    Tile tileToPush = null;
    foreach (Tile tile in tiles)
    {
        if (tile.Solid && probe.Intersects(tile.Dst) && tile.Pushable)
        {
            tileToPush = tile;
        }
    }

    if (tileToPush != null && !XState.Equals(HorizontalState.Carrying))
    {
        pushing = true;
        Game1.Events.Notify(Event.PlayerPushesTile, new object[] { this, tileToPush });
    }

    // Update states
    if (onGround)
    {
        YState = VerticalState.OnGround;
    }
    else if (Velocity.Y < 0)
    {
        YState = VerticalState.Jumping;
    }
    else if (Velocity.Y > 0)
    {
        YState = VerticalState.Falling;
    }

    if (HState.Equals(HealthState.Hurt))
    {
        HurtCounter--;
        if (HurtCounter <= 0)
        {
            HState = HealthState.Normal;
        }
    }

    // While carrying or pushing, the horizontal state is left untouched.
    if (!(carrying || pushing))
    {
        XState = Velocity.X == 0 ? HorizontalState.Stopped : HorizontalState.Walking;
    }

    // Lock on moving platforms
    if (LockOnTileTile == null)
    {
        return;
    }

    Vector2 tileDisplacement = LockOnTileTile.Movement.GetDisplacement();
    if (LockToHorizontalTile)
    {
        Position.X += tileDisplacement.X;
    }

    if (LockToVerticalTile)
    {
        Position.Y = LockOnTileTile.Position.Y - Dst.Height - 3;

        // Cancel out gravity
        Velocity.Y -= WorldSettings.GRAVITY * delta;
    }

    UpdateDst();

    if (!YState.Equals(VerticalState.OnGround))
    {
        ReleaseLocks();
    }
}
/// <summary>
/// Creates an entity health state holding the given aggregated health state.
/// </summary>
/// <param name="state">Value stored in <c>AggregatedHealthState</c>.</param>
internal EntityHealthState(HealthState state)
{
    AggregatedHealthState = state;
}
/// <summary>
/// Creates a service health state for the given service name.
/// </summary>
/// <param name="serviceName">Service URI; passed through <c>Guard.IsNotNull</c> before being stored.</param>
/// <param name="state">Health state forwarded to the base constructor.</param>
public ServiceHealthState(Uri serviceName, HealthState state)
    : base(state)
{
    var checkedName = Guard.IsNotNull(serviceName, nameof(serviceName));
    this.ServiceName = checkedName;
}
/// <inheritdoc/>
public override async Task ReportAsync(CancellationToken token)
{
    var timeToLiveWarning = this.SetHealthReportTimeToLive();

    // Report on connection state, one configured target application at a time.
    foreach (var appConfig in this.userConfig)
    {
        token.ThrowIfCancellationRequested();

        var deployedApps = await this.FabricClientInstance.QueryManager
            .GetDeployedApplicationListAsync(this.NodeName, new Uri(appConfig.TargetApp))
            .ConfigureAwait(true);

        // We only care about deployed apps.
        if (deployedApps == null || deployedApps.Count < 1)
        {
            continue;
        }

        foreach (var endpoint in this.connectionStatus.Where(cs => cs.TargetApp == appConfig.TargetApp))
        {
            token.ThrowIfCancellationRequested();

            if (!endpoint.Connected)
            {
                // Endpoint unreachable: raise a Warning.
                this.healthState = HealthState.Warning;
                var failureMessage = $"Outbound Internet connection failure detected for endpoint {endpoint.HostName}{Environment.NewLine}";

                var warningReport = new HealthReport
                {
                    AppName = new Uri(endpoint.TargetApp),
                    EmitLogEvent = true,
                    HealthMessage = failureMessage,
                    HealthReportTimeToLive = timeToLiveWarning,
                    State = this.healthState,
                    NodeName = this.NodeName,
                    Observer = this.ObserverName,
                    Property = $"EndpointUnreachable({endpoint.HostName})",
                    ReportType = HealthReportType.Application,
                    ResourceUsageDataProperty = $"{ErrorWarningProperty.InternetConnectionFailure}: {endpoint.HostName}",
                };

                // Send health report Warning and log event locally.
                this.HealthReporter.ReportHealthToServiceFabric(warningReport);

                // This means this observer created a Warning or Error SF Health Report
                this.HasActiveFabricErrorOrWarning = true;

                // Send Health Telemetry (perhaps it signals an Alert in AppInsights or LogAnalytics).
                if (this.IsTelemetryProviderEnabled && this.IsObserverTelemetryEnabled)
                {
                    var warningTelemetry = new TelemetryData(this.FabricClientInstance, token)
                    {
                        ApplicationName = endpoint.TargetApp,
                        Code = FoErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                        HealthState = "Warning",
                        HealthEventDescription = failureMessage,
                        ObserverName = this.ObserverName,
                        Metric = ErrorWarningProperty.InternetConnectionFailure,
                        NodeName = this.NodeName,
                    };

                    _ = this.TelemetryClient?.ReportMetricAsync(warningTelemetry, this.Token);
                }

                // ETW.
                if (this.IsEtwEnabled)
                {
                    Logger.EtwLogger?.Write(
                        ObserverConstants.FabricObserverETWEventName,
                        new
                        {
                            ApplicationName = endpoint.TargetApp,
                            Code = FoErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                            HealthState = "Warning",
                            HealthEventDescription = failureMessage,
                            ObserverName = this.ObserverName,
                            Metric = ErrorWarningProperty.InternetConnectionFailure,
                            NodeName = this.NodeName,
                        });
                }

                continue;
            }

            // Connected: only endpoints whose recorded health is Warning need clearing.
            if (endpoint.Health != HealthState.Warning)
            {
                continue;
            }

            this.healthState = HealthState.Ok;
            var recoveryMessage = $"Outbound Internet connection successful for {endpoint.HostName} from node {this.NodeName}.";

            // Clear existing Health Warning.
            var okReport = new HealthReport
            {
                AppName = new Uri(endpoint.TargetApp),
                Code = FoErrorWarningCodes.AppWarningNetworkEndpointUnreachable,
                EmitLogEvent = true,
                HealthMessage = recoveryMessage,
                HealthReportTimeToLive = default(TimeSpan),
                State = HealthState.Ok,
                NodeName = this.NodeName,
                Observer = this.ObserverName,
                Property = $"EndpointUnreachable({endpoint.HostName})",
                ReportType = HealthReportType.Application,
            };

            this.HealthReporter.ReportHealthToServiceFabric(okReport);

            // Telemetry.
            if (this.IsTelemetryProviderEnabled && this.IsObserverTelemetryEnabled)
            {
                var okTelemetry = new TelemetryData(this.FabricClientInstance, token)
                {
                    ApplicationName = endpoint.TargetApp,
                    Code = FoErrorWarningCodes.Ok,
                    HealthState = "Ok",
                    HealthEventDescription = recoveryMessage,
                    ObserverName = this.ObserverName,
                    Metric = "Internet Connection State",
                    NodeName = this.NodeName,
                };

                _ = this.TelemetryClient?.ReportMetricAsync(okTelemetry, this.Token);
            }

            // ETW.
            if (this.IsEtwEnabled)
            {
                Logger.EtwLogger?.Write(
                    ObserverConstants.FabricObserverETWEventName,
                    new
                    {
                        ApplicationName = endpoint.TargetApp,
                        Code = FoErrorWarningCodes.Ok,
                        HealthState = "Ok",
                        HealthEventDescription = recoveryMessage,
                        ObserverName = this.ObserverName,
                        Metric = "Internet Connection State",
                        NodeName = this.NodeName,
                    });
            }

            // Reset health state.
            // NOTE(review): this also resets the flag when another endpoint was put into
            // Warning earlier in the same pass - confirm that is intended.
            this.HasActiveFabricErrorOrWarning = false;
        }
    }

    // Clear: drop the entries that are connected and reset the test results.
    _ = this.connectionStatus.RemoveAll(conn => conn.Connected);
    this.connectionStatus.TrimExcess();
    this.connEndpointTestResults.Clear();
}
/// <summary>
/// Creates a replica health state for the given partition and replica.
/// </summary>
/// <param name="partitionId">Stored in <c>PartitionId</c>.</param>
/// <param name="replicaId">Stored in <c>ReplicaId</c>.</param>
/// <param name="state">Health state forwarded to the base constructor.</param>
public ReplicaHealthState(Guid partitionId, long replicaId, HealthState state)
    : base(state)
{
    this.ReplicaId = replicaId;
    this.PartitionId = partitionId;
}
/// <summary>
/// Forwards a health state and message to the health reporter service.
/// </summary>
/// <param name="state">Health state to report.</param>
/// <param name="message">Message accompanying the report.</param>
/// <returns>A task representing the call.</returns>
public async Task UpdateHealthState(HealthState state, string message)
{
    // NOTE(review): nothing is awaited in this async method, so the report is sent
    // synchronously before the returned task is handed back. If SendReportForService
    // returns a task, it is not observed here - confirm against its declaration.
    var reporter = this.HealthReporterService;
    reporter.SendReportForService(state, message);
}
/// <summary>
/// Convenience overload that stores a <see cref="HealthState"/> fact by casting it to
/// <see cref="byte"/> and delegating to the byte-valued <c>SetFact</c> overload.
/// </summary>
/// <param name="fact">Fact to set.</param>
/// <param name="value">Health state value; cast to <see cref="byte"/>.</param>
/// <param name="invokeChangedEvent">Forwarded unchanged.</param>
/// <param name="setAsDirty">Forwarded unchanged.</param>
/// <param name="checkValueDiff">Forwarded unchanged.</param>
public void SetFact(Facts fact, HealthState value, bool invokeChangedEvent = true, bool setAsDirty = true, bool checkValueDiff = true)
{
    byte rawValue = (byte)value;
    this.SetFact(fact, rawValue, invokeChangedEvent, setAsDirty, checkValueDiff);
}
/// <summary>
/// Convenience overload that casts both the fact and the health state value to
/// <see cref="byte"/> and delegates to the byte-based <c>PushFactChangeDuringPlanning</c> overload.
/// </summary>
/// <param name="fact">Fact being changed; cast to <see cref="byte"/>.</param>
/// <param name="value">New health state value; cast to <see cref="byte"/>.</param>
/// <param name="temporary">Forwarded unchanged.</param>
public void PushFactChangeDuringPlanning(Facts fact, HealthState value, bool temporary)
{
    byte rawFact = (byte)fact;
    byte rawValue = (byte)value;
    this.PushFactChangeDuringPlanning(rawFact, rawValue, temporary);
}
/// <summary>
/// Creates a mock partition; all arguments are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="serviceKind">Forwarded to the base constructor.</param>
/// <param name="partitionInformation">Forwarded to the base constructor.</param>
/// <param name="healthState">Forwarded to the base constructor.</param>
/// <param name="partitionStatus">Forwarded to the base constructor.</param>
public MockPartition(
    ServiceKind serviceKind,
    ServicePartitionInformation partitionInformation,
    HealthState healthState,
    ServicePartitionStatus partitionStatus)
    : base(serviceKind, partitionInformation, healthState, partitionStatus)
{
}
/// <summary>
/// Creates a deployed-application health entry for an application on a node.
/// </summary>
/// <param name="appName">Stored in <c>ApplicationName</c>.</param>
/// <param name="nodeName">Stored in <c>NodeName</c>.</param>
/// <param name="aggregatedHealthState">Forwarded to the base constructor.</param>
internal DeployedApplicationHealth(string appName, string nodeName, HealthState aggregatedHealthState)
    : base(aggregatedHealthState)
{
    this.NodeName = nodeName;
    this.ApplicationName = appName;
}
/// <summary>
/// Creates a node health state for the given node name.
/// </summary>
/// <param name="nodeName">Node name; passed through <c>Guard.IsNotNullOrEmpty</c> before being stored.</param>
/// <param name="state">Health state forwarded to the base constructor.</param>
public NodeHealthState(string nodeName, HealthState state)
    : base(state)
{
    var checkedName = Guard.IsNotNullOrEmpty(nodeName, nameof(nodeName));
    this.NodeName = checkedName;
}
/// <summary>
/// Advances the player's health state machine (ALIVE -> DOWNED -> DEAD -> ALIVE) and raises
/// <c>EventHealthStateChanged</c> when a transition happened.
/// </summary>
/// <param name="_TargetPlayer">Not read by this method.</param>
/// <param name="_fHealthCurrentValue">Not read by this method; the health is taken from <c>m_fHealth.Get()</c>.</param>
/// <param name="_fHealthPreviousValue">Not read by this method.</param>
private void UpdateHealthState(GameObject _TargetPlayer, float _fHealthCurrentValue, float _fHealthPreviousValue)
{
    // INVALID marks "no transition happened during this call".
    HealthState stateBeforeChange = HealthState.INVALID;

    switch (CurrentHealthState)
    {
        case HealthState.ALIVE:
            // Health at the minimum: the player goes down.
            if (m_fHealth.Get() == k_fMinHealth)
            {
                stateBeforeChange = CurrentHealthState;
                CurrentHealthState = HealthState.DOWNED;
            }

            break;

        case HealthState.DEAD:
            // Health no longer at the minimum: the player is alive again.
            if (!(m_fHealth.Get() == k_fMinHealth))
            {
                stateBeforeChange = CurrentHealthState;
                CurrentHealthState = HealthState.ALIVE;
            }

            break;

        case HealthState.DOWNED:
            // Accumulate time spent downed; once the limit is reached the player dies.
            fTimerDowned += Time.deltaTime;
            if (fTimerDowned >= k_fTimerDownedMaxDuration)
            {
                stateBeforeChange = CurrentHealthState;
                CurrentHealthState = HealthState.DEAD;

                // Reset downed timer
                fTimerDowned = 0.0f;
            }

            break;

        default:
            // Any other state is unexpected; log it as an error.
            Debug.LogError("Health state: " + CurrentHealthState.ToString());
            break;
    }

    // Notify only when a valid previous state was recorded and it differs from the current one.
    bool stateChanged =
        (stateBeforeChange != HealthState.INVALID) &&
        (stateBeforeChange != HealthState.MAX) &&
        (stateBeforeChange != CurrentHealthState);

    if (stateChanged)
    {
        EventHealthStateChanged(gameObject, CurrentHealthState, stateBeforeChange);
    }
}
/// <summary>
/// Stores the barrel data and creates a fresh health state from its <c>HealthData</c>.
/// </summary>
/// <param name="data">Barrel data to keep; its <c>HealthData</c> seeds the new health state.</param>
public void SetUp(BarrelData data)
{
    Data = data;

    var freshState = new HealthState(data.HealthData);
    HealthState = freshState;
}
/// <summary>
/// Logs that an unhealthy replica was skipped by invoking the <c>_unhealthyReplicaSkipped</c>
/// logging delegate with no exception attached.
/// </summary>
/// <param name="logger">Logger passed to the delegate.</param>
/// <param name="replicaId">Replica identifier.</param>
/// <param name="partition">Partition identifier.</param>
/// <param name="service">Service URI.</param>
/// <param name="replicaStatus">Replica status.</param>
/// <param name="healthState">Replica health state.</param>
public static void UnhealthyReplicaSkipped(ILogger logger, long replicaId, Guid partition, Uri service, ServiceReplicaStatus replicaStatus, HealthState healthState)
{
    // The trailing null is the delegate's final argument.
    _unhealthyReplicaSkipped(
        logger,
        replicaId,
        partition,
        service,
        replicaStatus,
        healthState,
        null);
}
/// <summary>
/// Abstract operation implemented by derived types: takes a role instance and an input
/// health state and returns a health state.
/// </summary>
/// <param name="roleInstance">The role instance the operation is applied to.</param>
/// <param name="input">The incoming health state.</param>
/// <returns>The resulting health state. NOTE(review): presumably a possibly adjusted version of <paramref name="input"/> - confirm against implementations.</returns>
public abstract HealthState Apply( RoleInstance roleInstance, HealthState input);
/// <summary>
/// Logs a failed service health report by invoking the <c>_serviceHealthReportFailed</c>
/// logging delegate.
/// </summary>
/// <param name="logger">Logger passed to the delegate.</param>
/// <param name="state">Health state that was being reported.</param>
/// <param name="serviceName">Service the report was for.</param>
/// <param name="exception">Exception passed to the delegate.</param>
public static void ServiceHealthReportFailed(ILogger<Discoverer> logger, HealthState state, Uri serviceName, Exception exception)
{
    _serviceHealthReportFailed(
        logger,
        state,
        serviceName,
        exception);
}
/// <summary>
/// Asserts that a <see cref="StatefulServiceReplicaHealthReport"/> with the expected health state
/// was reported for the given replica, by delegating to <c>AssertHealthReported</c>.
/// </summary>
/// <param name="replica">Replica whose <c>Id</c> must match the report's <c>ReplicaId</c>.</param>
/// <param name="expectedHealthState">Health state the report is expected to carry.</param>
/// <param name="descriptionCheck">Optional description predicate, forwarded unchanged.</param>
private void AssertStatefulServiceReplicaHealthReported(ReplicaWrapper replica, HealthState expectedHealthState, Func<string, bool> descriptionCheck = null)
{
    // TODO: test helpers don't return the fake partition ID so we can't verify replica.PartitionId is the correct one. Pending to refactor the fixture helpers.
    AssertHealthReported(
        expectedHealthState: expectedHealthState,
        descriptionCheck: descriptionCheck,
        extraChecks: report =>
            (report as StatefulServiceReplicaHealthReport) != null
            && (report as StatefulServiceReplicaHealthReport).ReplicaId == replica.Id,
        because: $"health '{expectedHealthState}' for stateful replica {replica.Id} should be reported");
}
/// <summary>
/// Logs that a replica health report could not be sent because of the service kind, by invoking
/// the <c>_replicaHealthReportFailedInvalidServiceKind</c> logging delegate with no exception attached.
/// </summary>
/// <param name="logger">Logger passed to the delegate.</param>
/// <param name="state">Health state that was being reported.</param>
/// <param name="replicaId">Replica identifier.</param>
/// <param name="serviceKind">Service kind passed to the delegate.</param>
public static void ReplicaHealthReportFailedInvalidServiceKind(ILogger<Discoverer> logger, HealthState state, long replicaId, ServiceKind serviceKind)
{
    // The trailing null is the delegate's final argument.
    _replicaHealthReportFailedInvalidServiceKind(
        logger,
        state,
        replicaId,
        serviceKind,
        null);
}
/// <summary>
/// Caches the <see cref="HealthState"/> component obtained through <c>GetComponent</c>.
/// </summary>
private void Awake()
{
    var component = GetComponent<HealthState>();
    healthState = component;
}
/// <summary>
/// Creates the fire state, linking it to the supplied previous state and to a new
/// <see cref="HealthState"/> that is given this state as its second constructor argument.
/// </summary>
/// <param name="context">Tutorial context, forwarded to the base constructor and to the next state.</param>
/// <param name="_back">State stored in <c>back</c>.</param>
public FireState(Tutorial context, ChainedState _back)
    : base(context)
{
    back = _back;

    var following = new HealthState(context, this);
    next = following;
}
/// <summary>
/// Initializes <c>healthStates</c> to <c>HealthState.healthy</c>.
/// </summary>
void Start()
{
    // Begin in the healthy state.
    healthStates = HealthState.healthy;
}
/// <summary>
/// Execute the HealthCheck request: send the HTTP request to the resolved service endpoint,
/// map the response status to a health state, report it to Service Fabric and to the
/// telemetry provider, and return the updated HealthCheck.
/// </summary>
/// <param name="hc">HealthCheck description.</param>
/// <param name="partition">Partition instance.</param>
/// <returns>
/// The updated HealthCheck instance, or <c>default(HealthCheck)</c> when the arguments are
/// invalid or no service endpoint could be resolved.
/// </returns>
/// <remarks>
/// A <c>FabricTransientException</c> is logged and converted into a failed check result;
/// any other exception is logged and rethrown.
/// </remarks>
internal async Task<HealthCheck> ExecuteHealthCheckAsync(HealthCheck hc, Partition partition)
{
    // Check passed parameters.
    if ((null == partition) || (default(HealthCheck) == hc))
    {
        return default(HealthCheck);
    }

    // Get the service endpoint of the service being tested.
    ResolvedServiceEndpoint rse = await this.GetServiceEndpointAsync(hc.ServiceName, partition);
    if (null == rse)
    {
        return default(HealthCheck);
    }

    // If an endpoint name was specified, search for that name within the ResolvedServiceEndpoint instance.
    //TODO: string baseAddress = (string.IsNullOrWhiteSpace(hc.Endpoint)) ? rse.GetFirstEndpoint() : rse.GetEndpoint(hc.Endpoint);
    string baseAddress = rse.Address;
    Uri uri = new Uri($"{baseAddress}/{hc.SuffixPath}");

    // Create the HttpRequest message. Both the request and the response are disposable;
    // the using blocks release them on every exit path (success, handled failure, rethrow).
    using (HttpRequestMessage request = this.CreateRequestMessage(hc, uri))
    {
        try
        {
            bool success = true;
            HealthState hs = HealthState.Ok;

            // Make the request to the service being tested.
            Stopwatch sw = Stopwatch.StartNew();
            using (HttpResponseMessage response = await this._http.SendAsync(request, HttpCompletionOption.ResponseContentRead, this._token))
            {
                sw.Stop();

                // Evaluate the result of the request. If specific codes were provided, check each of the code arrays to find the result code.
                if ((null != hc.WarningStatusCodes) && (hc.WarningStatusCodes.Contains((int)response.StatusCode)))
                {
                    hs = HealthState.Warning;
                    success = false;
                }
                else if ((null != hc.ErrorStatusCodes) && (hc.ErrorStatusCodes.Contains((int)response.StatusCode)))
                {
                    hs = HealthState.Error;
                    success = false;
                }
                else if (false == response.IsSuccessStatusCode)
                {
                    hs = HealthState.Error;
                    success = false;
                }

                // Report health result to Service Fabric.
                this.Client.HealthManager.ReportHealth(new PartitionHealthReport(hc.Partition, new HealthInformation("Watchdog Health Check", hc.Name, hs)));

                // Report the availability of the tested service to the telemetry provider.
                // NOTE(review): LastAttempt and Duration are read from the incoming hc, i.e. before
                // UpdateWith is applied below - confirm the previous attempt's values are intended.
                await this._telemetry.ReportAvailabilityAsync(
                    hc.ServiceName.AbsoluteUri,
                    hc.Partition.ToString(),
                    hc.Name,
                    hc.LastAttempt,
                    TimeSpan.FromMilliseconds(hc.Duration),
                    null,
                    success,
                    this._token);

                // Return a new HealthCheck instance containing the results of the request.
                long count = (success) ? 0 : hc.FailureCount + 1;
                return hc.UpdateWith(DateTime.UtcNow, count, sw.ElapsedMilliseconds, response.StatusCode);
            }
        }
        catch (FabricTransientException ex)
        {
            //TODO: ServiceEventSource.Current.Exception(ex.Message, ex.GetType().Name, nameof(this.ExecuteHealthCheckAsync));
            ServiceEventSource.Current.Message(ex.Message + ex.GetType().Name + nameof(this.ExecuteHealthCheckAsync));
            return hc.UpdateWith(DateTime.UtcNow, hc.FailureCount + 1, -1, System.Net.HttpStatusCode.InternalServerError);
        }
        catch (Exception ex)
        {
            //TODO: ServiceEventSource.Current.Exception(ex.Message, ex.GetType().Name, nameof(this.ExecuteHealthCheckAsync));
            ServiceEventSource.Current.Message(ex.Message + ex.GetType().Name + nameof(this.ExecuteHealthCheckAsync));
            throw;
        }
    }
}
/// <summary>
/// Returns a health report for the requested entity type.
/// </summary>
/// <param name="context">The service fabric context that the health report is for</param>
/// <param name="reportSourceId">The unique reporting source id</param>
/// <param name="propertyName">The name of the health property being reported on</param>
/// <param name="state">The current state of the health property</param>
/// <param name="reportType">The entity type the report is for</param>
/// <param name="timeToLive">The time to live of the health report</param>
/// <returns>A health report for the appropriate reporting entity</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="reportType"/> is not one of the handled values.</exception>
public static HealthReport GetHealthReport(ServiceContext context, string reportSourceId, string propertyName, HealthState state, ReportTypes reportType, TimeSpan timeToLive)
{
    // The same health information is shared by every report type; it expires after
    // timeToLive and is removed when expired.
    var info = new HealthInformation(reportSourceId, propertyName, state)
    {
        Description = $"{ propertyName } health state { Enum.GetName(typeof(HealthState), state) }",
        RemoveWhenExpired = true,
        TimeToLive = timeToLive,
        SequenceNumber = HealthInformation.AutoSequenceNumber
    };

    switch (reportType)
    {
        case ReportTypes.Cluster:
            return new ClusterHealthReport(info);

        case ReportTypes.Application:
            return new ApplicationHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                info);

        case ReportTypes.DeployedApplication:
            return new DeployedApplicationHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                context.NodeContext.NodeName,
                info);

        case ReportTypes.Service:
            return new ServiceHealthReport(context.ServiceName, info);

        case ReportTypes.DeployedService:
            return new DeployedServicePackageHealthReport(
                new Uri(context.CodePackageActivationContext.ApplicationName),
                context.CodePackageActivationContext.GetServiceManifestName(),
                context.NodeContext.NodeName,
                info);

        case ReportTypes.Node:
            return new NodeHealthReport(context.NodeContext.NodeName, info);

        case ReportTypes.Instance:
            // Stateless contexts report on the instance; every other context reports on the replica.
            if (context is StatelessServiceContext)
            {
                return new StatelessServiceInstanceHealthReport(context.PartitionId, context.ReplicaOrInstanceId, info);
            }

            return new StatefulServiceReplicaHealthReport(context.PartitionId, context.ReplicaOrInstanceId, info);

        default:
            throw new ArgumentException("Unknown health type", nameof(reportType));
    }
}
/// <summary>
/// Hurts the player if their health state is currently normal: knocks them in the given
/// direction, removes a life, starts the hurt countdown and raises the PlayerGotHurt event.
/// Does nothing in any other health state.
/// </summary>
/// <param name="dir">Direction passed to <c>Knock</c>.</param>
internal void Hurt(int dir)
{
    // Only a player in the normal state can be hurt.
    if (!HState.Equals(HealthState.Normal))
    {
        return;
    }

    Knock(dir);
    LoseLife();

    HState = HealthState.Hurt;
    HurtCounter = HURT_DURATION;

    Game1.Events.Notify(Event.PlayerGotHurt, this);
}
/// <summary>
/// Logs a failed replica health report by invoking the <c>_replicaHealthReportFailed</c>
/// logging delegate.
/// </summary>
/// <param name="logger">Logger passed to the delegate.</param>
/// <param name="state">Health state that was being reported.</param>
/// <param name="replicaId">Replica identifier.</param>
/// <param name="exception">Exception passed to the delegate.</param>
public static void ReplicaHealthReportFailed(ILogger<Discoverer> logger, HealthState state, long replicaId, Exception exception)
{
    _replicaHealthReportFailed(
        logger,
        state,
        replicaId,
        exception);
}
/// <summary>
/// Queries the aggregated cluster health from the health manager, builds a textual description
/// of the unhealthy evaluations (applications, services and nodes in Error or Warning, plus any
/// FabricObserver-originated health events), and sends it to the observer telemetry client.
/// </summary>
/// <param name="token">Cancellation token checked before and during the probe and passed to the health queries.</param>
/// <returns>A task that completes when the probe (and any telemetry call) has finished.</returns>
/// <remarks>
/// <c>FabricException</c>, <c>OperationCanceledException</c> and <c>TimeoutException</c> are logged and
/// reported to telemetry as an Unknown cluster health state instead of being propagated.
/// </remarks>
private async Task ProbeClusterHealthAsync(CancellationToken token)
{
    // The point of this service is to emit SF Health telemetry to your external log analytics service, so
    // if telemetry is not enabled or you don't provide an AppInsights instrumentation key, for example,
    // then querying HM for health info isn't useful.
    if (!this.IsTelemetryEnabled || this.ObserverTelemetryClient == null)
    {
        return;
    }

    token.ThrowIfCancellationRequested();

    // Get ClusterObserver settings (specified in PackageRoot/Config/Settings.xml).
    _ = bool.TryParse(
        this.GetSettingParameterValue(
            ObserverConstants.ClusterObserverConfigurationSectionName,
            ObserverConstants.EmitHealthWarningEvaluationConfigurationSetting,
            "false"),
        out bool emitWarningDetails);

    _ = bool.TryParse(
        this.GetSettingParameterValue(
            ObserverConstants.ClusterObserverConfigurationSectionName,
            ObserverConstants.EmitOkHealthState,
            "false"),
        out bool emitOkHealthState);

    _ = bool.TryParse(
        this.GetSettingParameterValue(
            ObserverConstants.ClusterObserverConfigurationSectionName,
            ObserverConstants.IgnoreSystemAppWarnings,
            "false"),
        out bool ignoreSystemAppWarnings);

    _ = bool.TryParse(
        this.GetSettingParameterValue(
            ObserverConstants.ClusterObserverConfigurationSectionName,
            ObserverConstants.EmitHealthStatistics,
            "false"),
        out bool emitHealthStatistics);

    try
    {
        var clusterHealth = await this.FabricClientInstance.HealthManager.GetClusterHealthAsync(
            this.AsyncClusterOperationTimeoutSeconds,
            token).ConfigureAwait(true);

        string telemetryDescription = string.Empty;

        // Previous run generated unhealthy evaluation report. Clear it (send Ok).
        if (emitOkHealthState && clusterHealth.AggregatedHealthState == HealthState.Ok
            && (this.ClusterHealthState == HealthState.Error
                || (emitWarningDetails && this.ClusterHealthState == HealthState.Warning)))
        {
            telemetryDescription += "Cluster has recovered from previous Error/Warning state.";
        }
        else
        {
            // Construct unhealthy state information.

            // If in Warning and you are not sending Warning state reports, then end here.
            if (!emitWarningDetails && clusterHealth.AggregatedHealthState == HealthState.Warning)
            {
                return;
            }

            var unhealthyEvaluations = clusterHealth.UnhealthyEvaluations;

            // No Unhealthy Evaluations means nothing to see here.
            if (unhealthyEvaluations.Count == 0)
            {
                return;
            }

            foreach (var evaluation in unhealthyEvaluations)
            {
                token.ThrowIfCancellationRequested();

                telemetryDescription += $"{Enum.GetName(typeof(HealthEvaluationKind), evaluation.Kind)} - {evaluation.AggregatedHealthState}: {evaluation.Description}{Environment.NewLine}{Environment.NewLine}";

                // Application in Warning or Error?
                // Note: SF System app Warnings can be noisy, ephemeral (not Errors - you should generally not ignore Error states),
                // so check for them and ignore if specified in your config's IgnoreFabricSystemAppWarnings setting.
                if (evaluation.Kind == HealthEvaluationKind.Application
                    || evaluation.Kind == HealthEvaluationKind.Applications)
                {
                    // Each unhealthy application is visited exactly once: a single pass emits its
                    // header, its FabricObserver events and its unhealthy services, with one
                    // application health query per unhealthy application.
                    foreach (var app in clusterHealth.ApplicationHealthStates)
                    {
                        Token.ThrowIfCancellationRequested();

                        // NOTE(review): inside this branch evaluation.Kind is Application or Applications,
                        // so the SystemApplication clause can never be true and ignoreSystemAppWarnings
                        // has no effect. Kept as written - confirm the intended system-app filtering.
                        if (app.AggregatedHealthState == HealthState.Ok
                            || (!emitWarningDetails
                                && (app.AggregatedHealthState == HealthState.Warning
                                    || (evaluation.Kind == HealthEvaluationKind.SystemApplication && ignoreSystemAppWarnings))))
                        {
                            continue;
                        }

                        telemetryDescription += $"Application in Error or Warning: {app.ApplicationName}{Environment.NewLine}";

                        var appHealth = await FabricClientInstance.HealthManager.GetApplicationHealthAsync(
                            app.ApplicationName,
                            this.AsyncClusterOperationTimeoutSeconds,
                            token);

                        var serviceHealthStates = appHealth.ServiceHealthStates;
                        var appHealthEvents = appHealth.HealthEvents;

                        // From FO? Only events whose SourceId is a known FabricObserver app code are described.
                        foreach (var appHealthEvent in appHealthEvents)
                        {
                            Token.ThrowIfCancellationRequested();

                            if (!FOErrorWarningCodes.AppErrorCodesDictionary.TryGetValue(
                                    appHealthEvent.HealthInformation.SourceId,
                                    out var appCodeName))
                            {
                                continue;
                            }

                            string errorWarning = appCodeName.Contains("Error") ? "Error" : "Warning";

                            telemetryDescription +=
                                $" FabricObserver {errorWarning} Code: {appHealthEvent.HealthInformation.SourceId}{Environment.NewLine}" +
                                $" {errorWarning} Details: {appHealthEvent.HealthInformation.Description}{Environment.NewLine}";
                        }

                        // Service in error?
                        foreach (var service in serviceHealthStates)
                        {
                            Token.ThrowIfCancellationRequested();

                            if (service.AggregatedHealthState == HealthState.Ok
                                || (!emitWarningDetails && service.AggregatedHealthState == HealthState.Warning))
                            {
                                continue;
                            }

                            telemetryDescription += $"Service in Error: {service.ServiceName}{Environment.NewLine}";
                        }
                    }
                }
                else if (evaluation.Kind == HealthEvaluationKind.Node
                         || evaluation.Kind == HealthEvaluationKind.Nodes)
                {
                    // Node in Warning or Error?
                    foreach (var node in clusterHealth.NodeHealthStates)
                    {
                        if (node.AggregatedHealthState == HealthState.Ok
                            || (!emitWarningDetails && node.AggregatedHealthState == HealthState.Warning))
                        {
                            continue;
                        }

                        telemetryDescription += $"Node in Error or Warning: {node.NodeName}{Environment.NewLine}";

                        var nodeHealth = await FabricClientInstance.HealthManager.GetNodeHealthAsync(
                            node.NodeName,
                            this.AsyncClusterOperationTimeoutSeconds,
                            token);

                        // From FO? Only events whose SourceId is a known FabricObserver node code are described.
                        foreach (var nodeHealthEvent in nodeHealth.HealthEvents)
                        {
                            Token.ThrowIfCancellationRequested();

                            if (!FOErrorWarningCodes.NodeErrorCodesDictionary.TryGetValue(
                                    nodeHealthEvent.HealthInformation.SourceId,
                                    out var nodeCodeName))
                            {
                                continue;
                            }

                            string errorWarning = nodeCodeName.Contains("Error") ? "Error" : "Warning";

                            telemetryDescription +=
                                $" FabricObserver {errorWarning} Code: {nodeHealthEvent.HealthInformation.SourceId}{Environment.NewLine}" +
                                $" {errorWarning} Details: {nodeHealthEvent.HealthInformation.Description}{Environment.NewLine}";
                        }
                    }
                }
            }

            // HealthStatistics as a string.
            if (emitHealthStatistics)
            {
                telemetryDescription += $"{clusterHealth.HealthStatistics.ToString()}";
            }
        }

        // Track current health state for use in next run.
        this.ClusterHealthState = clusterHealth.AggregatedHealthState;

        // This means there is no cluster health state data to emit.
        if (string.IsNullOrEmpty(telemetryDescription))
        {
            return;
        }

        // Telemetry. The client was checked for null at the top of the method.
        await this.ObserverTelemetryClient.ReportHealthAsync(
            HealthScope.Cluster,
            "AggregatedClusterHealth",
            clusterHealth.AggregatedHealthState,
            telemetryDescription,
            this.ObserverName,
            this.Token);
    }
    catch (Exception e) when (e is FabricException || e is OperationCanceledException || e is TimeoutException)
    {
        this.ObserverLogger.LogError(
            $"Unable to determine cluster health:{Environment.NewLine}{e.ToString()}");

        // Telemetry.
        await this.ObserverTelemetryClient.ReportHealthAsync(
            HealthScope.Cluster,
            "AggregatedClusterHealth",
            HealthState.Unknown,
            $"ProbeClusterHealthAsync threw {e.Message}{Environment.NewLine}" +
            $"Unable to determine Cluster Health. Probing will continue.",
            this.ObserverName,
            this.Token);
    }
}
/// <summary>
/// Checks if the healthState is warning or error state.
/// </summary>
/// <param name="state">Health state to test.</param>
/// <returns><c>true</c> when <paramref name="state"/> is Error or Warning; otherwise <c>false</c>.</returns>
internal static bool IsWarningOrError(this HealthState state)
{
    switch (state)
    {
        case HealthState.Error:
        case HealthState.Warning:
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Creates a node health entry for the given node.
/// </summary>
/// <param name="nodeName">Stored in <c>NodeName</c>.</param>
/// <param name="aggregatedHealthState">Forwarded to the base constructor.</param>
internal NodeHealth(string nodeName, HealthState aggregatedHealthState)
    : base(aggregatedHealthState)
{
    NodeName = nodeName;
}