Example #1
0
        /// <summary>
        /// Reports the capacity health of a collection in a partition to Service Fabric.
        /// </summary>
        /// <remarks>
        /// The reported state is <c>Error</c> when <paramref name="count"/> is at or above
        /// <c>capacity * pError</c>, <c>Warning</c> when it is at or above <c>capacity * pWarn</c>
        /// (both thresholds truncated to whole items), and <c>Ok</c> otherwise. The report is sent
        /// as a stateful replica health report for <c>Context.PartitionId</c> / <c>Context.ReplicaId</c>.
        /// </remarks>
        /// <param name="healthSourceId">Health source identifier. Checked with <c>Guard.ArgumentNotNullOrWhitespace</c>.</param>
        /// <param name="name">Health property name. Checked with <c>Guard.ArgumentNotNullOrWhitespace</c>.</param>
        /// <param name="count">Current number of items in the collection.</param>
        /// <param name="capacity">Configured capacity of the collection.</param>
        /// <param name="pWarn">Warning threshold; multiplied directly with <paramref name="capacity"/>.</param>
        /// <param name="pError">Error threshold; multiplied directly with <paramref name="capacity"/>.</param>
        /// <param name="ttl">Health report time to live. A zero or negative value falls back to 30 seconds.</param>
        public void ReportHealthReplicaCapacity(string healthSourceId, string name, long count, long capacity, double pWarn, double pError, TimeSpan ttl)
        {
            Guard.ArgumentNotNullOrWhitespace(healthSourceId, nameof(healthSourceId));
            Guard.ArgumentNotNullOrWhitespace(name, nameof(name));

            // Percentage used, for the description only. Guarded so a zero capacity never divides by zero.
            double percentCapacity = (0 == count || 0 == capacity) ? 0.0 : ((double)count / (double)capacity) * 100.0;

            // NOTE(review): the thresholds are capacity * pWarn / capacity * pError, so the values are
            // effectively treated as fractions (e.g. 0.75), not 0-100 percentages - confirm against callers.
            // NOTE(review): with capacity == 0 both thresholds are 0, so any non-negative count reports Error.
            long queueWarningCount = (long)(capacity * pWarn);
            long queueErrorCount   = (long)(capacity * pError);

            // Determine the health state based on the count vs. the thresholds (error wins over warning).
            HealthState hs = (count >= queueErrorCount) ? HealthState.Error
                : ((count >= queueWarningCount) ? HealthState.Warning : HealthState.Ok);

            // Create the health information to report to Service Fabric.
            HealthInformation hi = new HealthInformation(healthSourceId, name, hs);

            // Fix: the previous condition was inverted and replaced every valid TTL with 30 seconds
            // while letting negative values through. Only non-positive TTLs get the default now.
            hi.TimeToLive        = (ttl <= TimeSpan.Zero) ? TimeSpan.FromSeconds(30) : ttl;
            hi.Description       = $"Count: {count:N0}, Capacity: {capacity:N0}, Used: {percentCapacity}%";
            hi.RemoveWhenExpired = true;
            hi.SequenceNumber    = HealthInformation.AutoSequenceNumber;

            // Create a replica health report and hand it to the health manager.
            StatefulServiceReplicaHealthReport ssrhr = new StatefulServiceReplicaHealthReport(Context.PartitionId, Context.ReplicaId, hi);

            ServiceFabricClient.HealthManager.ReportHealth(ssrhr);
        }
        /// <summary>
        /// Sends a stateful replica health report for this service and then forwards the same
        /// state and message to <c>SendReportForNode</c>.
        /// </summary>
        /// <remarks>
        /// Reporting is best effort: any exception is swallowed, and only its message is passed to
        /// the profiler handler when it is stopped.
        /// </remarks>
        /// <param name="healthState">Health state to report.</param>
        /// <param name="message">Text passed as the second constructor argument of <c>HealthInformation</c>.</param>
        public void SendReportForService(HealthState healthState, string message)
        {
            string failure = string.Empty;
            var profiler = HandlersFactory.GetProfilerHandler(_settingService, _loggerService);

            profiler.Start(LOG_TAG, "SendReportForService", null);

            try
            {
                var information = new HealthInformation(_serviceName, message, healthState);
                HealthReport replicaReport = new StatefulServiceReplicaHealthReport(_partitionId, _replicaId, information);

                // The report lives for the configured number of minutes and is kept after it expires.
                var reportedInformation = replicaReport.HealthInformation;
                reportedInformation.TimeToLive        = TimeSpan.FromMinutes(_settingService.GetHealthIssuesTimeToLive());
                reportedInformation.RemoveWhenExpired = false;

                ReportHealth(replicaReport);
                SendReportForNode(healthState, message);
            }
            catch (Exception ex)
            {
                // Deliberately not rethrown; the message is surfaced through the profiler only.
                failure = ex.Message;
            }
            finally
            {
                profiler.Stop(failure);
            }
        }
Example #3
0
        /// <summary>
        /// Builds the health report that matches the active parameter set: a stateful replica
        /// report or a stateless instance report.
        /// </summary>
        /// <param name="healthInformation">Health information to wrap in the report.</param>
        /// <returns>The replica or instance health report.</returns>
        /// <exception cref="ArgumentException">The parameter set name is not one of the two known sets.</exception>
        protected override HealthReport GetHealthReport(HealthInformation healthInformation)
        {
            switch (this.ParameterSetName)
            {
                // Replica of a stateful service.
                case Constants.SendReplicaHealthReportstatefulServiceParamSetName:
                    return new StatefulServiceReplicaHealthReport(this.PartitionId, this.ReplicaId, healthInformation);

                // Instance of a stateless service.
                case Constants.SendReplicaHealthReportStatelessServiceParamSetName:
                    return new StatelessServiceInstanceHealthReport(this.PartitionId, this.InstanceId, healthInformation);

                default:
                    string unknownSetMessage = string.Format(
                        CultureInfo.CurrentCulture,
                        StringResources.Error_ReplicaHealthReportUnknownParamSet,
                        ParameterSetName);

                    throw new ArgumentException(unknownSetMessage);
            }
        }
Example #4
0
    /// <summary>
    /// Reports the health of a single replica (stateful) or instance (stateless) to Service Fabric.
    /// </summary>
    /// <remarks>
    /// Does nothing when <c>options.ReportReplicasHealth</c> is false. An unsupported service kind
    /// is logged and skipped. Exceptions thrown while sending the report are logged, not rethrown.
    /// </remarks>
    private void ReportReplicaHealth(
        ServiceFabricDiscoveryOptions options,
        ServiceWrapper service,
        Guid partitionId,
        ReplicaWrapper replica,
        HealthState state,
        string description = null)
    {
        if (!options.ReportReplicasHealth)
        {
            return;
        }

        var info = new HealthInformation(HealthReportSourceId, HealthReportProperty, state);
        info.Description = description;
        info.TimeToLive = HealthReportTimeToLive(options);
        info.RemoveWhenExpired = true;

        var kind = service.ServiceKind;
        HealthReport report;
        if (kind == ServiceKind.Stateful)
        {
            report = new StatefulServiceReplicaHealthReport(partitionId, replica.Id, info);
        }
        else if (kind == ServiceKind.Stateless)
        {
            report = new StatelessServiceInstanceHealthReport(partitionId, replica.Id, info);
        }
        else
        {
            Log.ReplicaHealthReportFailedInvalidServiceKind(_logger, state, replica.Id, kind);
            return;
        }

        // Unhealthy states are sent immediately; Ok reports may be batched.
        var sendOptions = new HealthReportSendOptions();
        sendOptions.Immediate = state != HealthState.Ok;

        try
        {
            _serviceFabricCaller.ReportHealth(report, sendOptions);
        }
        catch (Exception ex) // TODO: davidni: not fatal?
        {
            Log.ReplicaHealthReportFailed(_logger, state, replica.Id, ex);
        }
    }
        /// <summary>
        /// Creates a health report for the requested entity type.
        /// </summary>
        /// <param name="context">The service fabric context that the health report is for</param>
        /// <param name="reportSourceId">The unique reporting source id</param>
        /// <param name="propertyName">The name of the health property being reported on</param>
        /// <param name="state">The current state of the health property</param>
        /// <param name="reportType">The entity type the report is for</param>
        /// <param name="timeToLive">The time to live of the health report</param>
        /// <returns>A health report for the appropriate reporting entity</returns>
        /// <exception cref="ArgumentException"><paramref name="reportType"/> is not a handled value.</exception>
        public static HealthReport GetHealthReport(ServiceContext context, string reportSourceId, string propertyName, HealthState state, ReportTypes reportType, TimeSpan timeToLive)
        {
            string stateName = Enum.GetName(typeof(HealthState), state);

            var info = new HealthInformation(reportSourceId, propertyName, state)
            {
                Description       = $"{propertyName} health state {stateName}",
                RemoveWhenExpired = true,
                TimeToLive        = timeToLive,
                SequenceNumber    = HealthInformation.AutoSequenceNumber,
            };

            // The context is only read inside the cases that need it.
            switch (reportType)
            {
                case ReportTypes.Cluster:
                    return new ClusterHealthReport(info);

                case ReportTypes.Application:
                    return new ApplicationHealthReport(new Uri(context.CodePackageActivationContext.ApplicationName), info);

                case ReportTypes.DeployedApplication:
                    return new DeployedApplicationHealthReport(
                        new Uri(context.CodePackageActivationContext.ApplicationName),
                        context.NodeContext.NodeName,
                        info);

                case ReportTypes.Service:
                    return new ServiceHealthReport(context.ServiceName, info);

                case ReportTypes.DeployedService:
                    return new DeployedServicePackageHealthReport(
                        new Uri(context.CodePackageActivationContext.ApplicationName),
                        context.CodePackageActivationContext.GetServiceManifestName(),
                        context.NodeContext.NodeName,
                        info);

                case ReportTypes.Node:
                    return new NodeHealthReport(context.NodeContext.NodeName, info);

                case ReportTypes.Instance:
                    // A stateless context yields an instance report; any other context yields a replica report.
                    if (context is StatelessServiceContext)
                    {
                        return new StatelessServiceInstanceHealthReport(context.PartitionId, context.ReplicaOrInstanceId, info);
                    }

                    return new StatefulServiceReplicaHealthReport(context.PartitionId, context.ReplicaOrInstanceId, info);

                default:
                    throw new ArgumentException("Unknown health type", nameof(reportType));
            }
        }
        /// <summary>
        /// Sends a health report for the given sub-status immediately and returns a completed task.
        /// </summary>
        /// <remarks>
        /// When <paramref name="instance"/> is null the report targets the service returned by
        /// <c>GetServiceUri()</c>; otherwise it targets the current replica or instance taken from
        /// <c>_context</c>, chosen by <c>_isStateful</c>. The report is sent synchronously.
        /// </remarks>
        /// <param name="serviceName">Not read by this method.</param>
        /// <param name="instance">Only checked for null to choose between a service-level and a replica/instance-level report.</param>
        /// <param name="subStatusName">Used as the health property name.</param>
        /// <param name="status">Status that is converted with <c>MapStatus</c>.</param>
        /// <param name="message">Description attached to the health information.</param>
        /// <returns>An already completed task.</returns>
        public Task UpdateStatusAsync(string serviceName, string instance, string subStatusName, HealthStatus status, string message)
        {
            var information = new HealthInformation(GetType().FullName, subStatusName, MapStatus(status));
            information.Description = message;

            System.Fabric.Health.HealthReport healthReport;
            if (instance != null)
            {
                if (_isStateful)
                {
                    healthReport = new StatefulServiceReplicaHealthReport(_context.PartitionId, _context.ReplicaOrInstanceId, information);
                }
                else
                {
                    healthReport = new StatelessServiceInstanceHealthReport(_context.PartitionId, _context.ReplicaOrInstanceId, information);
                }
            }
            else
            {
                healthReport = new ServiceHealthReport(GetServiceUri(), information);
            }

            var sendImmediately = new HealthReportSendOptions();
            sendImmediately.Immediate = true;

            _fabricClient.HealthManager.ReportHealth(healthReport, sendImmediately);
            return Task.CompletedTask;
        }
Example #7
0
        /// <summary>
        /// Reports the latency health of a replica to Service Fabric.
        /// </summary>
        /// <remarks>
        /// The latest latency value is sampled once. The reported state is <c>Error</c> when that value
        /// is at or above <paramref name="errorValue"/>, <c>Warning</c> when it is at or above
        /// <paramref name="warnValue"/>, and <c>Ok</c> otherwise. The report is sent as a stateful
        /// replica health report for <c>Context.PartitionId</c> / <c>Context.ReplicaId</c>.
        /// </remarks>
        /// <param name="healthSourceId">Health source identifier. Checked with <c>Guard.ArgumentNotNullOrWhitespace</c>.</param>
        /// <param name="name">Health property name. Checked with <c>Guard.ArgumentNotNullOrWhitespace</c>.</param>
        /// <param name="latency">AverageLatency instance whose latest value is evaluated.</param>
        /// <param name="warnValue">Warning value.</param>
        /// <param name="errorValue">Error value.</param>
        /// <param name="ttl">Health report time to live. A zero or negative value falls back to 30 seconds.</param>
        public void ReportHealthReplicaLatency(string healthSourceId, string name, AverageLatency latency, Int64 warnValue, Int64 errorValue, TimeSpan ttl)
        {
            Guard.ArgumentNotNullOrWhitespace(healthSourceId, nameof(healthSourceId));
            Guard.ArgumentNotNullOrWhitespace(name, nameof(name));

            // Sample once so the state and the description are based on the same value,
            // even if the latency changes while the report is being built.
            var latest = latency.GetLatest();

            // Determine the health state based on the latest latency vs. the thresholds (error wins over warning).
            HealthState hs = (latest >= errorValue) ? HealthState.Error
                : ((latest >= warnValue) ? HealthState.Warning : HealthState.Ok);

            // Create the health information to report to Service Fabric.
            HealthInformation hi = new HealthInformation(healthSourceId, name, hs);

            // Fix: the previous condition was inverted and replaced every valid TTL with 30 seconds
            // while letting negative values through. Only non-positive TTLs get the default now.
            hi.TimeToLive        = (ttl <= TimeSpan.Zero) ? TimeSpan.FromSeconds(30) : ttl;
            hi.Description       = $"{name} latency: {latest}";
            hi.RemoveWhenExpired = true;
            hi.SequenceNumber    = HealthInformation.AutoSequenceNumber;

            // Create a replica health report and hand it to the health manager.
            StatefulServiceReplicaHealthReport ssrhr = new StatefulServiceReplicaHealthReport(Context.PartitionId, Context.ReplicaId, hi);

            ServiceFabricClient.HealthManager.ReportHealth(ssrhr);
        }
Example #8
0
        /// <summary>
        /// This function generates Service Fabric Health Reports that will show up in SFX.
        /// </summary>
        /// <remarks>
        /// A null report is ignored. Missing <c>SourceId</c> and <c>Property</c> values are filled in
        /// on the passed instance before the report is sent. When <c>HealthData</c> is set, its JSON
        /// serialization replaces the text message as the report description. The target entity is
        /// chosen from <c>ReportType</c>; if the identifiers required for that type are missing,
        /// a node health report is sent instead.
        /// </remarks>
        /// <param name="healthReport">Utilities.HealthReport instance.</param>
        public void ReportHealthToServiceFabric(HealthReport healthReport)
        {
            if (healthReport == null)
            {
                return;
            }

            // Errors and warnings are not pushed immediately, to avoid unnecessary stress on the
            // Health subsystem. Only OK is sent right away, so existing warning/error states clear quickly.
            var sendOptions = new HealthReportSendOptions
            {
                Immediate = healthReport.State == HealthState.Ok,
            };

            // Default lifetime is five minutes unless the report carries its own value.
            TimeSpan timeToLive = healthReport.HealthReportTimeToLive != default
                ? healthReport.HealthReportTimeToLive
                : TimeSpan.FromMinutes(5);

            TelemetryData healthData = healthReport.HealthData;

            // Error and warning messages get a short preamble naming the observer.
            string preamble = string.Empty;
            bool isErrorOrWarning = healthReport.State == HealthState.Error || healthReport.State == HealthState.Warning;

            if (isErrorOrWarning)
            {
                // OSObserver does not monitor resources and therefore does not support related usage threshold configuration.
                bool isOsConfiguration = healthReport.Observer == ObserverConstants.OSObserverName
                                         && healthReport.Property == "OSConfiguration";

                if (isOsConfiguration)
                {
                    preamble = $"{ObserverConstants.OSObserverName} detected potential problem with OS configuration: ";
                }
                else
                {
                    preamble = $"{healthReport.Observer} detected {Enum.GetName(typeof(HealthState), healthReport.State)} threshold breach. ";
                }
            }

            string message = $"{preamble}{healthReport.HealthMessage}";

            // Structured telemetry, when present, takes the place of the text message.
            if (healthData != null)
            {
                message = JsonConvert.SerializeObject(healthData);
            }

            if (string.IsNullOrEmpty(healthReport.SourceId))
            {
                healthReport.SourceId = healthReport.Observer;
            }

            // Pick a default property name per observer when the caller did not supply one.
            if (string.IsNullOrEmpty(healthReport.Property))
            {
                string defaultProperty;

                switch (healthReport.Observer)
                {
                    case ObserverConstants.AppObserverName:
                        defaultProperty = "ApplicationHealth";
                        break;

                    case ObserverConstants.CertificateObserverName:
                        defaultProperty = "SecurityHealth";
                        break;

                    case ObserverConstants.DiskObserverName:
                        defaultProperty = "DiskHealth";
                        break;

                    case ObserverConstants.FabricSystemObserverName:
                        defaultProperty = "FabricSystemServiceHealth";
                        break;

                    case ObserverConstants.NetworkObserverName:
                        defaultProperty = "NetworkHealth";
                        break;

                    case ObserverConstants.OSObserverName:
                        defaultProperty = "MachineInformation";
                        break;

                    case ObserverConstants.NodeObserverName:
                        defaultProperty = "MachineResourceHealth";
                        break;

                    default:
                        defaultProperty = $"{healthReport.Observer}_HealthProperty";
                        break;
                }

                healthReport.Property = defaultProperty;
            }

            var healthInformation = new HealthInformation(healthReport.SourceId, healthReport.Property, healthReport.State);
            healthInformation.Description       = $"{message}";
            healthInformation.TimeToLive        = timeToLive;
            healthInformation.RemoveWhenExpired = true;

            // Log health event locally.
            if (healthReport.EmitLogEvent)
            {
                switch (healthReport.State)
                {
                    case HealthState.Error:
                        this.logger.LogError(healthReport.NodeName + ": {0}", healthInformation.Description);
                        break;

                    case HealthState.Warning:
                        this.logger.LogWarning(healthReport.NodeName + ": {0}", healthInformation.Description);
                        break;

                    default:
                        this.logger.LogInfo(healthReport.NodeName + ": {0}", healthInformation.Description);
                        break;
                }
            }

            // To SFX. Each branch needs both the matching report type and the identifiers for
            // that entity; anything else is reported against the node.
            var reportType = healthReport.ReportType;

            if (reportType == HealthReportType.Application && healthReport.AppName != null)
            {
                this.fabricClient.HealthManager.ReportHealth(
                    new ApplicationHealthReport(healthReport.AppName, healthInformation),
                    sendOptions);
            }
            else if (reportType == HealthReportType.Service && healthReport.ServiceName != null)
            {
                this.fabricClient.HealthManager.ReportHealth(
                    new ServiceHealthReport(healthReport.ServiceName, healthInformation),
                    sendOptions);
            }
            else if (reportType == HealthReportType.StatefulService &&
                     healthReport.PartitionId != Guid.Empty && healthReport.ReplicaOrInstanceId > 0)
            {
                this.fabricClient.HealthManager.ReportHealth(
                    new StatefulServiceReplicaHealthReport(healthReport.PartitionId, healthReport.ReplicaOrInstanceId, healthInformation),
                    sendOptions);
            }
            else if (reportType == HealthReportType.StatelessService &&
                     healthReport.PartitionId != Guid.Empty && healthReport.ReplicaOrInstanceId > 0)
            {
                this.fabricClient.HealthManager.ReportHealth(
                    new StatelessServiceInstanceHealthReport(healthReport.PartitionId, healthReport.ReplicaOrInstanceId, healthInformation),
                    sendOptions);
            }
            else if (reportType == HealthReportType.Partition && healthReport.PartitionId != Guid.Empty)
            {
                this.fabricClient.HealthManager.ReportHealth(
                    new PartitionHealthReport(healthReport.PartitionId, healthInformation),
                    sendOptions);
            }
            else if (reportType == HealthReportType.DeployedApplication && healthReport.AppName != null)
            {
                this.fabricClient.HealthManager.ReportHealth(
                    new DeployedApplicationHealthReport(healthReport.AppName, healthReport.NodeName, healthInformation),
                    sendOptions);
            }
            else
            {
                this.fabricClient.HealthManager.ReportHealth(
                    new NodeHealthReport(healthReport.NodeName, healthInformation),
                    sendOptions);
            }
        }
        /// <summary>
        /// Builds the health report for the entity type selected by <paramref name="reportType"/>.
        /// </summary>
        /// <param name="context">The service fabric context that the health report is for</param>
        /// <param name="reportSourceId">The unique reporting source id</param>
        /// <param name="propertyName">The name of the health property being reported on</param>
        /// <param name="state">The current state of the health property</param>
        /// <param name="reportType">The entity type the report is for</param>
        /// <param name="timeToLive">The time to live of the health report</param>
        /// <returns>A health report for the appropriate reporting entity</returns>
        /// <exception cref="ArgumentException"><paramref name="reportType"/> is not a handled value.</exception>
        public static HealthReport GetHealthReport(ServiceContext context, string reportSourceId, string propertyName, HealthState state, ReportTypes reportType, TimeSpan timeToLive)
        {
            var healthInfo = new HealthInformation(reportSourceId, propertyName, state);
            string healthStateName = Enum.GetName(typeof(HealthState), state);

            healthInfo.Description = $"{propertyName} health state {healthStateName}";
            healthInfo.RemoveWhenExpired = true;
            healthInfo.TimeToLive = timeToLive;
            healthInfo.SequenceNumber = HealthInformation.AutoSequenceNumber;

            // Each case returns its report directly; the context is only read where a case needs it.
            switch (reportType)
            {
                case ReportTypes.Cluster:
                    return new ClusterHealthReport(healthInfo);

                case ReportTypes.Application:
                {
                    var applicationUri = new Uri(context.CodePackageActivationContext.ApplicationName);
                    return new ApplicationHealthReport(applicationUri, healthInfo);
                }

                case ReportTypes.DeployedApplication:
                {
                    var applicationUri = new Uri(context.CodePackageActivationContext.ApplicationName);
                    return new DeployedApplicationHealthReport(applicationUri, context.NodeContext.NodeName, healthInfo);
                }

                case ReportTypes.Service:
                    return new ServiceHealthReport(context.ServiceName, healthInfo);

                case ReportTypes.DeployedService:
                {
                    var applicationUri = new Uri(context.CodePackageActivationContext.ApplicationName);
                    var manifestName = context.CodePackageActivationContext.GetServiceManifestName();
                    return new DeployedServicePackageHealthReport(applicationUri, manifestName, context.NodeContext.NodeName, healthInfo);
                }

                case ReportTypes.Node:
                    return new NodeHealthReport(context.NodeContext.NodeName, healthInfo);

                case ReportTypes.Instance:
                    // A stateless context yields an instance report; any other context yields a replica report.
                    return context is StatelessServiceContext
                        ? (HealthReport)new StatelessServiceInstanceHealthReport(context.PartitionId, context.ReplicaOrInstanceId, healthInfo)
                        : new StatefulServiceReplicaHealthReport(context.PartitionId, context.ReplicaOrInstanceId, healthInfo);

                default:
                    throw new ArgumentException("Unknown health type", nameof(reportType));
            }
        }