/// <summary>
/// Copies the restart count, running state, health state and start time from an
/// inspect response into the given state metrics.
/// </summary>
/// <param name="metrics">The state metrics to update.</param>
/// <param name="container">The inspect response whose state is published.</param>
/// <exception cref="FormatException">
/// The container is running and its non-blank <c>StartedAt</c> value cannot be parsed as a timestamp.
/// </exception>
private void UpdateStateMetrics(ContainerTrackerStateMetrics metrics, ContainerInspectResponse container)
{
    metrics.RestartCount.Set(container.RestartCount);

    // Running state is published as 1 (running), 0.5 (restarting) or 0 (anything else).
    if (container.State.Running)
    {
        metrics.RunningState.Set(1);
    }
    else if (container.State.Restarting)
    {
        metrics.RunningState.Set(0.5);
    }
    else
    {
        metrics.RunningState.Set(0);
    }

    if (container.State.Health != null)
    {
        // Health state is published as 1 (healthy), 0.5 (starting) or 0 (anything else).
        if (container.State.Health.Status == "healthy")
        {
            metrics.HealthState.Set(1);
        }
        else if (container.State.Health.Status == "starting")
        {
            metrics.HealthState.Set(0.5);
        }
        else
        {
            // Expected to be "unhealthy", but every other status value also lands here.
            metrics.HealthState.Set(0);
        }
    }
    else
    {
        // The response carries no health information, so the health metric is unpublished
        // rather than left exposing whatever value an earlier update may have set.
        metrics.HealthState.Unpublish();
    }

    // The start time is only updated while the container is running and reports a start timestamp.
    if (container.State.Running && !string.IsNullOrWhiteSpace(container.State.StartedAt))
    {
        // The timestamp is machine-generated, so parse it with the invariant culture. The
        // single-argument Parse overload uses the current culture, which makes the result
        // depend on the host's regional settings (for example its default calendar).
        var startedAt = DateTimeOffset.Parse(
            container.State.StartedAt,
            System.Globalization.CultureInfo.InvariantCulture);

        metrics.StartTime.SetToTimeUtc(startedAt);
    }
}
/// <summary>
/// Copies the restart count, running state and start time from an inspect response
/// into the given state metrics.
/// </summary>
/// <param name="metrics">The state metrics to update.</param>
/// <param name="container">The inspect response whose state is published.</param>
/// <exception cref="FormatException">
/// The container is running and its non-blank <c>StartedAt</c> value cannot be parsed as a timestamp.
/// </exception>
private void UpdateStateMetrics(ContainerTrackerStateMetrics metrics, ContainerInspectResponse container)
{
    metrics.RestartCount.Set(container.RestartCount);

    // Running state is published as 1 (running), 0.5 (restarting) or 0 (anything else).
    if (container.State.Running)
    {
        metrics.RunningState.Set(1);
    }
    else if (container.State.Restarting)
    {
        metrics.RunningState.Set(0.5);
    }
    else
    {
        metrics.RunningState.Set(0);
    }

    // The start time is only updated while the container is running and reports a start timestamp.
    if (container.State.Running && !string.IsNullOrWhiteSpace(container.State.StartedAt))
    {
        // The timestamp is machine-generated, so parse it with the invariant culture. The
        // single-argument Parse overload uses the current culture, which makes the result
        // depend on the host's regional settings (for example its default calendar).
        var startedAt = DateTimeOffset.Parse(
            container.State.StartedAt,
            System.Globalization.CultureInfo.InvariantCulture);

        metrics.StartTime.SetToTimeUtc(startedAt);
    }
}
/// <summary>
/// Asks the tracker to refresh its metrics from the Docker daemon.
/// </summary>
/// <remarks>
/// Exceptions raised while querying Docker are not propagated: the failure is counted,
/// logged, the tracker is unpublished and the method returns without updating anything.
/// </remarks>
/// <param name="client">The Docker client used to inspect the container and fetch its stats.</param>
/// <param name="cancel">Cancellation token passed on to the Docker API calls.</param>
public async Task TryUpdateAsync(DockerClient client, CancellationToken cancel)
{
    ContainerInspectResponse container;
    var statsRecorder = new StatsRecorder();

    try
    {
        // Step 1: inspect the container to learn its basic state.
        using (_metrics.InspectContainerDuration.NewTimer())
        {
            container = await client.Containers.InspectContainerAsync(Id, cancel);
        }

        // Step 2: resource usage stats are only requested for a running container.
        if (container.State.Running)
        {
            using (_metrics.GetResourceStatsDuration.NewTimer())
            {
                var statsParameters = new ContainerStatsParameters
                {
                    // A single snapshot is enough; do not keep the stream open.
                    Stream = false
                };

                await client.Containers.GetContainerStatsAsync(Id, statsParameters, statsRecorder, cancel);
            }
        }
    }
    catch (Exception ex)
    {
        _metrics.FailedProbeCount.Inc();

        _log.Error(Helpers.Debug.GetAllExceptionMessages(ex));
        _log.Debug(ex.ToString()); // Full details go to verbose output only.

        // A failed probe just means this update is skipped; a later update may succeed.
        // Until then nothing is published for this container.
        Unpublish();
        return;
    }

    // From here on the data is in hand. Any failure below is deliberately left unhandled,
    // because it is not a transient probing problem.

    if (_stateMetrics == null)
    {
        _log.Debug($"First update of state metrics for {DisplayName} ({Id}).");
        _stateMetrics = new ContainerTrackerStateMetrics(DisplayName);
    }

    UpdateStateMetrics(_stateMetrics, container);

    if (resourceStatsAreMissing())
    {
        // No stats this round: drop any resource metrics from an earlier update.
        // They get recreated once stats are available again (e.g. after the container starts).
        _resourceMetrics?.Dispose();
        _resourceMetrics = null;
        return;
    }

    if (_resourceMetrics == null)
    {
        _log.Debug($"Initializing resource metrics for {DisplayName} ({Id}).");
        _resourceMetrics = new ContainerTrackerResourceMetrics(DisplayName);
    }

    UpdateResourceMetrics(_resourceMetrics, container, statsRecorder.Response);

    // True when the stats query was skipped or yielded no response.
    bool resourceStatsAreMissing() => statsRecorder.Response == null;
}