/// <summary>
/// Calculates a scale recommendation based on passed-in performance metrics.
/// </summary>
/// <param name="workerCount">The number of workers known to be processing messages for this task hub.</param>
/// <param name="performanceHeartbeats">Previously collected, chronologically-ordered performance metrics.</param>
/// <returns>Returns a scale recommendation.</returns>
public virtual ScaleRecommendation MakeScaleRecommendation(int workerCount, PerformanceHeartbeat[] performanceHeartbeats)
{
    // Without any samples there is nothing to reason about; keep workers alive
    // so an active hub isn't torn down just because metrics are missing.
    if (performanceHeartbeats == null || performanceHeartbeats.Length == 0)
    {
        return new ScaleRecommendation(ScaleAction.None, keepWorkersAlive: true, reason: "No heartbeat metrics");
    }

    // The most recent heartbeat is authoritative for the partition count.
    int partitionCount = performanceHeartbeats.Last().PartitionCount;

    // Fold the heartbeat samples into rolling histories: one history for the
    // work-item queue, and one history per control-queue partition. Control
    // histories are created lazily in case earlier heartbeats reported fewer
    // control queues than later ones.
    var workItemHistory = new QueueMetricHistory(QueueLengthSampleSize);
    var controlHistories = new List<QueueMetricHistory>();

    foreach (PerformanceHeartbeat heartbeat in performanceHeartbeats)
    {
        workItemHistory.Add((int)heartbeat.WorkItemQueueLatency.TotalMilliseconds);

        for (int i = 0; i < heartbeat.ControlQueueLatencies.Count; i++)
        {
            if (controlHistories.Count <= i)
            {
                controlHistories.Add(new QueueMetricHistory(QueueLengthSampleSize));
            }

            controlHistories[i].Add((int)heartbeat.ControlQueueLatencies[i].TotalMilliseconds);
        }
    }

    // Delegate the actual decision-making to the history-based overload.
    return MakeScaleRecommendation(workerCount, partitionCount, workItemHistory, controlHistories);
}
/// <summary>
/// Determines whether the latest sample in <paramref name="history"/> should be
/// treated as high latency for scale-out purposes.
/// </summary>
/// <param name="history">The rolling latency history to inspect.</param>
/// <returns><c>true</c> if the latest latency sample crosses the applicable threshold.</returns>
static bool IsHighLatency(QueueMetricHistory history)
{
    // If the previous sample was zero, the queue may have been idle, which means
    // backoff polling (rather than load) might explain the observed latency —
    // so compare against the polling ceiling instead of the normal threshold.
    int threshold = history.Previous == 0 ? MaxPollingLatency : HighLatencyThreshold;
    return history.Latest >= threshold;
}
/// <summary>
/// Determines whether a queue appears idle, i.e. every sample in its
/// latency history is zero.
/// </summary>
/// <param name="history">The rolling latency history to inspect.</param>
/// <returns><c>true</c> if all recorded samples are zero.</returns>
static bool IsIdle(QueueMetricHistory history) => history.IsAllZeros();
/// <summary>
/// Determines whether a queue is experiencing low latency, based on its two
/// most recent samples both being at or below the low-latency threshold.
/// </summary>
/// <param name="history">The rolling latency history to inspect.</param>
/// <returns><c>true</c> if both the latest and previous samples are low.</returns>
static bool IsLowLatency(QueueMetricHistory history) =>
    history.Latest <= LowLatencyThreshold && history.Previous <= LowLatencyThreshold;
/// <summary>
/// Produces a scale recommendation from the latency histories accumulated on
/// this instance (<c>WorkItemQueueLatencies</c>, <c>ControlQueueLatencies</c>).
/// Guards are evaluated in priority order: bootstrap, insufficient samples,
/// idle hub, hot work-item queue, excess workers, then control-queue pressure.
/// </summary>
/// <param name="workerCount">The number of workers currently processing messages for this task hub.</param>
/// <returns>The recommended scale action, with a human-readable reason.</returns>
ScaleRecommendation MakeScaleRecommendation(int workerCount)
{
    // REVIEW: Is zero latency a reliable indicator of idle?
    bool taskHubIsIdle = IsIdle(this.WorkItemQueueLatencies) && this.ControlQueueLatencies.TrueForAll(IsIdle);

    // Bootstrap case: there is load but no workers at all — always add the first one.
    if (workerCount == 0 && !taskHubIsIdle)
    {
        return (new ScaleRecommendation(ScaleAction.AddWorker, keepWorkersAlive: true, reason: "First worker"));
    }

    // Wait until we have enough samples before making specific recommendations
    if (!this.WorkItemQueueLatencies.IsFull || !this.ControlQueueLatencies.TrueForAll(h => h.IsFull))
    {
        return (new ScaleRecommendation(ScaleAction.None, keepWorkersAlive: !taskHubIsIdle, reason: "Not enough samples"));
    }

    if (taskHubIsIdle)
    {
        // Fully idle: drain workers one at a time and signal that none need to
        // be kept alive (keepWorkersAlive: false).
        return (new ScaleRecommendation(
            scaleAction: workerCount > 0 ? ScaleAction.RemoveWorker : ScaleAction.None,
            keepWorkersAlive: false,
            reason: "Task hub is idle"));
    }
    else if (IsHighLatency(this.WorkItemQueueLatencies))
    {
        // Work-item queues are not partitioned, so any number of workers can help drain them.
        // NOTE(review): the reason string always cites HighLatencyThreshold even when
        // IsHighLatency triggered via the MaxPollingLatency path — message only, not behavior.
        return (new ScaleRecommendation(
            ScaleAction.AddWorker,
            keepWorkersAlive: true,
            reason: $"Work-item queue latency: {this.WorkItemQueueLatencies.Latest} > {HighLatencyThreshold}"));
    }
    else if (workerCount > this.PartitionCount && IsIdle(this.WorkItemQueueLatencies))
    {
        // More workers than control-queue partitions is wasted capacity once
        // the work-item queue has gone quiet.
        return (new ScaleRecommendation(
            ScaleAction.RemoveWorker,
            keepWorkersAlive: true,
            reason: $"Work-items idle, #workers > partitions ({workerCount} > {this.PartitionCount})"));
    }

    // Control queues are partitioned; only scale-out if there are more partitions than workers.
    if (workerCount < this.ControlQueueLatencies.Count(IsHighLatency))
    {
        // Some control queues are busy, so scale out until workerCount == partitionCount.
        QueueMetricHistory metric = this.ControlQueueLatencies.First(IsHighLatency);
        return (new ScaleRecommendation(
            ScaleAction.AddWorker,
            keepWorkersAlive: true,
            reason: $"High control queue latency: {metric.Latest} > {HighLatencyThreshold}"));
    }
    else if (workerCount > this.ControlQueueLatencies.Count(h => !IsIdle(h)) && IsIdle(this.WorkItemQueueLatencies))
    {
        // If the work item queues are idle, scale down to the number of non-idle control queues.
        return (new ScaleRecommendation(
            ScaleAction.RemoveWorker,
            keepWorkersAlive: this.ControlQueueLatencies.Any(IsIdle),
            reason: $"One or more control queues idle"));
    }
    else if (workerCount > 1)
    {
        // If all queues are operating efficiently, it can be hard to know if we need to reduce the worker count.
        // We want to avoid the case where a constant trickle of load after a big scale-out prevents scaling back in.
        // We also want to avoid scaling in unnecessarily when we've reached optimal scale-out. To balance these
        // goals, we check for low latencies and vote to scale down 10% of the time when we see this. The thought is
        // that it's a slow scale-in that will get automatically corrected once latencies start increasing again.
        bool tryRandomScaleDown = Random.Next(10) == 0;
        if (tryRandomScaleDown &&
            this.ControlQueueLatencies.TrueForAll(IsLowLatency) &&
            this.WorkItemQueueLatencies.TrueForAll(latency => latency < LowLatencyThreshold))
        {
            return (new ScaleRecommendation(
                ScaleAction.RemoveWorker,
                keepWorkersAlive: true,
                reason: $"All queues are not busy"));
        }
    }

    // Load exists, but none of our scale filters were triggered, so we assume that the current worker
    // assignments are close to ideal for the current workload.
    return (new ScaleRecommendation(ScaleAction.None, keepWorkersAlive: true, reason: $"Queue latencies are healthy"));
}