/// <summary>
/// Pulses the performance monitor and copies the resulting heartbeat into a new
/// <see cref="DurableTaskTriggerMetrics"/> instance.
/// </summary>
/// <returns>
/// The populated metrics. When no heartbeat was obtained (the pulse returned null or
/// threw a <see cref="StorageException"/>), a freshly constructed, unpopulated instance
/// is returned instead.
/// </returns>
public async Task<DurableTaskTriggerMetrics> GetMetricsAsync()
{
    var metrics = new DurableTaskTriggerMetrics();

    // Durable stores its own metrics, so we just collect them here
    PerformanceHeartbeat heartbeat = null;
    try
    {
        heartbeat = await this.GetPerformanceMonitor().PulseAsync();
    }
    catch (StorageException e)
    {
        // A storage failure is reported as a warning and otherwise ignored; the
        // heartbeat stays null so the unpopulated metrics are returned below.
        this.traceHelper.ExtensionWarningEvent(this.hubName, this.functionName.Name, string.Empty, e.ToString());
    }

    if (heartbeat == null)
    {
        return metrics;
    }

    metrics.PartitionCount = heartbeat.PartitionCount;

    // The per-queue collections are carried as JSON strings on the metrics object.
    metrics.ControlQueueLengths = JsonConvert.SerializeObject(heartbeat.ControlQueueLengths);
    metrics.ControlQueueLatencies = JsonConvert.SerializeObject(heartbeat.ControlQueueLatencies);

    metrics.WorkItemQueueLength = heartbeat.WorkItemQueueLength;
    if (heartbeat.WorkItemQueueLatency != null)
    {
        metrics.WorkItemQueueLatency = heartbeat.WorkItemQueueLatency.ToString();
    }

    return metrics;
}
/// <summary>
/// Exercises <see cref="DisconnectedPerformanceMonitor"/> against an idle task hub:
/// first with the hub deleted, then with the hub created but empty, and finally with
/// one worker reported against the empty hub.
/// </summary>
public async Task MonitorIdleTaskHubDisconnected()
{
    var hubSettings = new AzureStorageOrchestrationServiceSettings
    {
        StorageConnectionString = TestHelpers.GetTestStorageAccountConnectionString(),
        TaskHubName = nameof(MonitorIdleTaskHubDisconnected),
        PartitionCount = 4,
    };

    var orchestrationService = new AzureStorageOrchestrationService(hubSettings);
    var monitor = new DisconnectedPerformanceMonitor(hubSettings.StorageConnectionString, hubSettings.TaskHubName);

    await orchestrationService.DeleteAsync();

    // A null heartbeat is expected when the task hub does not exist.
    PerformanceHeartbeat heartbeat = await monitor.PulseAsync(currentWorkerCount: 0);
    Assert.IsNull(heartbeat);

    await orchestrationService.CreateAsync();

    // With zero workers and no messages, every pulse must report empty queues
    // and recommend no scale action.
    for (int pulse = 0; pulse < 10; pulse++)
    {
        heartbeat = await monitor.PulseAsync(currentWorkerCount: 0);
        Assert.IsNotNull(heartbeat);

        Assert.AreEqual(hubSettings.PartitionCount, heartbeat.PartitionCount);
        Assert.AreEqual(hubSettings.PartitionCount, heartbeat.ControlQueueLengths.Count);
        Assert.AreEqual(hubSettings.PartitionCount, heartbeat.ControlQueueLatencies.Count);
        Assert.AreEqual(0, heartbeat.ControlQueueLengths.Count(length => length != 0));
        Assert.AreEqual(0, heartbeat.ControlQueueLatencies.Count(latency => latency != TimeSpan.Zero));
        Assert.AreEqual(0, heartbeat.WorkItemQueueLength);
        Assert.AreEqual(0.0, heartbeat.WorkItemQueueLatencyTrend);
        Assert.AreEqual(TimeSpan.Zero, heartbeat.WorkItemQueueLatency);

        ScaleRecommendation idleRecommendation = heartbeat.ScaleRecommendation;
        Assert.IsNotNull(idleRecommendation);
        Assert.AreEqual(ScaleAction.None, idleRecommendation.Action);
        Assert.AreEqual(false, idleRecommendation.KeepWorkersAlive);
        Assert.IsNotNull(idleRecommendation.Reason);
    }

    // If any workers are assigned, the recommendation should be to have them removed.
    heartbeat = await monitor.PulseAsync(currentWorkerCount: 1);

    ScaleRecommendation removalRecommendation = heartbeat.ScaleRecommendation;
    Assert.IsNotNull(removalRecommendation);
    Assert.AreEqual(ScaleAction.RemoveWorker, removalRecommendation.Action);
    Assert.AreEqual(false, removalRecommendation.KeepWorkersAlive);
    Assert.IsNotNull(removalRecommendation.Reason);
}
/// <summary>
/// Returns the cached <see cref="DisconnectedPerformanceMonitor"/>, creating and caching
/// one from the stored connection string and hub name on first use.
/// </summary>
/// <returns>The performance monitor held in <c>this.performanceMonitor</c>.</returns>
/// <exception cref="ArgumentNullException">
/// No monitor is cached yet and <c>this.storageConnectionString</c> is null.
/// </exception>
private DisconnectedPerformanceMonitor GetPerformanceMonitor()
{
    // Fast path: a monitor was supplied earlier or already created by a previous call.
    if (this.performanceMonitor != null)
    {
        return this.performanceMonitor;
    }

    if (this.storageConnectionString == null)
    {
        throw new ArgumentNullException(nameof(this.storageConnectionString));
    }

    this.performanceMonitor = new DisconnectedPerformanceMonitor(this.storageConnectionString, this.hubName);
    return this.performanceMonitor;
}
/// <summary>
/// Initializes a new <see cref="DurableTaskScaleMonitor"/> and builds its
/// <see cref="ScaleMonitorDescriptor"/> from the function id and task hub name.
/// </summary>
/// <param name="functionId">Function id; used as the prefix of the descriptor id.</param>
/// <param name="functionName">Function name, stored for use in trace events.</param>
/// <param name="hubName">Task hub name; used as the suffix of the descriptor id.</param>
/// <param name="storageConnectionString">Storage connection string, stored as-is (not validated here).</param>
/// <param name="traceHelper">Trace helper used for logging.</param>
/// <param name="performanceMonitor">
/// Optional pre-built performance monitor. Stored as-is; may be null.
/// </param>
public DurableTaskScaleMonitor(
    string functionId,
    FunctionName functionName,
    string hubName,
    string storageConnectionString,
    EndToEndTraceHelper traceHelper,
    DisconnectedPerformanceMonitor performanceMonitor = null)
{
    this.functionId = functionId;
    this.functionName = functionName;
    this.hubName = hubName;
    this.storageConnectionString = storageConnectionString;
    this.performanceMonitor = performanceMonitor;
    this.traceHelper = traceHelper;

    // The descriptor id is a machine identifier, so lower-case it with the invariant
    // culture. Culture-sensitive ToLower() could yield a different id depending on the
    // host's current culture (for example the Turkish dotless-i casing rules).
    this.scaleMonitorDescriptor = new ScaleMonitorDescriptor($"{this.functionId}-DurableTaskTrigger-{this.hubName}".ToLowerInvariant());
}
/// <summary>
/// Verifies that <see cref="DisconnectedPerformanceMonitor"/> reports growing control
/// queue lengths and latencies as orchestration instances are created one at a time
/// with nothing consuming them, and checks the scale recommendation on each pulse.
/// </summary>
public async Task MonitorIncreasingControlQueueLoadDisconnected()
{
    var settings = new AzureStorageOrchestrationServiceSettings()
    {
        StorageConnectionString = TestHelpers.GetTestStorageAccountConnectionString(),
        TaskHubName = nameof(MonitorIncreasingControlQueueLoadDisconnected),
        PartitionCount = 4,
    };

    var service = new AzureStorageOrchestrationService(settings);
    var monitor = new DisconnectedPerformanceMonitor(settings.StorageConnectionString, settings.TaskHubName);
    int simulatedWorkerCount = 0;
    await service.CreateAsync();

    // A heartbeat should come back with no recommendation since there is no data.
    PerformanceHeartbeat heartbeat = await monitor.PulseAsync(simulatedWorkerCount);
    Assert.IsNotNull(heartbeat);
    Assert.IsNotNull(heartbeat.ScaleRecommendation);
    Assert.AreEqual(ScaleAction.None, heartbeat.ScaleRecommendation.Action);
    Assert.IsFalse(heartbeat.ScaleRecommendation.KeepWorkersAlive);

    var client = new TaskHubClient(service);
    var previousTotalLatency = TimeSpan.Zero;
    for (int i = 1; i < settings.PartitionCount + 10; i++)
    {
        // Each iteration enqueues exactly one more message, so after iteration i
        // the control queues are expected to hold i messages in total.
        await client.CreateOrchestrationInstanceAsync(typeof(NoOpOrchestration), input: null);
        heartbeat = await monitor.PulseAsync(simulatedWorkerCount);
        Assert.IsNotNull(heartbeat);

        ScaleRecommendation recommendation = heartbeat.ScaleRecommendation;
        Assert.IsNotNull(recommendation);
        Assert.IsTrue(recommendation.KeepWorkersAlive);

        Assert.AreEqual(settings.PartitionCount, heartbeat.PartitionCount);
        Assert.AreEqual(settings.PartitionCount, heartbeat.ControlQueueLengths.Count);
        Assert.AreEqual(i, heartbeat.ControlQueueLengths.Sum());
        Assert.AreEqual(0, heartbeat.WorkItemQueueLength);
        Assert.AreEqual(TimeSpan.Zero, heartbeat.WorkItemQueueLatency);

        TimeSpan currentTotalLatency = TimeSpan.FromTicks(heartbeat.ControlQueueLatencies.Sum(ts => ts.Ticks));
        Assert.IsTrue(currentTotalLatency > previousTotalLatency);

        if (i + 1 < DisconnectedPerformanceMonitor.QueueLengthSampleSize)
        {
            int queuesWithNonZeroLatencies = heartbeat.ControlQueueLatencies.Count(t => t > TimeSpan.Zero);
            Assert.IsTrue(queuesWithNonZeroLatencies > 0 && queuesWithNonZeroLatencies <= i);

            int queuesWithAtLeastOneMessage = heartbeat.ControlQueueLengths.Count(l => l > 0);
            Assert.IsTrue(queuesWithAtLeastOneMessage > 0 && queuesWithAtLeastOneMessage <= i);

            ScaleAction expectedScaleAction = simulatedWorkerCount == 0 ? ScaleAction.AddWorker : ScaleAction.None;
            Assert.AreEqual(expectedScaleAction, recommendation.Action);
        }
        else
        {
            // Validate that control queue latencies are going up with each iteration.
            Assert.IsTrue(currentTotalLatency.Ticks > previousTotalLatency.Ticks);
            previousTotalLatency = currentTotalLatency;
        }

        Assert.AreEqual(0, heartbeat.WorkItemQueueLength);
        Assert.AreEqual(0.0, heartbeat.WorkItemQueueLatencyTrend);

        if (recommendation.Action == ScaleAction.AddWorker)
        {
            simulatedWorkerCount++;
        }

        // The high-latency threshold is 1 second
        // Wait asynchronously rather than blocking the thread inside an async method.
        await Task.Delay(TimeSpan.FromSeconds(1.1));
    }
}
/// <summary>
/// Rebuilds <see cref="PerformanceHeartbeat"/> objects from the supplied metrics, asks the
/// performance monitor for a scale recommendation, and maps it to a <see cref="ScaleStatus"/>.
/// </summary>
/// <param name="workerCount">Worker count passed through to the scale recommendation.</param>
/// <param name="metrics">Metrics samples to evaluate; may be null.</param>
/// <returns>
/// A status whose vote is <c>ScaleOut</c> for an add-worker recommendation, <c>ScaleIn</c>
/// for a remove-worker recommendation, and <c>None</c> otherwise (including when
/// <paramref name="metrics"/> is null).
/// </returns>
private ScaleStatus GetScaleStatusCore(int workerCount, DurableTaskTriggerMetrics[] metrics)
{
    var scaleStatus = new ScaleStatus() { Vote = ScaleVote.None };
    if (metrics == null)
    {
        return scaleStatus;
    }

    var heartbeats = new PerformanceHeartbeat[metrics.Length];
    for (int index = 0; index < metrics.Length; ++index)
    {
        DurableTaskTriggerMetrics sample = metrics[index];

        // An unparseable (or missing) latency string is treated as zero latency.
        TimeSpan parsedLatency;
        if (!TimeSpan.TryParse(sample.WorkItemQueueLatency, out parsedLatency))
        {
            parsedLatency = TimeSpan.FromMilliseconds(0);
        }

        var heartbeat = new PerformanceHeartbeat()
        {
            PartitionCount = sample.PartitionCount,
            WorkItemQueueLatency = parsedLatency,
            WorkItemQueueLength = sample.WorkItemQueueLength,
        };

        // The per-queue collections arrive as JSON strings; a null string becomes an empty list.
        if (sample.ControlQueueLengths != null)
        {
            heartbeat.ControlQueueLengths = JsonConvert.DeserializeObject<IReadOnlyList<int>>(sample.ControlQueueLengths);
        }
        else
        {
            heartbeat.ControlQueueLengths = new List<int>();
        }

        if (sample.ControlQueueLatencies != null)
        {
            heartbeat.ControlQueueLatencies = JsonConvert.DeserializeObject<IReadOnlyList<TimeSpan>>(sample.ControlQueueLatencies);
        }
        else
        {
            heartbeat.ControlQueueLatencies = new List<TimeSpan>();
        }

        heartbeats[index] = heartbeat;
    }

    var scaleRecommendation = this.GetPerformanceMonitor().MakeScaleRecommendation(workerCount, heartbeats.ToArray());

    // Only actual scale-out / scale-in decisions are surfaced to the user logs.
    bool writeToUserLogs;
    var recommendedAction = scaleRecommendation?.Action;
    if (recommendedAction == ScaleAction.AddWorker)
    {
        scaleStatus.Vote = ScaleVote.ScaleOut;
        writeToUserLogs = true;
    }
    else if (recommendedAction == ScaleAction.RemoveWorker)
    {
        scaleStatus.Vote = ScaleVote.ScaleIn;
        writeToUserLogs = true;
    }
    else
    {
        scaleStatus.Vote = ScaleVote.None;
        writeToUserLogs = false;
    }

    this.traceHelper.ExtensionInformationalEvent(
        this.hubName,
        string.Empty,
        this.functionName.Name,
        $"Durable Functions Trigger Scale Decision: {scaleStatus.Vote.ToString()}, Reason: {scaleRecommendation?.Reason}",
        writeToUserLogs: writeToUserLogs);

    return scaleStatus;
}
/// <summary>
/// Replaces <c>_monitor</c> with a new <see cref="DisconnectedPerformanceMonitor"/> built
/// from the storage account, task hub and maximum polling interval found on
/// <paramref name="context"/>.
/// </summary>
/// <param name="context">
/// Supplies <c>StorageAccount</c> (parsed with <c>CloudStorageAccount.Parse</c>),
/// <c>TaskHub</c> and <c>MaxPollingIntervalMillisecond</c>.
/// </param>
private void SetStorageAccount(DurableTaskContext context)
{
    _monitor = new DisconnectedPerformanceMonitor(
        CloudStorageAccount.Parse(context.StorageAccount),
        context.TaskHub,
        context.MaxPollingIntervalMillisecond);
}