/// <summary>
/// Asks the autoscaling agent for the best cluster size for <paramref name="redisInstance"/> and, when that
/// size differs from the instance's current one and the proposed scale path is non-empty, scales the instance
/// along that path.
/// </summary>
/// <param name="context">Rule context used to emit diagnostics and to derive the operation context.</param>
/// <param name="redisInstance">Instance to scale; the contract requires its <c>IsReadyToScale</c> to hold.</param>
/// <param name="cancellationToken">Forwarded to the instance's <c>ScaleAsync</c> call.</param>
/// <returns>
/// <c>false</c> when the estimation fails or when there is nothing to do; <c>true</c> after a scale operation
/// has been issued. A failed scale result is reported and then surfaced through <c>ThrowIfFailure</c>.
/// </returns>
private async Task<bool> AttemptToScaleAsync(RuleContext context, IRedisInstance redisInstance, CancellationToken cancellationToken)
{
    Contract.Requires(redisInstance.IsReadyToScale);

    // Snapshot of the size we start from; every diagnostic below refers to it.
    var startingSize = redisInstance.ClusterSize;

    // Ask the model which cluster size it would pick for this instance.
    var operationContext = context.IntoOperationContext(_configuration.Logger);
    var estimate = await _redisAutoscalingAgent.EstimateBestClusterSizeAsync(operationContext, redisInstance);
    if (!estimate.Succeeded)
    {
        Emit(
            context,
            "Autoscale",
            Severity.Error,
            $"Failed to find best plan for instance `{redisInstance.Name}` in plan `{startingSize}`. Result=[{estimate}]");
        return false;
    }

    var plan = estimate.Value;
    Contract.AssertNotNull(plan);

    var desiredSize = plan.TargetClusterSize;

    // Nothing to do when the model keeps the instance where it is, or offers no steps to get anywhere else.
    // The instance's size is read again here rather than reusing the snapshot taken above.
    var alreadyAtDesiredSize = desiredSize.Equals(redisInstance.ClusterSize);
    if (alreadyAtDesiredSize || plan.ScalePath.Count == 0)
    {
        return false;
    }

    var scalePathText = string.Join(" -> ", plan.ScalePath);
    Emit(
        context,
        "Autoscale",
        Severity.Warning,
        $"Autoscaling from `{startingSize}` to `{desiredSize}` via scale path `{startingSize} -> {scalePathText}` for instance `{redisInstance.Name}`. Solution cost is `{plan.Cost}`");

    var outcome = await redisInstance.ScaleAsync(plan.ScalePath, cancellationToken);
    if (!outcome)
    {
        // Report the failure first, then let the result object raise it to the caller.
        Emit(
            context,
            "Autoscale",
            Severity.Error,
            $"Autoscale attempt from `{startingSize}` to `{desiredSize}` for instance `{redisInstance.Name}` failed. Result=[{outcome}]");
        outcome.ThrowIfFailure();
    }

    return true;
}
/// <summary>
/// Asks the autoscaling agent for the best cluster size for <paramref name="redisInstance"/> and scales the
/// instance towards it. Downscales are additionally gated: they only run when the business-hours constraint is
/// satisfied, only take the first step of the proposed scale path, and are skipped when the previous recorded
/// autoscale of the same instance is more recent than the configured minimum wait time.
/// </summary>
/// <param name="context">Rule context used to emit diagnostics and to derive the operation context.</param>
/// <param name="redisInstance">Instance to scale; the contract requires its <c>IsReadyToScale</c> to hold.</param>
/// <returns>
/// <c>false</c> when the estimation fails, when there is nothing to do, or when a downscale is refused by one of
/// the gates above; <c>true</c> after a scale operation has been issued. A failed scale result is reported and
/// then surfaced through <c>ThrowIfFailure</c>.
/// </returns>
private async Task<bool> AttemptToScaleAsync(RuleContext context, IRedisInstance redisInstance)
{
    Contract.Requires(redisInstance.IsReadyToScale);

    var operationContext = context.IntoOperationContext(_configuration.Logger);

    // Snapshot of the size we start from; every decision and diagnostic below refers to it.
    var startingSize = redisInstance.ClusterSize;

    // Ask the model which cluster size it would pick for this instance.
    var estimate = await _redisAutoscalingAgent.EstimateBestClusterSizeAsync(operationContext, redisInstance);
    if (!estimate.Succeeded)
    {
        Emit(
            context,
            "Autoscale",
            Severity.Error,
            $"Failed to find best plan for instance `{redisInstance.Name}` in plan `{startingSize}`. Result=[{estimate}]");
        return false;
    }

    var plan = estimate.Value;
    Contract.AssertNotNull(plan);

    var desiredSize = plan.TargetClusterSize;

    // Nothing to do when the model keeps the instance where it is, or offers no steps to get anywhere else.
    var alreadyAtDesiredSize = desiredSize.Equals(startingSize);
    if (alreadyAtDesiredSize || plan.ScalePath.Count == 0)
    {
        return false;
    }

    if (RedisScalingUtilities.IsDownScale(startingSize, desiredSize))
    {
        // A downscale is usually a cost optimisation rather than a health fix, so it gets lower priority: it is
        // only allowed during comfortable PST business hours, when someone is around should it go wrong.
        // Holidays are deliberately not accounted for, since handling them is more trouble than it is worth.
        var withinBusinessHours = TimeConstraints.BusinessHours.SatisfiedPST(_configuration.Clock.UtcNow);
        if (!withinBusinessHours)
        {
            var refusedPathText = string.Join(" -> ", plan.ScalePath);
            Emit(
                context,
                "Autoscale",
                Severity.Info,
                $"Refused autoscale from `{startingSize}` to `{desiredSize}` via scale path `{startingSize} -> {refusedPathText}` for instance `{redisInstance.Name}` due to business hours constraints");
            return false;
        }

        // Downscaling happens one step at a time. Only the first step of the proposed path is kept for this
        // iteration, and the instance must then sit out a minimum wait before it may be downscaled again. The
        // pause leaves room to observe the previous step's impact, since the effect of migration on memory and
        // CPU load takes a while to show up.
        //
        // Without it, a downscale may overload the remaining shards and force a scale back up to the original
        // size later on, which over time turns into "autoscale loops".
        plan.ScalePath = plan.ScalePath.Take(1).ToList();

        // The clock is only consulted when a previous autoscale has been recorded for this instance.
        if (_lastAutoscaleTimeUtc.TryGetValue(redisInstance.Id, out var previousAutoscaleUtc)
            && _configuration.Clock.UtcNow - previousAutoscaleUtc < _configuration.MinimumWaitTimeBetweenDownscaleSteps)
        {
            return false;
        }
    }

    // For a downscale the path printed here is the truncated, single-step one.
    var scalePathText = string.Join(" -> ", plan.ScalePath);
    Emit(
        context,
        "Autoscale",
        Severity.Warning,
        $"Autoscaling from `{startingSize}` ({startingSize.MonthlyCostUsd} USD/mo) to `{desiredSize}` ({desiredSize.MonthlyCostUsd} USD/mo) via scale path `{startingSize} -> {scalePathText}` for instance `{redisInstance.Name}`. CostFunction=[{plan.Cost}]");

    var outcome = await redisInstance.ScaleAsync(operationContext, plan.ScalePath);

    // The attempt time is recorded before the outcome is inspected, so failed attempts are recorded as well.
    _lastAutoscaleTimeUtc[redisInstance.Id] = _configuration.Clock.UtcNow;

    if (!outcome)
    {
        // Report the failure first, then let the result object raise it to the caller.
        Emit(
            context,
            "Autoscale",
            Severity.Error,
            $"Autoscale attempt from `{startingSize}` to `{desiredSize}` for instance `{redisInstance.Name}` failed. Result=[{outcome}]");
        outcome.ThrowIfFailure();
    }

    return true;
}