public void CanScaleBetween(string fromString, string toString)
{
    // Parse both endpoints; ThrowIfFailure is applied to each parse result before it is used.
    var source = RedisClusterSize.TryParse(fromString).ThrowIfFailure();
    var destination = RedisClusterSize.TryParse(toString).ThrowIfFailure();

    // The pair is expected to be reported as scalable.
    var canScale = RedisScalingUtilities.CanScale(source, destination);
    Assert.True(canScale);
}
public void DownscaleManualExamples(string fromString, string toString)
{
    // Parse both endpoints; ThrowIfFailure is applied to each parse result before it is used.
    var source = RedisClusterSize.TryParse(fromString).ThrowIfFailure();
    var destination = RedisClusterSize.TryParse(toString).ThrowIfFailure();

    // Every example pair is expected to be classified as a downscale.
    var isDownScale = RedisScalingUtilities.IsDownScale(source, destination);
    Assert.True(isDownScale);
}
/// <summary>
/// Picks the cheapest cluster size that is reachable from <paramref name="currentClusterSize"/> and that
/// <c>IsScalingAllowed</c> accepts, together with the path used to reach it.
/// </summary>
/// <returns>
/// The selected <see cref="ModelOutput"/>, or an error result when no cluster size qualifies.
/// </returns>
private Result<ModelOutput> Predict(RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    var shortestPaths = ComputeAllowedPaths(currentClusterSize, modelContext);

    var rankedCandidates =
        (from kvp in shortestPaths
         let size = kvp.Key
         let node = kvp.Value
         // Keep only plans that we can reach from the current one via scaling operations, and that we
         // allow scaling to
         where node.ShortestDistanceFromSource != double.PositiveInfinity
               && IsScalingAllowed(currentClusterSize, size, modelContext)
         // Compute the cost of taking the given route
         let cost = CostFunction(currentClusterSize, size, modelContext, shortestPaths)
         // Rank by cost ascending
         orderby cost
         select (Size: size, Cost: cost))
        .ToList();

    if (rankedCandidates.Count == 0)
    {
        return new Result<ModelOutput>(errorMessage: "No cluster size available for scaling");
    }

    var cheapest = rankedCandidates[0];
    return new ModelOutput(
        targetClusterSize: cheapest.Size,
        modelContext: modelContext,
        cost: cheapest.Cost,
        scalePath: RedisScalingUtilities.ComputeShortestPath(shortestPaths, currentClusterSize, cheapest.Size));
}
/// <summary>
/// Picks the cheapest cluster size that is reachable from <paramref name="currentClusterSize"/> and that
/// <c>IsScalingAllowed</c> accepts, together with the path used to reach it.
/// </summary>
/// <returns>
/// The selected <see cref="ModelOutput"/>, or an error result when no cluster size qualifies.
/// </returns>
private static Result<ModelOutput> Predict(RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    // TODO: autoscaler should consider the server load percentage as well. If a shard had a very high load
    // percentage, it means that it is for some reason receiving an uneven load. Hence, adding shards helps in
    // this situation. There is no easy way to add that to the current model. Ideas:
    //  - If any server reached a load >70% at any time in the period analyzed, we need to guarantee that
    //    there's at least as many shards as there were before (i.e. no downscales are allowed).
    var shortestPaths = ComputeAllowedPaths(currentClusterSize, modelContext);

    var ranked = shortestPaths
        // Only consider sizes reachable through scaling operations that we also allow scaling to
        .Where(kvp =>
            kvp.Value.ShortestDistanceFromSource != double.PositiveInfinity
            && IsScalingAllowed(currentClusterSize, kvp.Key, modelContext))
        // Attach the cost of moving to each remaining size
        .Select(kvp => (Size: kvp.Key, Cost: CostFunction(currentClusterSize, kvp.Key, modelContext, shortestPaths)))
        // Cheapest first
        .OrderBy(candidate => candidate.Cost)
        .ToList();

    if (ranked.Count == 0)
    {
        return new Result<ModelOutput>(errorMessage: "No cluster size available for scaling");
    }

    var best = ranked[0];
    var scalePath = RedisScalingUtilities.ComputeShortestPath(shortestPaths, currentClusterSize, best.Size);
    return new ModelOutput(
        targetClusterSize: best.Size,
        modelContext: modelContext,
        cost: best.Cost,
        scalePath: scalePath);
}
/// <summary>
/// Decides whether moving to <paramref name="target"/> is reasonable given what we know about our production
/// workload. This does not check whether Azure Cache for Redis business rules permit the move from
/// <paramref name="current"/>; the autoscaler figures out how to reach the desired plan.
/// </summary>
/// <returns>
/// <c>false</c> when the target has less memory or fewer estimated requests per second than the model context
/// requires, or more memory than the configured maximum (when one is set); <c>true</c> otherwise.
/// </returns>
private static bool IsScalingAllowed(
    RedisClusterSize current,
    RedisClusterSize target,
    ModelContext modelContext)
{
    // None of the checks below is per-shard; the reason is that the shards distribute keys evenly.
    var rejected =
        // Cluster must be able to hold the amount of data we'll give it, with some overhead in case of
        // production issues.
        target.ClusterMemorySizeMb < modelContext.MinimumAllowedClusterMemoryMb
        // Cluster must be able to handle the amount of operations needed.
        || target.EstimatedRequestsPerSecond < modelContext.MinimumAllowedClusterRps
        // Disallow going over the maximum allowed cluster memory, if there is one.
        || (modelContext.MaximumAllowedClusterMemoryMb != null
            && target.ClusterMemorySizeMb > modelContext.MaximumAllowedClusterMemoryMb.Value);

    return !rejected;
}
/// <summary>
/// Creates a model output from its four components.
/// </summary>
/// <param name="targetClusterSize">Stored in <c>TargetClusterSize</c>.</param>
/// <param name="modelContext">Stored in <c>ModelContext</c>.</param>
/// <param name="cost">Stored in <c>Cost</c>.</param>
/// <param name="scalePath">Stored in <c>ScalePath</c>.</param>
public ModelOutput(
    RedisClusterSize targetClusterSize,
    ModelContext modelContext,
    double cost,
    IReadOnlyList<RedisClusterSize> scalePath)
{
    ScalePath = scalePath;
    Cost = cost;
    ModelContext = modelContext;
    TargetClusterSize = targetClusterSize;
}
public void CanFindEmptyRoute()
{
    // Source and destination are parsed from the same size string.
    var source = RedisClusterSize.Parse("P1/1");
    var destination = RedisClusterSize.Parse("P1/1");

    var route = RedisScalingUtilities.ComputeShortestPath(
        source,
        destination,
        clusterSize => clusterSize.ScaleEligibleSizes,
        (start, end) => 1);

    // No steps are expected in the resulting path.
    route.Should().BeEmpty();
}
public void FailsOnNonExistantRoute()
{
    var source = RedisClusterSize.Parse("P1/1");
    var destination = RedisClusterSize.Parse("P3/3");

    // The neighbor function yields no neighbors for any size, so the graph has no edges at all.
    var route = RedisScalingUtilities.ComputeShortestPath(
        source,
        destination,
        clusterSize => new RedisClusterSize[0],
        (start, end) => 1);

    // The expected outcome for an unreachable destination is an empty path.
    route.Should().BeEmpty();
}
public void SucceedsOnSimpleRoute()
{
    var source = RedisClusterSize.Parse("P1/1");
    var destination = RedisClusterSize.Parse("P3/3");

    // Every edge has weight 1.
    var route = RedisScalingUtilities.ComputeShortestPath(
        source,
        destination,
        clusterSize => clusterSize.ScaleEligibleSizes,
        (start, end) => 1);

    // Expected route: go through P3/1 and then arrive at P3/3.
    var expectedRoute = new RedisClusterSize[]
    {
        RedisClusterSize.Parse("P3/1"),
        RedisClusterSize.Parse("P3/3"),
    };
    route.Should().BeEquivalentTo(expectedRoute);
}
public void CanFindSingleRoute()
{
    var source = RedisClusterSize.Parse("P1/1");
    var destination = RedisClusterSize.Parse("P1/2");

    var route = RedisScalingUtilities.ComputeShortestPath(
        source,
        destination,
        clusterSize => clusterSize.ScaleEligibleSizes,
        (start, end) => 1);

    // The route is expected to be a single hop that lands directly on the destination.
    route.Count.Should().Be(1);
    route[0].Should().Be(destination);
}
/// <summary>
/// Decides whether moving to <paramref name="targetClusterSize"/> is reasonable given what we know about our
/// production workload. This does not check whether Azure Cache for Redis business rules permit the move from
/// <paramref name="currentClusterSize"/>; the autoscaler figures out how to reach the desired plan.
/// </summary>
/// <returns>
/// <c>false</c> when the target violates the memory or throughput constraints in
/// <paramref name="modelContext"/>, or when it is a downscale that saves less than the configured minimum;
/// <c>true</c> otherwise.
/// </returns>
private bool IsScalingAllowed(
    RedisClusterSize currentClusterSize,
    RedisClusterSize targetClusterSize,
    ModelContext modelContext)
{
    // WARNING: the order of the checks below matters. Please be careful when changing it.

    // The cluster must be able to hold the data we'll give it (with some overhead in case of production
    // issues) and to serve the amount of operations needed. Neither check is per-shard; the reason is that the
    // shards distribute keys evenly.
    if (targetClusterSize.ClusterMemorySizeMb < modelContext.MinimumAllowedClusterMemoryMb
        || targetClusterSize.EstimatedRequestsPerSecond < modelContext.MinimumAllowedClusterRps)
    {
        return false;
    }

    // Disallow going over the maximum allowed cluster memory.
    // NOTE: only the target is constrained, rather than all nodes in the path. The reason for this is that
    // our ability to reach all nodes is based on being able to scale above any specific memory threshold.
    var memoryCeilingMb = modelContext.MaximumAllowedClusterMemoryMb;
    if (memoryCeilingMb != null && targetClusterSize.ClusterMemorySizeMb > memoryCeilingMb.Value)
    {
        return false;
    }

    // Staying put is always allowed once the constraints above hold.
    // NOTE: this is here because in downscale situations we always want the "status quo" action available.
    if (currentClusterSize.Equals(targetClusterSize))
    {
        return true;
    }

    // Without a configured minimum saving there is nothing else to reject
    if (_configuration.MinimumCostSavingForDownScaling == null)
    {
        return true;
    }

    // Disallow downscales that don't improve cost significantly
    var monthlyCostDelta = targetClusterSize.MonthlyCostUsd - currentClusterSize.MonthlyCostUsd;
    var isInsignificantDownScale =
        RedisScalingUtilities.IsDownScale(currentClusterSize, targetClusterSize)
        && monthlyCostDelta <= 0
        && -monthlyCostDelta < _configuration.MinimumCostSavingForDownScaling;

    return !isInsignificantDownScale;
}
/// <summary>
/// Answers "how much does it cost to switch from <paramref name="current"/> to <paramref name="target"/>".
/// At this point, we can assume that:
///  - The two input sizes are valid states to be in
///  - We can reach the target from current via some amount of autoscaling operations
/// Hence, we're just ranking amongst the many potential states.
/// </summary>
/// <returns>
/// Zero when both sizes are equal; otherwise the monthly cost difference in USD (positive if we are spending
/// more money, negative if we are saving).
/// </returns>
private static double CostFunction(
    RedisClusterSize current,
    RedisClusterSize target,
    ModelContext modelContext,
    IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> shortestPaths)
{
    var isNoOp = current.Equals(target);
    if (!isNoOp)
    {
        // A real move is expected to have at least one step in its path
        var route = RedisScalingUtilities.ComputeShortestPath(shortestPaths, current, target);
        Contract.Assert(route.Count > 0);

        var monthlyCostDeltaUsd = target.MonthlyCostUsd - current.MonthlyCostUsd;
        return (double)monthlyCostDeltaUsd;
    }

    // Switching to the same size is free
    return 0;
}
public Task DisallowsLowCostDownscalesAsync()
{
    return RunTestAsync(async (operationContext, redisAutoscalingAgent) =>
    {
        // Start from P2/1 and feed the agent a single memory sample and a single ops/s sample
        var startingSize = RedisClusterSize.Parse("P2/1");
        var redisInstance = new MockRedisInstance(startingSize);
        redisAutoscalingAgent.UsedMemoryBytes.Add("7.4 GB".ToSize());
        redisAutoscalingAgent.OperationsPerSecond.Add(10000);

        var estimate = await redisAutoscalingAgent
            .EstimateBestClusterSizeAsync(operationContext, redisInstance)
            .ThrowIfFailureAsync();

        // The expected decision is to stay put, i.e. an empty scale path
        var scalePath = estimate.ScalePath;
        scalePath.Should().BeEmpty();
    });
}
public Task PrefersAddingShardsWhenMemoryGrowsAsync(string initialClusterSize, IEnumerable<string> expectedPath, string usedMemoryAcrossAllShards)
{
    return RunTestAsync(async (operationContext, redisAutoscalingAgent) =>
    {
        // Feed the agent the given memory usage and a fixed 10 ops/s sample
        var usedMemory = usedMemoryAcrossAllShards.ToSize();
        redisAutoscalingAgent.UsedMemoryBytes.Add(usedMemory);
        redisAutoscalingAgent.OperationsPerSecond.Add(10);

        var startingSize = RedisClusterSize.Parse(initialClusterSize);
        var redisInstance = new MockRedisInstance(startingSize);

        var estimate = await redisAutoscalingAgent
            .EstimateBestClusterSizeAsync(operationContext, redisInstance)
            .ThrowIfFailureAsync();

        // The proposed path must match the expected sequence of cluster sizes
        var expectedSizes = expectedPath.Select(sizeString => RedisClusterSize.Parse(sizeString));
        estimate.ScalePath.Should().BeEquivalentTo(expectedSizes);
    });
}
/// <summary>
/// Translates the highest server load fetched for the instance into shard-count constraints on
/// <paramref name="modelContext"/>:
///  - Below the medium threshold: nothing is constrained.
///  - At or above the medium threshold: the minimum allowed number of shards is set to the current one.
///  - At or above the high threshold: additionally, changing the number of shards is disallowed.
/// </summary>
private async Task ComputeServerLoadFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, ModelContext modelContext, RedisClusterSize currentClusterSize)
{
    var serverLoads = await FetchMaximumServerLoadAcrossShardsAsync(context, now, redisAzureId);

    // An empty list (might only ever happen in tests) is treated as zero load
    var peakServerLoad = serverLoads.Any() ? serverLoads.Max() : 0;

    // Expressed as negated "less than" checks so that comparison semantics match the thresholds exactly
    var reachedMediumLoad = !(peakServerLoad < _configuration.MediumServerLoadPct);
    if (reachedMediumLoad)
    {
        modelContext.MinimumNumberOfShardsAllowed = currentClusterSize.Shards;

        var reachedHighLoad = !(peakServerLoad < _configuration.HighServerLoadPct);
        if (reachedHighLoad)
        {
            modelContext.DisallowChangingNumberOfShards = true;
        }
    }
}
/// <summary>
/// Populates the memory constraints of <paramref name="modelContext"/> from the used-memory metric queried
/// with the <c>ShardId</c> dimension over the configured lookback window ending at <paramref name="now"/>.
/// </summary>
/// <remarks>
/// The minimum allowed cluster memory is the largest observed cluster-wide usage plus the configured extra
/// fraction; the maximum allowed cluster memory is copied from the configuration.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// The query produced no measurements at all, so no memory floor can be derived. Silently skipping the
/// constraint would leave the model without a memory floor, so this case is surfaced as a failure instead.
/// </exception>
private async Task ComputeMemoryRelatedFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    var startTimeUtc = now - _configuration.UsedMemoryLookback;
    var endTimeUtc = now;

    var usedMemoryBytes = await _monitorManagementClient.GetMetricsWithDimensionAsync(
        redisAzureId,
        new[] { AzureRedisShardMetric.UsedMemory.ToMetricName() },
        "ShardId",
        startTimeUtc,
        endTimeUtc,
        _configuration.UsedMemoryAggregationInterval,
        aggregations : new[] { AggregationType.Maximum },
        context.Token);

    // Measurements are aligned by their position within each returned series and summed, which yields one
    // cluster-wide total per position.
    // NOTE: Measurement values may be null if we are querying for data that is not present (i.e. a shard that
    // has disappeared, or such). Those count as zero.
    var groupedMetrics = usedMemoryBytes
        .SelectMany(kvp => kvp.Value.Select((measurement, index) => (measurement, index)))
        .GroupBy(entry => entry.index)
        .OrderBy(group => group.Key)
        .Select(group => group.Sum(entry => entry.measurement.Maximum ?? 0))
        .ToList();

    // Max() on an empty list throws an InvalidOperationException with a generic "no elements" message. Throw
    // the same exception type explicitly so that the failure names the actual problem.
    if (groupedMetrics.Count == 0)
    {
        throw new InvalidOperationException(
            $"No used memory measurements were returned for Redis instance `{redisAzureId}` between `{startTimeUtc}` and `{endTimeUtc}`; can't compute memory constraints");
    }

    // Metric is reported in bytes, we use megabytes for everything
    var expectedClusterMemoryUsageMb = groupedMetrics.Max() / 1e+6;

    modelContext.MinimumAllowedClusterMemoryMb = (1 + _configuration.MinimumExtraMemoryAvailable) * expectedClusterMemoryUsageMb;
    modelContext.MaximumAllowedClusterMemoryMb = _configuration.MaximumClusterMemoryAllowedMb;
}
/// <summary>
/// Populates the minimum allowed cluster requests-per-second of <paramref name="modelContext"/> from the
/// operations-per-second metric queried with the <c>ShardId</c> dimension over the configured lookback window
/// ending at <paramref name="now"/>. When no measurements are available, the constraint is left untouched.
/// </summary>
private async Task ComputeWorkloadRelatedFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    var windowEndUtc = now;
    var windowStartUtc = now - _configuration.WorkloadLookback;

    var metricNames = new[] { AzureRedisShardMetric.OperationsPerSecond.ToMetricName() };
    var operationsPerSecond = await _monitorManagementClient.GetMetricsWithDimensionAsync(
        redisAzureId,
        metricNames,
        "ShardId",
        windowStartUtc,
        windowEndUtc,
        _configuration.WorkloadAggregationInterval,
        aggregations : new[] { AggregationType.Maximum },
        context.Token);

    // Pair every measurement with its position within the series it came from
    var positionedSamples = operationsPerSecond
        .SelectMany(series => series.Value.Select((sample, position) => (sample, position)));

    // Sum the samples that share a position, giving one cluster-wide total per position.
    // NOTE: Measurement values may be null if we are querying for data that is not present (i.e. a shard that
    // has disappeared, or such). Those count as zero.
    var clusterTotals = positionedSamples
        .GroupBy(item => item.position)
        .OrderBy(bucket => bucket.Key)
        .Select(bucket => bucket.Sum(item => item.sample.Maximum ?? 0))
        .ToList();

    // If all metrics are missing, we don't constrain plans on having a certain minimum number of operations.
    // This accounts for an Azure Monitor API bug whereby some metrics may not be reported.
    if (clusterTotals.Count > 0)
    {
        // ops/s scales linearly with shards
        var expectedClusterRps = clusterTotals.Max();
        modelContext.MinimumAllowedClusterRps = (1 + _configuration.MinimumWorkloadExtraPct) * expectedClusterRps;
    }
}
/// <summary>
/// Builds a fresh <see cref="ModelContext"/> and fills it in by running the memory-related and the
/// workload-related feature computations, awaiting both before returning.
/// </summary>
/// <returns>The populated model context.</returns>
private async Task<ModelContext> ComputeFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, RedisClusterSize currentClusterSize)
{
    var modelContext = new ModelContext();

    // Both computations are started before either one is awaited
    var memoryFeatures = ComputeMemoryRelatedFeaturesAsync(context, now, redisAzureId, currentClusterSize, modelContext);
    var workloadFeatures = ComputeWorkloadRelatedFeaturesAsync(context, now, redisAzureId, currentClusterSize, modelContext);
    await Task.WhenAll(memoryFeatures, workloadFeatures);

    return modelContext;
}
/// <summary>
/// Computes shortest paths from <paramref name="currentClusterSize"/> to every known cluster size, over a
/// graph whose edges and weights encode which intermediate scaling steps we are willing to take.
/// </summary>
/// <remarks>
/// We need to reach the target cluster size, but we can't do it in one shot because business rules won't let
/// us, so we need to compute a path to get to it. This is probably the most complex part of the algorithm;
/// there are several competing aspects we want to optimize for, in descending importance:
///  - We want for memory to get to the target level ASAP
///  - We want to keep the number of shards as stable as possible, given that changing them can cause build
///    failures
///  - We'd like to get there in the fewest amount of time possible
///  - The route needs to be deterministic, so that if we are forced to stop and re-compute it we'll take the
///    same route.
///  - We'd like to minimize the cost of the route
/// Multi-constraint optimization over graphs is NP-complete and algorithms are hard to come up with, so we do
/// our best.
/// </remarks>
private static IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> ComputeAllowedPaths(RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    // Edges: every scale-eligible size, except that shard reductions may only remove one shard at a time.
    // This only makes paths longer, so it is safe. The reason behind this is that the service doesn't really
    // tolerate big reductions.
    Func<RedisClusterSize, IEnumerable<RedisClusterSize>> neighbors = size =>
        size.ScaleEligibleSizes.Where(candidate =>
        {
            var isShardReduction = candidate.Shards < size.Shards;
            return !isShardReduction || candidate.Shards == size.Shards - 1;
        });

    // Weights: one per hop, plus a penalty for landing on a size with less cluster memory than we need.
    // The penalty is used to avoid transitioning to any kind of intermediate plan that may cause a production
    // outage. If we don't have it, we may transition into a state in which we have less cluster memory
    // available than we need. By adjusting the weight function, we guarantee that this only happens iff there
    // is no better path; moreover, we will always choose the lesser of two evils if given no choice.
    Func<RedisClusterSize, RedisClusterSize, double> weight = (source, destination) =>
    {
        var delta = destination.ClusterMemorySizeMb - modelContext.MinimumAllowedClusterMemoryMb;

        // When the destination has less memory than needed, penalize by the amount of memory that keeps us
        // away from the target.
        double clusterMemoryPenalization = delta < 0 ? -delta : 0;

        // This needs to be at least one so we don't pick minimum paths that are arbitrarily long
        return 1 + clusterMemoryPenalization;
    };

    return RedisScalingUtilities.ComputeOneToAllShortestPath(
        vertices: RedisClusterSize.Instances,
        neighbors: neighbors,
        weight: weight,
        from: currentClusterSize);
}
/// <summary>
/// Creates the instance by forwarding every argument, unchanged, to the base class constructor.
/// </summary>
internal ReadOnlyRedisInstance(
    IAzure azure,
    string resourceId,
    IRedisCache redisCache,
    RedisClusterSize clusterSize)
    : base(azure, resourceId, redisCache, clusterSize)
{
    // Nothing to initialize beyond what the base constructor does
}
/// <summary>
/// Creates a mock instance whose <c>ClusterSize</c> is set to <paramref name="clusterSize"/>.
/// </summary>
public MockRedisInstance(RedisClusterSize clusterSize) => ClusterSize = clusterSize;