public void CanScaleBetween(string fromString, string toString)
{
    // ThrowIfFailure surfaces malformed theory data immediately instead of a null deref later.
    var source = RedisClusterSize.TryParse(fromString).ThrowIfFailure();
    var target = RedisClusterSize.TryParse(toString).ThrowIfFailure();

    Assert.True(RedisScalingUtilities.CanScale(source, target));
}
/// <summary>
/// Produces the model's suggested scaling action: collects every cluster size that is both
/// reachable from <paramref name="currentClusterSize"/> and allowed by our scaling rules,
/// prices each candidate, and returns the cheapest one together with the path to reach it.
/// </summary>
/// <returns>
/// The chosen <c>ModelOutput</c>, or an error result when no eligible target size exists.
/// </returns>
private Result<ModelOutput> Predict(RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    var shortestPaths = ComputeAllowedPaths(currentClusterSize, modelContext);

    // Keep only sizes reachable via scaling operations and permitted by business rules,
    // pricing each surviving candidate.
    var candidates = new List<(RedisClusterSize Size, RedisScalingUtilities.Node Node, double Cost)>();
    foreach (var kvp in shortestPaths)
    {
        var size = kvp.Key;
        var node = kvp.Value;

        if (node.ShortestDistanceFromSource == double.PositiveInfinity)
        {
            // Unreachable from the current size.
            continue;
        }

        if (!IsScalingAllowed(currentClusterSize, size, modelContext))
        {
            continue;
        }

        candidates.Add((size, node, CostFunction(currentClusterSize, size, modelContext, shortestPaths)));
    }

    if (candidates.Count == 0)
    {
        return new Result<ModelOutput>(errorMessage: "No cluster size available for scaling");
    }

    // Cheapest candidate wins. OrderBy is a stable sort, so ties resolve deterministically.
    var best = candidates.OrderBy(candidate => candidate.Cost).First();

    return new ModelOutput(
        targetClusterSize: best.Size,
        modelContext: modelContext,
        cost: best.Cost,
        scalePath: RedisScalingUtilities.ComputeShortestPath(shortestPaths, currentClusterSize, best.Size));
}
/// <summary>
/// Computes the model's suggested scaling action: enumerates every cluster size reachable from
/// <paramref name="currentClusterSize"/> via allowed scaling steps, ranks the candidates by
/// cost (ascending), and returns the cheapest one together with the scale path to reach it.
/// </summary>
/// <returns>
/// The chosen <c>ModelOutput</c>, or an error result when no eligible target size exists.
/// </returns>
private static Result <ModelOutput> Predict(RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    // TODO: autoscaler should consider the server load percentage as well. If a shard had a very high load
    // percentage, it means that it is for some reason receiving an uneven load. Hence, adding shards helps in
    // this situation. There is no easy way to add that to the current model. Ideas:
    //  - If any server reached a load >70% at any time in the period analyzed, we need to guarantee that
    //    there's at least as many shards as there were before (i.e. no downscales are allowed).
    var shortestPaths = ComputeAllowedPaths(currentClusterSize, modelContext);

    var eligibleClusterSizes = shortestPaths
        .Select(kvp => (Size: kvp.Key, Node: kvp.Value))
        // Find all plans that we can reach from the current one via scaling operations, and that we allow scaling to
        .Where(entry => entry.Node.ShortestDistanceFromSource != double.PositiveInfinity && IsScalingAllowed(currentClusterSize, entry.Size, modelContext))
        // Compute the cost of taking the given route
        .Select(entry => (entry.Size, entry.Node, Cost: CostFunction(currentClusterSize, entry.Size, modelContext, shortestPaths)))
        .ToList();

    // Rank them by cost ascending
    var costSorted = eligibleClusterSizes
        .OrderBy(pair => pair.Cost)
        .ToList();

    if (costSorted.Count == 0)
    {
        return(new Result <ModelOutput>(errorMessage: "No cluster size available for scaling"));
    }

    // costSorted[0] is the cheapest eligible target (OrderBy is stable, so ties are deterministic).
    return(new ModelOutput(
        targetClusterSize: costSorted[0].Size,
        modelContext: modelContext,
        cost: costSorted[0].Cost,
        scalePath: RedisScalingUtilities.ComputeShortestPath(shortestPaths, currentClusterSize, costSorted[0].Size)));
}
public void DownscaleManualExamples(string fromString, string toString)
{
    // ThrowIfFailure turns a bad theory string into an immediate, descriptive failure.
    var source = RedisClusterSize.TryParse(fromString).ThrowIfFailure();
    var target = RedisClusterSize.TryParse(toString).ThrowIfFailure();

    Assert.True(RedisScalingUtilities.IsDownScale(source, target));
}
public void FailsOnNonExistantRoute()
{
    var source = RedisClusterSize.Parse("P1/1");
    var target = RedisClusterSize.Parse("P3/3");

    // No size has any neighbors, so there can be no route between two distinct sizes.
    var path = RedisScalingUtilities.ComputeShortestPath(source, target, size => Array.Empty<RedisClusterSize>(), (f, t) => 1);

    path.Should().BeEmpty();
}
public void SucceedsOnSimpleRoute()
{
    var source = RedisClusterSize.Parse("P1/1");
    var target = RedisClusterSize.Parse("P3/3");

    var path = RedisScalingUtilities.ComputeShortestPath(source, target, size => size.ScaleEligibleSizes, (f, t) => 1);

    // Expected route: tier change first (P1 -> P3), then shard growth (1 -> 3).
    var expected = new RedisClusterSize[]
    {
        RedisClusterSize.Parse("P3/1"),
        RedisClusterSize.Parse("P3/3"),
    };
    path.Should().BeEquivalentTo(expected);
}
public void CanFindEmptyRoute()
{
    // Source and target are the same size, so the shortest route contains no steps at all.
    var size = RedisClusterSize.Parse("P1/1");

    var path = RedisScalingUtilities.ComputeShortestPath(size, RedisClusterSize.Parse("P1/1"), s => s.ScaleEligibleSizes, (f, t) => 1);

    path.Should().BeEmpty();
}
public void CanFindSingleRoute()
{
    var source = RedisClusterSize.Parse("P1/1");
    var target = RedisClusterSize.Parse("P1/2");

    var path = RedisScalingUtilities.ComputeShortestPath(source, target, size => size.ScaleEligibleSizes, (f, t) => 1);

    // Adding one shard within the same tier is a single hop ending at the target.
    path.Count.Should().Be(1);
    path[0].Should().Be(target);
}
/// <summary>
/// Computes, for every known cluster size, the shortest allowed scaling route starting from
/// <paramref name="currentClusterSize"/> (one-to-all Dijkstra over the scale-eligibility graph).
/// Sizes that are unreachable under the constraints below come back with an infinite distance.
/// </summary>
/// <returns>Map from each cluster size to its shortest-path node (distance and predecessor info).</returns>
private static IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> ComputeAllowedPaths(RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    // We need to reach the target cluster size, but we can't do it in one shot because business rules won't
    // let us, so we need to compute a path to get to it. This is probably the most complex part of the
    // algorithm, there are several competing aspects we want to optimize for, in descending importance:
    //  - We want for memory to get to the target level ASAP
    //  - We want to keep the number of shards as stable as possible, given that changing them can cause build
    //    failures
    //  - We'd like to get there in the fewest amount of time possible
    //  - The route needs to be deterministic, so that if we are forced to stop and re-compute it we'll take
    //    the same route.
    //  - We'd like to minimize the cost of the route
    // Multi-constraint optimization over graphs is NP-complete and algorithms are hard to come up with, so we
    // do our best.

    // NOTE: the lambda parameter was previously named `currentClusterSize`, shadowing the method
    // parameter; renamed to `clusterSize` so the comparison below is unambiguous (the constraint
    // is relative to the node being expanded, not the method's starting size).
    Func<RedisClusterSize, IEnumerable<RedisClusterSize>> neighbors =
        clusterSize => clusterSize.ScaleEligibleSizes.Where(targetClusterSize =>
        {
            // Constrain paths to downscale at most one shard at the time. This only makes paths longer, so it
            // is safe. The reason behind this is that the service doesn't really tolerate big reductions.
            if (targetClusterSize.Shards < clusterSize.Shards)
            {
                return targetClusterSize.Shards == clusterSize.Shards - 1;
            }

            return true;
        });

    Func<RedisClusterSize, RedisClusterSize, double> weight = (from, to) =>
    {
        // This factor is used to avoid transitioning to any kind of intermediate plan that may cause a
        // production outage. If we don't have it, we may transition into a state in which we have less
        // cluster memory available than we need. By adjusting the weight function, we guarantee that
        // this only happens iff there is no better path; moreover, we will always choose the lesser of
        // two evils if given no choice.
        double clusterMemoryPenalization = 0;

        var delta = to.ClusterMemorySizeMb - modelContext.MinimumAllowedClusterMemoryMb;
        if (delta < 0)
        {
            // The amount of cluster memory is less than we need, so we penalize taking this path by
            // adding the amount of memory that keeps us away from the target.
            clusterMemoryPenalization = -delta;
        }

        // This needs to be at least one so we don't pick minimum paths that are arbitrarily long
        return 1 + clusterMemoryPenalization;
    };

    return RedisScalingUtilities.ComputeOneToAllShortestPath(vertices: RedisClusterSize.Instances, neighbors: neighbors, weight: weight, from: currentClusterSize);
}
/// <summary>
/// Decides whether a scaling move is allowed. At this point, we don't know if Azure Cache for Redis business
/// rules allow scaling from the current to the target size. We just decide whether it is reasonable based on
/// our knowledge of our production workload.
///
/// The autoscaler will figure out how to reach the desired plan.
/// </summary>
/// <returns><c>true</c> when <paramref name="targetClusterSize"/> is an acceptable destination.</returns>
private bool IsScalingAllowed(
    RedisClusterSize currentClusterSize,
    RedisClusterSize targetClusterSize,
    ModelContext modelContext)
{
    // WARNING: order matters in the following if statements. Please be careful.

    // Cluster must be able to handle the amount of data we'll give it, with some overhead in case of
    // production issues. Notice we don't introduce a per-shard restriction; reason for this is that the shards
    // distribute keys evenly.
    if (targetClusterSize.ClusterMemorySizeMb < modelContext.MinimumAllowedClusterMemoryMb)
    {
        return(false);
    }

    // Cluster must be able to handle the amount of operations needed. Notice we don't introduce a per-shard
    // restriction; reason for this is that the shards distribute keys evenly.
    if (targetClusterSize.EstimatedRequestsPerSecond < modelContext.MinimumAllowedClusterRps)
    {
        return(false);
    }

    // Disallow going over the maximum allowed cluster memory
    // NOTE: we only constrain on the target not being over the allowed size, rather than all nodes in the
    // path. The reason for this is that our ability to reach all nodes is based on being able to scale above
    // any specific memory threshold.
    if (modelContext.MaximumAllowedClusterMemoryMb != null && targetClusterSize.ClusterMemorySizeMb > modelContext.MaximumAllowedClusterMemoryMb.Value)
    {
        return(false);
    }

    // Always allow not doing anything if it's available.
    // NOTE: this is here because in downscale situations we always want to ensure we have the "status quo"
    // action available.
    if (currentClusterSize.Equals(targetClusterSize))
    {
        return(true);
    }

    // Disallow downscales that don't improve cost significantly
    if (_configuration.MinimumCostSavingForDownScaling != null)
    {
        // Negative delta means the target is cheaper; -monthlyCostDelta is then the monthly saving.
        var monthlyCostDelta = targetClusterSize.MonthlyCostUsd - currentClusterSize.MonthlyCostUsd;

        if (RedisScalingUtilities.IsDownScale(currentClusterSize, targetClusterSize) && monthlyCostDelta <= 0 && -monthlyCostDelta < _configuration.MinimumCostSavingForDownScaling)
        {
            return(false);
        }
    }

    return(true);
}
public void CantScaleIfEitherInstanceOrPlanChanges()
{
    // Every distinct pair of sizes that CanScale rejects must differ in tier or in shard count.
    var cantScaleRelation = RedisClusterSize.Instances
        .SelectMany(source => RedisClusterSize.Instances, (source, target) => (source, target))
        .Where(pair => !pair.source.Equals(pair.target))
        .Where(pair => !RedisScalingUtilities.CanScale(pair.source, pair.target));

    foreach (var (source, target) in cantScaleRelation)
    {
        var differsInTierOrShards = !source.Tier.Equals(target.Tier) || source.Shards != target.Shards;
        differsInTierOrShards.Should().BeTrue($"{source} -> {target}");
    }
}
public void CanChangeShardsWithinSameTier()
{
    foreach (var tierGroup in RedisClusterSize.Instances.GroupBy(size => size.Tier))
    {
        var sizesInTier = tierGroup.ToList();

        // Any pair within the same tier (including a size paired with itself) must be scalable.
        foreach (var source in sizesInTier)
        {
            foreach (var target in sizesInTier)
            {
                Assert.True(RedisScalingUtilities.CanScale(source, target));
            }
        }
    }
}
/// <summary>
/// This function embodies the concept of "how much does it cost to switch from
/// <paramref name="current"/> to <paramref name="target"/>". At this point, we can assume that:
///  - The two input sizes are valid states to be in
///  - We can reach the target from current via some amount of autoscaling operations
/// Hence, we're just ranking amonst the many potential states.
/// </summary>
/// <returns>Monthly USD cost delta; positive means spending more, negative means saving.</returns>
private static double CostFunction(RedisClusterSize current, RedisClusterSize target, ModelContext modelContext, IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> shortestPaths)
{
    // Staying on the same size (i.e. a no-op) costs nothing.
    if (current.Equals(target))
    {
        return 0;
    }

    // Sanity check: callers guarantee the target is reachable, so a non-empty path must exist.
    var routeToTarget = RedisScalingUtilities.ComputeShortestPath(shortestPaths, current, target);
    Contract.Assert(routeToTarget.Count > 0);

    var monthlyCostDelta = target.MonthlyCostUsd - current.MonthlyCostUsd;
    return (double)monthlyCostDelta;
}
public void LoweringTierIsDownscaling()
{
    foreach (var source in RedisClusterSize.Instances)
    {
        // Candidates: same shard count, strictly lower tier.
        var candidates = source.ScaleEligibleSizes
            .Where(size => size.Shards == source.Shards && RedisScalingUtilities.IsDownScale(source.Tier, size.Tier));

        foreach (var target in candidates)
        {
            RedisScalingUtilities.IsDownScale(source, target).Should().BeTrue();
        }
    }
}
public void RemovingShardsIsDownscaling()
{
    foreach (var source in RedisClusterSize.Instances)
    {
        // Candidates: same tier, strictly fewer shards.
        var candidates = source.ScaleEligibleSizes
            .Where(size => size.Tier.Equals(source.Tier) && size.Shards < source.Shards);

        foreach (var target in candidates)
        {
            RedisScalingUtilities.IsDownScale(source, target).Should().BeTrue();
        }
    }
}
public void CanScaleAcrossPremiumPlansWhenShardsRemainEqual()
{
    // Group Premium-plan sizes by shard count; any two sizes in a group differ only in tier.
    var premiumByShards = RedisClusterSize
        .Instances
        .Where(size => size.Tier.Plan == RedisPlan.Premium)
        .GroupBy(size => size.Shards);

    foreach (var shardGroup in premiumByShards)
    {
        var sizes = shardGroup.ToList();

        foreach (var source in sizes)
        {
            foreach (var target in sizes)
            {
                Assert.True(RedisScalingUtilities.CanScale(source, target));
            }
        }
    }
}