/// <summary>
/// Decides whether moving to <paramref name="target"/> is reasonable given what we know about our production
/// workload. Whether Azure Cache for Redis business rules permit scaling from the current size to the target
/// is NOT evaluated here; the autoscaler will figure out how to reach the desired plan.
/// </summary>
/// <remarks>
/// All limits are checked cluster-wide. No per-shard restriction is applied, on the premise that shards
/// distribute keys evenly. <paramref name="current"/> is not consulted by any of the checks.
/// </remarks>
/// <param name="current">Size the cluster has right now.</param>
/// <param name="target">Candidate size being evaluated.</param>
/// <param name="modelContext">Limits on cluster memory and requests per second.</param>
/// <returns>
/// <c>true</c> when the target is not below the minimum memory, not below the minimum requests per second, and
/// not above the maximum memory (when a maximum is set); <c>false</c> otherwise.
/// </returns>
public static bool IsScalingAllowed(
    RedisClusterSize current,
    RedisClusterSize target,
    ModelContext modelContext)
{
    return
        // The cluster must hold the amount of data we'll give it, with some overhead for production issues.
        !(target.ClusterMemorySizeMb < modelContext.MinimumAllowedClusterMemoryMb)
        // The cluster must sustain the amount of operations needed.
        && !(target.EstimatedRequestsPerSecond < modelContext.MinimumAllowedClusterRps)
        // The cluster must not go over the maximum allowed memory, if one is configured.
        && !(modelContext.MaximumAllowedClusterMemoryMb != null
             && target.ClusterMemorySizeMb > modelContext.MaximumAllowedClusterMemoryMb.Value);
}
/// <summary>
/// Tells whether going from <paramref name="from"/> to <paramref name="to"/> counts as a downscale.
/// </summary>
/// <returns>
/// With equal tiers, whether the shard count decreases. With equal shard counts, the verdict of the tier-level
/// <c>IsDownScale</c> overload. Otherwise, <c>true</c> as soon as cluster memory, estimated requests per
/// second, or monthly cost decreases.
/// </returns>
public static bool IsDownScale(RedisClusterSize from, RedisClusterSize to)
{
    if (from.Tier.Equals(to.Tier))
    {
        // Same tier (i.e. P3): only the number of shards can differ.
        return from.Shards > to.Shards;
    }

    if (from.Shards == to.Shards)
    {
        // Same number of shards: only the tier can differ.
        return IsDownScale(from.Tier, to.Tier);
    }

    // Both tier and shard count differ, so fall back to comparing memory capacity, server capacity and cost;
    // a reduction in any one of them is considered a downscale.
    return from.ClusterMemorySizeMb > to.ClusterMemorySizeMb
        || from.EstimatedRequestsPerSecond > to.EstimatedRequestsPerSecond
        || from.MonthlyCostUsd > to.MonthlyCostUsd;
}
/// <summary>
/// Creates a model output, storing each argument in the member of the same name.
/// </summary>
/// <param name="targetClusterSize">Stored in <c>TargetClusterSize</c>.</param>
/// <param name="modelContext">Stored in <c>ModelContext</c>.</param>
/// <param name="cost">Stored in <c>Cost</c>.</param>
/// <param name="scalePath">Stored in <c>ScalePath</c>.</param>
public ModelOutput(RedisClusterSize targetClusterSize, ModelContext modelContext, double cost, IReadOnlyList<RedisClusterSize> scalePath)
{
    (TargetClusterSize, ModelContext, Cost, ScalePath) = (targetClusterSize, modelContext, cost, scalePath);
}
/// <summary>
/// Builds an update for the Redis cache that moves it to <paramref name="targetClusterSize"/> and applies it.
/// Only the aspects that differ from the current <c>ClusterSize</c> (tier and/or shard count) are included in
/// the update. No validation of the request is performed here.
/// </summary>
/// <param name="targetClusterSize">Size the cache should be moved to.</param>
/// <param name="cancellationToken">Passed to the apply call.</param>
/// <returns><c>BoolResult.Success</c> once the apply call completes.</returns>
private async Task<BoolResult> SubmitScaleRequestAsync(RedisClusterSize targetClusterSize, CancellationToken cancellationToken)
{
    var update = RedisCache.Update();

    if (!ClusterSize.Tier.Equals(targetClusterSize.Tier))
    {
        // The tier changed: select the SKU matching the target plan. A plan outside the three handled here
        // leaves the SKU untouched.
        var plan = targetClusterSize.Tier.Plan;
        if (plan == RedisPlan.Basic)
        {
            update = update.WithBasicSku(targetClusterSize.Tier.Capacity);
        }
        else if (plan == RedisPlan.Standard)
        {
            update = update.WithStandardSku(targetClusterSize.Tier.Capacity);
        }
        else if (plan == RedisPlan.Premium)
        {
            update = update.WithPremiumSku(targetClusterSize.Tier.Capacity);
        }
    }

    var targetShards = targetClusterSize.Shards;
    if (ClusterSize.Shards != targetShards)
    {
        update = update.WithShardCount(targetShards);
    }

    await update.ApplyAsync(cancellationToken);

    return BoolResult.Success;
}
/// <summary>
/// Picks the cluster size to scale to: among all sizes that are reachable from
/// <paramref name="currentClusterSize"/> and that <c>IsScalingAllowed</c> accepts, the one with the lowest
/// <c>CostFunction</c> value.
/// </summary>
/// <param name="currentClusterSize">Size the cluster has right now.</param>
/// <param name="modelContext">Limits used both for computing paths and for filtering candidates.</param>
/// <returns>
/// A <c>ModelOutput</c> holding the chosen size, its cost and the path towards it; or a failed result with the
/// message "No cluster size available for scaling" when no candidate qualifies.
/// </returns>
public static Result<ModelOutput> Predict(RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    var shortestPaths = ComputeAllowedPaths(currentClusterSize, modelContext);

    var rankedCandidates =
        (from entry in shortestPaths
         let size = entry.Key
         let node = entry.Value
         // Keep the plans we can reach from the current one via scaling operations and that we allow scaling to
         where node.ShortestDistanceFromSource != double.PositiveInfinity
               && IsScalingAllowed(currentClusterSize, size, modelContext)
         // Price the move to each remaining plan
         let cost = CostFunction(currentClusterSize, size, modelContext, shortestPaths)
         // Cheapest first
         orderby cost
         select (Size: size, Cost: cost))
        .ToList();

    if (rankedCandidates.Count == 0)
    {
        return new Result<ModelOutput>(errorMessage: "No cluster size available for scaling");
    }

    var best = rankedCandidates[0];
    var pathToBest = RedisScalingUtilities.ComputeShortestPath(shortestPaths, currentClusterSize, best.Size);

    return new ModelOutput(
        targetClusterSize: best.Size,
        modelContext: modelContext,
        cost: best.Cost,
        scalePath: pathToBest);
}
/// <summary>
/// Creates an instance from already-loaded data, storing the Azure client, the resource id, the cache handle
/// and the cluster size as given.
/// </summary>
/// <param name="azure">Azure client kept for later calls.</param>
/// <param name="resourceId">Resource id of the cache; required to be neither null nor empty.</param>
/// <param name="redisCache">Cache handle exposed through <c>RedisCache</c>.</param>
/// <param name="clusterSize">Size exposed through <c>ClusterSize</c>.</param>
private RedisInstance(IAzure azure, string resourceId, IRedisCache redisCache, RedisClusterSize clusterSize)
{
    Contract.RequiresNotNullOrEmpty(resourceId);

    (_azure, _resourceId) = (azure, resourceId);
    (RedisCache, ClusterSize) = (redisCache, clusterSize);
}
/// <summary>
/// Re-generates the instance metadata and, when that succeeds, replaces <c>RedisCache</c> and
/// <c>ClusterSize</c> with the freshly obtained values.
/// </summary>
/// <param name="cancellationToken">Passed to the metadata generation call.</param>
/// <returns>The outcome of projecting the metadata result onto <c>BoolResult.Success</c>.</returns>
public async Task<BoolResult> RefreshAsync(CancellationToken cancellationToken = default)
{
    var metadata = await GenerateInstanceMetadataAsync(_azure, _resourceId, cancellationToken);

    // The projection doubles as the place where the new state is adopted.
    return metadata.Select(fresh =>
    {
        RedisCache = fresh.Cache;
        ClusterSize = fresh.Size;
        return BoolResult.Success;
    });
}
/// <summary>
/// Fetches the Redis cache identified by <paramref name="resourceId"/> and derives its cluster size.
/// </summary>
/// <param name="azure">Azure client used to look the cache up.</param>
/// <param name="resourceId">Resource id of the cache to fetch.</param>
/// <param name="cancellationToken">Passed to the lookup call.</param>
/// <returns>
/// The fetched cache paired with its cluster size. When the lookup yields no cache, or the cluster size cannot
/// be derived from it, a failed result is returned instead of throwing, so callers can handle the failure
/// through the result they already receive.
/// </returns>
public static async Task<Result<(IRedisCache Cache, RedisClusterSize Size)>> GenerateInstanceMetadataAsync(IAzure azure, string resourceId, CancellationToken cancellationToken = default)
{
    // TODO: error handling for the Azure call itself; exceptions it throws still propagate to the caller.
    var redisCache = await azure.RedisCaches.GetByIdAsync(resourceId, cancellationToken);
    if (redisCache is null)
    {
        // Defensive: without a cache handle there is nothing to derive a size from.
        return new Result<(IRedisCache Cache, RedisClusterSize Size)>(
            errorMessage: $"No Redis cache was returned for resource id `{resourceId}`");
    }

    // Propagate a failure to derive the cluster size as a failed result rather than as an exception.
    return RedisClusterSize
        .FromAzureCache(redisCache)
        .Select(clusterSize =>
        {
            Contract.AssertNotNull(clusterSize);
            return (Cache: redisCache, Size: clusterSize);
        });
}
/// <summary>
/// Validates and submits a request to scale this instance to <paramref name="targetClusterSize"/>, running the
/// whole thing as a traced operation that logs the current and target sizes at start and end.
/// </summary>
/// <param name="context">Operation context; also supplies the cancellation token for the apply call.</param>
/// <param name="targetClusterSize">Size the instance should be moved to.</param>
/// <returns>
/// An error result when the request is a no-op, is rejected by <c>RedisScalingUtilities.CanScale</c>, or the
/// instance is not ready to scale; <c>BoolResult.Success</c> once the update has been applied.
/// </returns>
private Task<BoolResult> RequestScaleAsync(OperationContext context, RedisClusterSize targetClusterSize)
{
    var sizesDescription = $"CurrentClusterSize=[{ClusterSize}] TargetClusterSize=[{targetClusterSize}]";

    return context.PerformOperationAsync(
        Tracer,
        async () =>
        {
            if (ClusterSize.Equals(targetClusterSize))
            {
                return new BoolResult(errorMessage: $"No-op scale request attempted (`{ClusterSize}` -> `{targetClusterSize}`) on instance `{Name}`");
            }
            else if (!RedisScalingUtilities.CanScale(ClusterSize, targetClusterSize))
            {
                return new BoolResult(errorMessage: $"Scale request `{ClusterSize}` -> `{targetClusterSize}` on instance `{Name}` is disallowed by Azure Cache for Redis");
            }
            else if (!IsReadyToScale)
            {
                return new BoolResult(errorMessage: $"Redis instance `{Name}` is not ready to scale, current provisioning state is `{RedisCache.ProvisioningState}`");
            }

            var pendingUpdate = RedisCache.Update();

            // Only touch the SKU when the tier actually changes.
            if (!ClusterSize.Tier.Equals(targetClusterSize.Tier))
            {
                var targetTier = targetClusterSize.Tier;
                switch (targetTier.Plan)
                {
                    case RedisPlan.Basic:
                        pendingUpdate = pendingUpdate.WithBasicSku(targetTier.Capacity);
                        break;
                    case RedisPlan.Standard:
                        pendingUpdate = pendingUpdate.WithStandardSku(targetTier.Capacity);
                        break;
                    case RedisPlan.Premium:
                        pendingUpdate = pendingUpdate.WithPremiumSku(targetTier.Capacity);
                        break;
                }
            }

            // Only touch the shard count when it actually changes.
            if (ClusterSize.Shards != targetClusterSize.Shards)
            {
                pendingUpdate = pendingUpdate.WithShardCount(targetClusterSize.Shards);
            }

            await pendingUpdate.ApplyAsync(context.Token);

            return BoolResult.Success;
        },
        extraStartMessage: sizesDescription,
        extraEndMessage: _ => sizesDescription,
        pendingOperationTracingInterval: TimeSpan.FromMinutes(1));
}
/// <summary>
/// Computes a shortest path between two cluster sizes in the graph described by
/// <paramref name="neighbors"/> and <paramref name="weight"/>.
/// </summary>
/// <param name="from">Starting size.</param>
/// <param name="to">Destination size.</param>
/// <param name="neighbors">Yields the sizes directly reachable from a given size.</param>
/// <param name="weight">Weight of the edge between two sizes.</param>
/// <param name="vertices">Vertex set of the graph; <c>RedisClusterSize.Instances</c> when omitted.</param>
/// <returns>
/// An empty list when <paramref name="from"/> equals <paramref name="to"/>; otherwise whatever the
/// dictionary-based overload extracts from the one-to-all computation rooted at <paramref name="from"/>.
/// </returns>
public static IReadOnlyList<RedisClusterSize> ComputeShortestPath(RedisClusterSize from, RedisClusterSize to, Func<RedisClusterSize, IEnumerable<RedisClusterSize>> neighbors, Func<RedisClusterSize, RedisClusterSize, double> weight, IReadOnlyList<RedisClusterSize>? vertices = null)
{
    if (!from.Equals(to))
    {
        var distancesFromSource = ComputeOneToAllShortestPath(vertices ?? RedisClusterSize.Instances, neighbors, weight, from);
        return ComputeShortestPath(distancesFromSource, from, to);
    }

    // Nothing to traverse when we are already at the destination.
    return Array.Empty<RedisClusterSize>();
}
/// <summary>
/// This function embodies the concept of "how much does it cost to switch from
/// <paramref name="current"/> to <paramref name="target"/>". At this point, we can assume that:
///  - The two input sizes are valid states to be in
///  - We can reach the target from current via some amount of autoscaling operations
/// Hence, we're just ranking amongst the many potential states.
/// </summary>
/// <param name="current">Size the cluster has right now.</param>
/// <param name="target">Candidate size.</param>
/// <param name="modelContext">Not used by the current computation.</param>
/// <param name="shortestPaths">Shortest-path data, used only to assert that a path to the target exists.</param>
/// <returns>
/// 0 for a no-op; otherwise the difference in monthly cost: positive if we would spend more money, negative
/// if we would save.
/// </returns>
public static double CostFunction(RedisClusterSize current, RedisClusterSize target, ModelContext modelContext, IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> shortestPaths)
{
    if (!current.Equals(target))
    {
        // Sanity check on the assumption that the target is reachable.
        var route = RedisScalingUtilities.ComputeShortestPath(shortestPaths, current, target);
        Contract.Assert(route.Count > 0);

        var monthlyCostDelta = target.MonthlyCostUsd - current.MonthlyCostUsd;
        return (double)monthlyCostDelta;
    }

    // Staying at the same size (i.e. no-op) is free.
    return 0;
}
/// <summary>
/// Wraps an already-loaded Redis cache into an instance object, after deriving its cluster size.
/// </summary>
/// <param name="azure">Azure client handed to the created instance.</param>
/// <param name="redisCache">Cache to wrap; its <c>Id</c> is used as the resource id.</param>
/// <param name="readOnly">
/// When <c>true</c> a <c>ReadOnlyRedisInstance</c> is created, otherwise a <c>RedisInstance</c>.
/// </param>
/// <returns>The result of projecting the derived cluster size onto the created instance.</returns>
public static Result<IRedisInstance> FromPreloaded(IAzure azure, IRedisCache redisCache, bool readOnly)
{
    var clusterSizeResult = RedisClusterSize.FromAzureCache(redisCache);

    return clusterSizeResult.Select(size => readOnly
        ? (IRedisInstance)new ReadOnlyRedisInstance(azure, redisCache.Id, redisCache, size)
        : new RedisInstance(azure, redisCache.Id, redisCache, size));
}
/// <summary>
/// Validates a request to scale this instance to <paramref name="targetClusterSize"/> and, when it passes,
/// builds and applies the corresponding cache update.
/// </summary>
/// <param name="targetClusterSize">Size the instance should be moved to.</param>
/// <param name="cancellationToken">Passed to the apply call.</param>
/// <returns>
/// An error result when the request is a no-op, is rejected by <c>RedisScalingUtilities.CanScale</c>, or the
/// instance is not ready to scale; <c>BoolResult.Success</c> once the update has been applied.
/// </returns>
private async Task<BoolResult> RequestScaleAsync(RedisClusterSize targetClusterSize, CancellationToken cancellationToken = default)
{
    // --- Validation -----------------------------------------------------------------------------------
    if (ClusterSize.Equals(targetClusterSize))
    {
        return new BoolResult(errorMessage: $"No-op scale request attempted (`{ClusterSize}` -> `{targetClusterSize}`) on instance `{Name}`");
    }

    if (!RedisScalingUtilities.CanScale(ClusterSize, targetClusterSize))
    {
        return new BoolResult(errorMessage: $"Scale request `{ClusterSize}` -> `{targetClusterSize}` on instance `{Name}` is disallowed by Azure Cache for Redis");
    }

    if (!IsReadyToScale)
    {
        return new BoolResult(errorMessage: $"Redis instance `{Name}` is not ready to scale, current provisioning state is `{RedisCache.ProvisioningState}`");
    }

    // --- Submission -----------------------------------------------------------------------------------
    var update = RedisCache.Update();

    if (!ClusterSize.Tier.Equals(targetClusterSize.Tier))
    {
        // Tier changed: pick the SKU for the target plan. Any other plan leaves the SKU untouched.
        var plan = targetClusterSize.Tier.Plan;
        if (plan == RedisPlan.Basic)
        {
            update = update.WithBasicSku(targetClusterSize.Tier.Capacity);
        }
        else if (plan == RedisPlan.Standard)
        {
            update = update.WithStandardSku(targetClusterSize.Tier.Capacity);
        }
        else if (plan == RedisPlan.Premium)
        {
            update = update.WithPremiumSku(targetClusterSize.Tier.Capacity);
        }
    }

    if (ClusterSize.Shards != targetClusterSize.Shards)
    {
        update = update.WithShardCount(targetClusterSize.Shards);
    }

    await update.ApplyAsync(cancellationToken);

    return BoolResult.Success;
}
/// <summary>
/// Validates a request to scale this instance to <paramref name="targetClusterSize"/> and, when it passes,
/// hands it over to <c>SubmitScaleRequestAsync</c>.
/// </summary>
/// <param name="targetClusterSize">Size the instance should be moved to.</param>
/// <param name="cancellationToken">Passed on to the submission.</param>
/// <returns>
/// An error result when the request is a no-op, is rejected by <c>RedisScalingUtilities.CanScale</c>, or the
/// instance is not ready to scale; otherwise the result of the submission.
/// </returns>
private async Task<BoolResult> RequestScaleAsync(RedisClusterSize targetClusterSize, CancellationToken cancellationToken = default)
{
    if (ClusterSize.Equals(targetClusterSize))
    {
        // Nothing would change.
        return new BoolResult(errorMessage: $"No-op scale request attempted (`{ClusterSize}` -> `{targetClusterSize}`) on instance `{Name}`");
    }
    else if (!RedisScalingUtilities.CanScale(ClusterSize, targetClusterSize))
    {
        // The move is not one the scaling rules accept.
        return new BoolResult(errorMessage: $"Scale request `{ClusterSize}` -> `{targetClusterSize}` on instance `{Name}` is disallowed by Azure Cache for Redis");
    }
    else if (!IsReadyToScale)
    {
        // The instance cannot take a scale request right now.
        return new BoolResult(errorMessage: $"Redis instance `{Name}` is not ready to scale, current provisioning state is `{RedisCache.ProvisioningState}`");
    }
    else
    {
        return await SubmitScaleRequestAsync(targetClusterSize, cancellationToken);
    }
}
/// <summary>
/// Tells whether a single scaling operation can take a cluster from <paramref name="from"/> to
/// <paramref name="to"/>.
/// </summary>
/// <returns>
/// <c>true</c> when both sizes are equal, or when the tier-level <c>CanScale</c> overload accepts the tier
/// change and at most one of tier and shard count differs; <c>false</c> otherwise.
/// </returns>
public static bool CanScale(RedisClusterSize from, RedisClusterSize to)
{
    // Azure can't change both shards and tiers at once, we need to do them one at a time; hence a move is
    // only valid when either the shard count or the tier stays put.
    return from.Equals(to)
        || (CanScale(from.Tier, to.Tier)
            && (from.Shards == to.Shards || from.Tier.Equals(to.Tier)));
}
/// <summary>
/// Estimates how long scaling from <paramref name="from"/> to <paramref name="to"/> takes, as
/// <c>Constants.RedisScaleTimePerShard</c> multiplied by the number of shards involved.
/// </summary>
/// <remarks>Requires that <c>CanScale(from, to)</c> holds.</remarks>
/// <returns>
/// <c>TimeSpan.Zero</c> for equal sizes. With equal tiers, the per-shard time multiplied by the absolute
/// difference in shard count. With different tiers, the per-shard time multiplied by the shard count.
/// </returns>
public static TimeSpan ExpectedScalingDelay(RedisClusterSize from, RedisClusterSize to)
{
    Contract.Requires(CanScale(from, to));

    if (from.Equals(to))
    {
        return TimeSpan.Zero;
    }

    var tierUnchanged = from.Tier.Equals(to.Tier);
    if (!tierUnchanged)
    {
        // A tier change implies the number of shards didn't change.
        Contract.Assert(from.Shards == to.Shards);
    }

    // Same tier: autoscaling adds or removes shards, so only the difference counts.
    // Different tier: every existing shard needs to change tier, so all of them count.
    var shardsInvolved = tierUnchanged
        ? Math.Abs(from.Shards - to.Shards)
        : from.Shards;

    return TimeSpan.FromTicks(Constants.RedisScaleTimePerShard.Ticks * shardsInvolved);
}
/// <summary>
/// Wraps an already-loaded Redis cache into a <c>RedisInstance</c>, after deriving its cluster size.
/// </summary>
/// <param name="azure">Azure client handed to the created instance.</param>
/// <param name="redisCache">Cache to wrap; its <c>Id</c> is used as the resource id.</param>
/// <returns>The result of projecting the derived cluster size onto the created instance.</returns>
public static Result<RedisInstance> FromPreloaded(IAzure azure, IRedisCache redisCache)
{
    var clusterSizeResult = RedisClusterSize.FromAzureCache(redisCache);

    return clusterSizeResult.Select(size => new RedisInstance(azure, redisCache.Id, redisCache, size));
}
/// <summary>
/// Creates an instance by forwarding all arguments, unchanged and in the same order, to the base-class
/// constructor. No additional initialization happens here.
/// </summary>
internal RedisInstance(IAzure azure, string resourceId, IRedisCache redisCache, RedisClusterSize clusterSize) : base(azure, resourceId, redisCache, clusterSize) { }
/// <summary>
/// Creates the graph node that stands for <paramref name="clusterSize"/>.
/// </summary>
public Node(RedisClusterSize clusterSize) => ClusterSize = clusterSize;
/// <summary>
/// Computes, for every vertex, the shortest distance from <paramref name="from"/> and the predecessor on
/// such a path.
/// </summary>
/// <remarks>
/// We need to find a valid scale order to reach a target cluster size from the current one. To find it, we
/// work on an implicit graph G = (V, E) where V is the set of Redis sizes and E is given by
/// <paramref name="neighbors"/>. In this graph, a shortest path between two sizes is an optimal way to scale
/// between them, as judged by <paramref name="weight"/>.
///
/// The search repeatedly takes the minimum element of a sorted set of nodes and relaxes its outgoing edges
/// (Dijkstra-style; this presumes <c>NodeComparer</c> orders nodes by distance from the source, which is not
/// visible from here).
/// </remarks>
/// <param name="vertices">All sizes taking part in the graph.</param>
/// <param name="neighbors">Yields the sizes directly reachable from a given size.</param>
/// <param name="weight">Weight of the edge between two sizes.</param>
/// <param name="from">Source size; its node gets distance 0.</param>
/// <returns>A dictionary mapping each vertex to its node.</returns>
public static Dictionary<RedisClusterSize, Node> ComputeOneToAllShortestPath(IReadOnlyList<RedisClusterSize> vertices, Func<RedisClusterSize, IEnumerable<RedisClusterSize>> neighbors, Func<RedisClusterSize, RedisClusterSize, double> weight, RedisClusterSize from)
{
    var nodeBySize = new Dictionary<RedisClusterSize, Node>(capacity: vertices.Count);
    var frontier = new SortedSet<Node>(comparer: NodeComparer.Instance);

    // Seed the frontier with one node per vertex; only the source starts out at distance 0.
    foreach (var size in vertices)
    {
        var seeded = new Node(size);
        if (size.Equals(from))
        {
            seeded.ShortestDistanceFromSource = 0;
        }

        frontier.Add(seeded);
        nodeBySize[size] = seeded;
    }

    while (frontier.Count > 0)
    {
        var closest = frontier.Min;
        Contract.AssertNotNull(closest);
        frontier.Remove(closest);

        if (closest.Visited)
        {
            continue;
        }

        closest.Visited = true;

        foreach (var reachable in neighbors(closest.ClusterSize))
        {
            var reachableNode = nodeBySize[reachable];
            Contract.AssertNotNull(reachableNode);

            var distanceViaClosest = closest.ShortestDistanceFromSource + weight(closest.ClusterSize, reachable);
            if (distanceViaClosest >= reachableNode.ShortestDistanceFromSource)
            {
                // Not an improvement over what we already know.
                continue;
            }

            // Typically, we'd like to do a decrease-priority operation here. Removing the node, updating it
            // and adding it back is a work-around to avoid using a more complex data structure. The removal
            // has to happen BEFORE the distance changes, as the set locates the node using its current state.
            frontier.Remove(reachableNode);
            reachableNode.ShortestDistanceFromSource = distanceViaClosest;
            reachableNode.Predecessor = closest;
            frontier.Add(reachableNode);
        }
    }

    return nodeBySize;
}
/// <summary>
/// Extracts the path from <paramref name="from"/> to <paramref name="to"/> out of precomputed shortest-path
/// data, by walking predecessor links backwards from the target.
/// </summary>
/// <param name="shortestPaths">Nodes keyed by size, carrying predecessor links.</param>
/// <param name="from">Expected start of the path.</param>
/// <param name="to">End of the path.</param>
/// <returns>
/// The sizes to go through, in travel order, excluding <paramref name="from"/> and ending with
/// <paramref name="to"/>. Empty when both sizes are equal, or when the predecessor chain of
/// <paramref name="to"/> does not end at <paramref name="from"/>.
/// </returns>
public static IReadOnlyList<RedisClusterSize> ComputeShortestPath(IReadOnlyDictionary<RedisClusterSize, Node> shortestPaths, RedisClusterSize from, RedisClusterSize to)
{
    if (from.Equals(to))
    {
        return Array.Empty<RedisClusterSize>();
    }

    // Collected target-first; flipped into travel order at the end.
    var sizesFromTarget = new List<RedisClusterSize>();

    var cursor = shortestPaths[to];
    while (cursor.Predecessor != null)
    {
        sizesFromTarget.Add(cursor.ClusterSize);
        cursor = cursor.Predecessor;
    }

    // The walk stops at a node without predecessor; unless that node is the source, there is no path.
    if (!cursor.ClusterSize.Equals(from))
    {
        return Array.Empty<RedisClusterSize>();
    }

    sizesFromTarget.Reverse();
    return sizesFromTarget;
}
/// <summary>
/// Fills in the memory-related limits of <paramref name="modelContext"/> from recent used-memory metrics.
/// </summary>
/// <remarks>
/// The used-memory metric is fetched for shard indices 0 through 9 over the configured lookback window, using
/// maximum aggregation. The per-shard series are then summed position by position, and the largest of those
/// sums is taken as the expected cluster memory usage. <paramref name="currentClusterSize"/> is not consulted.
/// </remarks>
/// <param name="now">End of the lookback window.</param>
/// <param name="redisAzureId">Azure resource id the metrics are fetched for.</param>
/// <param name="currentClusterSize">Not used by the current computation.</param>
/// <param name="modelContext">Receives the minimum and maximum allowed cluster memory.</param>
/// <param name="cancellationToken">Passed to every metric query.</param>
private async Task ComputeMemoryRelatedFeaturesAsync(DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, ModelContext modelContext, CancellationToken cancellationToken = default)
{
    var windowStartUtc = now - _configuration.UsedMemoryLookback;
    var windowEndUtc = now;

    // There's no way to tell precisely what the memory usage of the whole cluster is without fetching the
    // metric for each shard individually, so that is what we do.
    var perShardQueries = Enumerable
        .Range(0, 10)
        .Select(shardIndex => _monitorManagementClient.GetMetricAsync(
            redisAzureId,
            AzureRedisShardMetric.UsedMemory.ToMetricName(shard: shardIndex),
            windowStartUtc,
            windowEndUtc,
            _configuration.UsedMemoryAggregationInterval,
            AggregationType.Maximum,
            cancellationToken));

    var perShardSeries = await Task.WhenAll(perShardQueries);

    // Add up, for each position in the series, the values of all shards; missing values count as 0.
    var clusterUsagePerPosition = perShardSeries
        .SelectMany(series => series.Select((point, position) => (point, position)))
        .GroupBy(sample => sample.position)
        .OrderBy(bucket => bucket.Key)
        .Select(bucket => bucket.Sum(sample => sample.point.Value ?? 0))
        .ToList();

    // Metric is reported in bytes, we use megabytes for everything
    var peakClusterUsageMb = clusterUsagePerPosition.Max() / 1e+6;

    modelContext.MinimumAllowedClusterMemoryMb = (1 + _configuration.MinimumExtraMemoryAvailable) * peakClusterUsageMb;
    modelContext.MaximumAllowedClusterMemoryMb = _configuration.MaximumClusterMemoryAllowedMb;
}
/// <summary>
/// Fills in the workload-related limit of <paramref name="modelContext"/> from recent operations-per-second
/// metrics.
/// </summary>
/// <remarks>
/// The operations-per-second metric is fetched for shard indices 0 through 9 over the configured lookback
/// window, using maximum aggregation. The per-shard series are then summed position by position, and the
/// largest of those sums is taken as the expected cluster request rate.
/// <paramref name="currentClusterSize"/> is not consulted.
/// </remarks>
/// <param name="now">End of the lookback window.</param>
/// <param name="redisAzureId">Azure resource id the metrics are fetched for.</param>
/// <param name="currentClusterSize">Not used by the current computation.</param>
/// <param name="modelContext">Receives the minimum allowed cluster requests per second.</param>
/// <param name="cancellationToken">Passed to every metric query.</param>
private async Task ComputeWorkloadRelatedFeaturesAsync(DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, ModelContext modelContext, CancellationToken cancellationToken)
{
    var windowStartUtc = now - _configuration.WorkloadLookback;
    var windowEndUtc = now;

    // There's no way to tell precisely what the load on the whole cluster is without fetching the metric for
    // each shard individually, so that is what we do.
    var perShardQueries = Enumerable
        .Range(0, 10)
        .Select(shardIndex => _monitorManagementClient.GetMetricAsync(
            redisAzureId,
            AzureRedisShardMetric.OperationsPerSecond.ToMetricName(shard: shardIndex),
            windowStartUtc,
            windowEndUtc,
            _configuration.WorkloadAggregationInterval,
            AggregationType.Maximum,
            cancellationToken));

    var perShardSeries = await Task.WhenAll(perShardQueries);

    // Add up, for each position in the series, the values of all shards; missing values count as 0.
    var clusterOpsPerPosition = perShardSeries
        .SelectMany(series => series.Select((point, position) => (point, position)))
        .GroupBy(sample => sample.position)
        .OrderBy(bucket => bucket.Key)
        .Select(bucket => bucket.Sum(sample => sample.point.Value ?? 0))
        .ToList();

    // ops/s scales linearly with shards
    var peakClusterRps = clusterOpsPerPosition.Max();

    modelContext.MinimumAllowedClusterRps = (1 + _configuration.MinimumWorkloadExtraPct) * peakClusterRps;
}
/// <summary>
/// Builds a fresh <c>ModelContext</c> by computing first the memory-related and then the workload-related
/// features, one after the other.
/// </summary>
/// <param name="now">Reference time handed to both computations.</param>
/// <param name="redisAzureId">Azure resource id handed to both computations.</param>
/// <param name="currentClusterSize">Current size handed to both computations.</param>
/// <param name="cancellationToken">Handed to both computations.</param>
/// <returns>The populated context.</returns>
private async Task<ModelContext> ComputeFeaturesAsync(DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, CancellationToken cancellationToken = default)
{
    var features = new ModelContext();

    // Both steps write into the same context object and are awaited sequentially.
    await ComputeMemoryRelatedFeaturesAsync(now, redisAzureId, currentClusterSize, features, cancellationToken);
    await ComputeWorkloadRelatedFeaturesAsync(now, redisAzureId, currentClusterSize, features, cancellationToken);

    return features;
}
/// <summary>
/// Computes shortest scaling paths from <paramref name="currentClusterSize"/> to every cluster size, over a
/// graph whose edges and weights encode our preferences about how to scale.
/// </summary>
/// <remarks>
/// We need to reach a target cluster size, but we can't do it in one shot because business rules won't let
/// us, so we need a path to get to it. There are several competing aspects we want to optimize for, in
/// descending importance:
///  - We want memory to get to the target level ASAP
///  - We want to keep the number of shards as stable as possible, given that changing them can cause build
///    failures
///  - We'd like to get there in the least amount of time possible
///  - The route needs to be deterministic, so that if we are forced to stop and re-compute it we'll take the
///    same route
///  - We'd like to minimize the cost of the route
/// Multi-constraint optimization over graphs is NP-complete and algorithms are hard to come up with, so we do
/// our best.
/// </remarks>
/// <param name="currentClusterSize">Size all paths start from.</param>
/// <param name="modelContext">Supplies the minimum allowed cluster memory used for weighting.</param>
/// <returns>The nodes produced by the one-to-all shortest-path computation, keyed by size.</returns>
public static IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> ComputeAllowedPaths(RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    return RedisScalingUtilities.ComputeOneToAllShortestPath(
        vertices: RedisClusterSize.Instances,
        neighbors: AllowedMovesFrom,
        weight: MoveWeight,
        from: currentClusterSize);

    // Sizes a cluster of the given size may move to in one step.
    IEnumerable<RedisClusterSize> AllowedMovesFrom(RedisClusterSize source)
    {
        // Constrain paths to downscale at most one shard at a time. This only makes paths longer, so it is
        // safe. The reason behind this is that the service doesn't really tolerate big reductions.
        return source.ScaleEligibleSizes.Where(candidate =>
            candidate.Shards >= source.Shards || candidate.Shards == source.Shards - 1);
    }

    // Weight of moving into `to`; the size we move out of does not influence it.
    double MoveWeight(RedisClusterSize from, RedisClusterSize to)
    {
        // The penalization is used to avoid transitioning through any intermediate plan that may cause a
        // production outage. Without it, we may transition into a state in which we have less cluster memory
        // available than we need. By adjusting the weight, such a state is only picked when there is no
        // better path; and when given no choice, the lesser of two evils wins.
        var memorySurplusMb = to.ClusterMemorySizeMb - modelContext.MinimumAllowedClusterMemoryMb;

        // When the cluster memory is less than we need, penalize by the amount of memory we fall short.
        double memoryPenalization = memorySurplusMb < 0 ? -memorySurplusMb : 0;

        // The weight needs to be at least one so we don't pick minimum paths that are arbitrarily long.
        return 1 + memoryPenalization;
    }
}