/// <summary>
/// Ranks the cost of switching from <paramref name="current"/> to
/// <paramref name="target"/>. By the time this is called we can assume that:
///  - Both input sizes are valid states to be in.
///  - The target is reachable from the current size via some sequence of
///    autoscaling operations.
/// Hence this is purely a ranking among the many potential states.
/// </summary>
private static double CostFunction(RedisClusterSize current, RedisClusterSize target, ModelContext modelContext, IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> shortestPaths)
{
    // Staying on the same size (i.e. a no-op) is free.
    if (current.Equals(target))
    {
        return 0;
    }

    // Sanity-check that a route to the target actually exists.
    var path = RedisScalingUtilities.ComputeShortestPath(shortestPaths, current, target);
    Contract.Assert(path.Count > 0);

    // Positive when the move spends more money, negative when it saves.
    return (double)(target.MonthlyCostUsd - current.MonthlyCostUsd);
}
/// <summary>
/// Constrains the model based on how loaded the Redis servers currently are:
/// at medium load we forbid scaling below the current shard count, and at high
/// load we freeze the shard count entirely.
/// </summary>
private async Task ComputeServerLoadFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, ModelContext modelContext, RedisClusterSize currentClusterSize)
{
    var serverLoads = await FetchMaximumServerLoadAcrossShardsAsync(context, now, redisAzureId);

    // Guard against an empty series (might only ever happen in tests).
    var maxServerLoad = serverLoads.Any() ? serverLoads.Max() : 0;

    if (maxServerLoad < _configuration.MediumServerLoadPct)
    {
        return;
    }

    // Medium load reached: don't allow plans that drop below the current shard count.
    modelContext.MinimumNumberOfShardsAllowed = currentClusterSize.Shards;

    if (maxServerLoad < _configuration.HighServerLoadPct)
    {
        return;
    }

    // High load reached: disallow changing the number of shards at all.
    modelContext.DisallowChangingNumberOfShards = true;
}
/// <summary>
/// Constrains the model so that candidate plans can sustain the currently
/// observed workload (operations per second), with some configured headroom.
/// </summary>
private async Task ComputeWorkloadFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, ModelContext modelContext)
{
    var operationsPerShard = await FetchOperationsPerSecondPerShardAsync(context, now, redisAzureId);

    if (operationsPerShard.Count == 0)
    {
        // If all metrics are missing, we won't constraint plans on having a certain
        // minimum number of operations. This is used to account for an Azure Monitor
        // API bug whereby some metrics may not be reported.
        return;
    }

    // ops/s scales linearly with shards.
    var expectedClusterRps = operationsPerShard.Max();
    modelContext.MinimumAllowedClusterRps = (1 + _configuration.MinimumWorkloadExtraPct) * expectedClusterRps;
}
/// <summary>
/// Constrains the model so that candidate plans have enough cluster memory for
/// the currently observed usage (plus configured headroom), and no more than the
/// configured maximum.
/// </summary>
private async Task ComputeMemoryFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, ModelContext modelContext)
{
    var groupedMetrics = await FetchMemoryUsedPerShardAsync(context, now, redisAzureId);

    // The configured cap doesn't depend on observed metrics, so apply it regardless.
    modelContext.MaximumAllowedClusterMemoryMb = _configuration.MaximumClusterMemoryAllowedMb;

    if (groupedMetrics.Count == 0)
    {
        // Mirror ComputeWorkloadFeaturesAsync/ComputeServerLoadFeaturesAsync: when all
        // metrics are missing (Azure Monitor API bug, or tests) skip the constraint
        // instead of crashing — Max() on an empty sequence throws
        // InvalidOperationException.
        return;
    }

    // Metric is reported in bytes, we use megabytes for everything.
    var expectedClusterMemoryUsageMb = groupedMetrics.Max() / 1e+6;
    modelContext.MinimumAllowedClusterMemoryMb = (1 + _configuration.MinimumExtraMemoryAvailable) * expectedClusterMemoryUsageMb;
}
/// <summary>
/// Computes shortest allowed autoscaling paths from <paramref name="currentClusterSize"/>
/// to every other size, subject to the constraints recorded in <paramref name="modelContext"/>.
/// </summary>
private static IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> ComputeAllowedPaths(RedisClusterSize currentClusterSize, ModelContext modelContext)
{
    // We need to reach the target cluster size, but we can't do it in one shot because
    // business rules won't let us, so we need to compute a path to get to it. This is
    // probably the most complex part of the algorithm; there are several competing
    // aspects we want to optimize for, in descending importance:
    //  - We want for memory to get to the target level ASAP.
    //  - We want to keep the number of shards as stable as possible, given that
    //    changing them can cause build failures.
    //  - We'd like to get there in the fewest amount of time possible.
    //  - The route needs to be deterministic, so that if we are forced to stop and
    //    re-compute it we'll take the same route.
    //  - We'd like to minimize the cost of the route.
    // Multi-constraint optimization over graphs is NP-complete and algorithms are hard
    // to come up with, so we do our best.

    // NOTE: the lambda parameter is the node the search is currently expanding (not
    // necessarily the method's starting size). It previously shadowed the method's
    // `currentClusterSize` parameter, which C# rejects (CS0136) and which obscured
    // which size the rules below apply to.
    Func<RedisClusterSize, IEnumerable<RedisClusterSize>> neighbors = size =>
        size.ScaleEligibleSizes.Where(targetClusterSize =>
        {
            if (modelContext.DisallowChangingNumberOfShards && targetClusterSize.Shards != size.Shards)
            {
                return false;
            }

            if (modelContext.MinimumNumberOfShardsAllowed != null && targetClusterSize.Shards < modelContext.MinimumNumberOfShardsAllowed.Value)
            {
                return false;
            }

            // Constrain paths to downscale at most one shard at the time. This only
            // makes paths longer, so it is safe. The reason behind this is that the
            // service doesn't really tolerate big reductions.
            if (targetClusterSize.Shards < size.Shards)
            {
                return targetClusterSize.Shards == size.Shards - 1;
            }

            return true;
        });

    Func<RedisClusterSize, RedisClusterSize, double> weight = (from, to) =>
    {
        // This factor is used to avoid transitioning to any kind of intermediate plan
        // that may cause a production outage. If we don't have it, we may transition
        // into a state in which we have less cluster memory available than we need.
        // By adjusting the weight function, we guarantee that this only happens iff
        // there is no better path; moreover, we will always choose the lesser of two
        // evils if given no choice.
        double clusterMemoryPenalization = 0;

        var delta = to.ClusterMemorySizeMb - modelContext.MinimumAllowedClusterMemoryMb;
        if (delta < 0)
        {
            // The amount of cluster memory is less than we need, so we penalize taking
            // this path by adding the amount of memory that keeps us away from the
            // target.
            clusterMemoryPenalization = -delta;
        }

        // This needs to be at least one so we don't pick minimum paths that are
        // arbitrarily long.
        return 1 + clusterMemoryPenalization;
    };

    return RedisScalingUtilities.ComputeOneToAllShortestPath(vertices: RedisClusterSize.Instances, neighbors: neighbors, weight: weight, from: currentClusterSize);
}