/// <summary>
        /// Decides whether a scaling move is allowed. At this point we don't know whether Azure Cache for Redis
        /// business rules permit scaling from the current size to the target; we only judge whether the target
        /// is reasonable given our knowledge of the production workload. The autoscaler will figure out how to
        /// actually reach the desired plan.
        /// </summary>
        public static bool IsScalingAllowed(
            RedisClusterSize current,
            RedisClusterSize target,
            ModelContext modelContext)
        {
            // The cluster must be able to hold our data, with some overhead in case of production issues. No
            // per-shard restriction is needed because the shards distribute keys evenly.
            bool enoughMemory = target.ClusterMemorySizeMb >= modelContext.MinimumAllowedClusterMemoryMb;

            // Same reasoning for the operation rate: a cluster-level check suffices.
            bool enoughThroughput = target.EstimatedRequestsPerSecond >= modelContext.MinimumAllowedClusterRps;

            // Respect the configured upper bound on cluster memory, when one is set.
            bool belowMemoryCap = modelContext.MaximumAllowedClusterMemoryMb == null ||
                                  target.ClusterMemorySizeMb <= modelContext.MaximumAllowedClusterMemoryMb.Value;

            return enoughMemory && enoughThroughput && belowMemoryCap;
        }
        /// <summary>
        /// Determines whether moving from <paramref name="from"/> to <paramref name="to"/> is a down-scale.
        /// </summary>
        public static bool IsDownScale(RedisClusterSize from, RedisClusterSize to)
        {
            // Within the same tier (i.e. P3), only the shard count matters.
            if (from.Tier.Equals(to.Tier))
            {
                return from.Shards > to.Shards;
            }

            // Different tier but identical shard count: only the tier matters.
            if (from.Shards == to.Shards)
            {
                return IsDownScale(from.Tier, to.Tier);
            }

            // Different tier AND different shard count is more complicated, so we fall back to comparing memory
            // capacity, server capacity, and cost: losing any of them counts as a down-scale.
            return from.ClusterMemorySizeMb > to.ClusterMemorySizeMb
                   || from.EstimatedRequestsPerSecond > to.EstimatedRequestsPerSecond
                   || from.MonthlyCostUsd > to.MonthlyCostUsd;
        }
 /// <summary>
 /// Bundles the prediction result: the chosen target size, the context it was computed under, the cost of
 /// the move, and the sequence of intermediate sizes to traverse to get there.
 /// </summary>
 public ModelOutput(RedisClusterSize targetClusterSize, ModelContext modelContext, double cost, IReadOnlyList <RedisClusterSize> scalePath)
 {
     TargetClusterSize = targetClusterSize;
     ModelContext      = modelContext;
     Cost      = cost;
     ScalePath = scalePath;
 }
Beispiel #4
0
        /// <summary>
        /// Builds and submits the Azure update that moves this instance to <paramref name="targetClusterSize"/>.
        /// Assumes the transition has already been validated by the caller.
        /// </summary>
        private async Task <BoolResult> SubmitScaleRequestAsync(RedisClusterSize targetClusterSize, CancellationToken cancellationToken)
        {
            var update = RedisCache.Update();

            // Change the SKU only when the tier actually differs; unknown plans leave the update untouched.
            if (!ClusterSize.Tier.Equals(targetClusterSize.Tier))
            {
                var plan = targetClusterSize.Tier.Plan;
                var capacity = targetClusterSize.Tier.Capacity;

                if (plan == RedisPlan.Basic)
                {
                    update = update.WithBasicSku(capacity);
                }
                else if (plan == RedisPlan.Standard)
                {
                    update = update.WithStandardSku(capacity);
                }
                else if (plan == RedisPlan.Premium)
                {
                    update = update.WithPremiumSku(capacity);
                }
            }

            // Change the shard count only when it differs from the current one.
            if (ClusterSize.Shards != targetClusterSize.Shards)
            {
                update = update.WithShardCount(targetClusterSize.Shards);
            }

            await update.ApplyAsync(cancellationToken);

            return BoolResult.Success;
        }
        /// <summary>
        /// Picks the cheapest cluster size that is both reachable from <paramref name="currentClusterSize"/> via
        /// allowed scaling operations and acceptable per our business checks, together with the path to it.
        /// Returns an error result when no candidate qualifies.
        /// </summary>
        public static Result <ModelOutput> Predict(RedisClusterSize currentClusterSize, ModelContext modelContext)
        {
            var shortestPaths = ComputeAllowedPaths(currentClusterSize, modelContext);

            // Keep only sizes reachable through valid scaling moves that we also allow scaling to, then rank
            // them by the cost of taking the route (ascending; orderby is stable so ties keep dictionary order).
            var ranked = (from kvp in shortestPaths
                          let size = kvp.Key
                          let node = kvp.Value
                          where node.ShortestDistanceFromSource != double.PositiveInfinity
                          where IsScalingAllowed(currentClusterSize, size, modelContext)
                          let cost = CostFunction(currentClusterSize, size, modelContext, shortestPaths)
                          orderby cost
                          select (Size: size, Cost: cost)).ToList();

            if (ranked.Count == 0)
            {
                return new Result <ModelOutput>(errorMessage: "No cluster size available for scaling");
            }

            var (bestSize, bestCost) = ranked[0];
            return new ModelOutput(
                targetClusterSize: bestSize,
                modelContext: modelContext,
                cost: bestCost,
                scalePath: RedisScalingUtilities.ComputeShortestPath(shortestPaths, currentClusterSize, bestSize));
        }
Beispiel #6
0
        /// <summary>
        /// Wraps an already-fetched Azure Redis cache handle together with its parsed cluster size.
        /// </summary>
        private RedisInstance(IAzure azure, string resourceId, IRedisCache redisCache, RedisClusterSize clusterSize)
        {
            Contract.RequiresNotNullOrEmpty(resourceId);

            _azure      = azure;
            _resourceId = resourceId;
            RedisCache  = redisCache;
            ClusterSize = clusterSize;
        }
Beispiel #7
0
 /// <summary>
 /// Re-fetches the cache handle and cluster size from Azure and updates this instance's view of them.
 /// </summary>
 public async Task <BoolResult> RefreshAsync(CancellationToken cancellationToken = default)
 {
     var metadata = await GenerateInstanceMetadataAsync(_azure, _resourceId, cancellationToken);

     // On success, refresh our cached state; failures propagate through the Result's Select.
     return metadata.Select(result =>
     {
         RedisCache = result.Cache;
         ClusterSize = result.Size;
         return BoolResult.Success;
     });
 }
Beispiel #8
0
        /// <summary>
        /// Fetches the Redis cache by resource id and derives its current cluster size. Throws if the size
        /// cannot be parsed from the Azure representation.
        /// </summary>
        public static async Task <Result <(IRedisCache Cache, RedisClusterSize Size)> > GenerateInstanceMetadataAsync(IAzure azure, string resourceId, CancellationToken cancellationToken = default)
        {
            // TODO: error handling
            var cache = await azure.RedisCaches.GetByIdAsync(resourceId, cancellationToken);
            var size = RedisClusterSize.FromAzureCache(cache).ThrowIfFailure();

            Contract.AssertNotNull(size);
            return new Result <(IRedisCache Cache, RedisClusterSize Size)>((cache, size));
        }
Beispiel #9
0
        /// <summary>
        /// Validates and submits a scale request for this instance, wrapped in operation tracing via
        /// PerformOperationAsync. Invalid requests (no-op, disallowed transition, instance busy) are reported
        /// as error results rather than exceptions.
        /// </summary>
        private Task <BoolResult> RequestScaleAsync(OperationContext context, RedisClusterSize targetClusterSize)
        {
            string extraMessage = $"CurrentClusterSize=[{ClusterSize}] TargetClusterSize=[{targetClusterSize}]";

            return(context.PerformOperationAsync(Tracer, async() =>
            {
                // Scaling to the current size would churn state and tracing for nothing; surface it as an error.
                if (ClusterSize.Equals(targetClusterSize))
                {
                    return new BoolResult(errorMessage: $"No-op scale request attempted (`{ClusterSize}` -> `{targetClusterSize}`) on instance `{Name}`");
                }

                // Azure Cache for Redis only permits certain transitions (e.g. tier and shard count can't both
                // change in a single operation).
                if (!RedisScalingUtilities.CanScale(ClusterSize, targetClusterSize))
                {
                    return new BoolResult(errorMessage: $"Scale request `{ClusterSize}` -> `{targetClusterSize}` on instance `{Name}` is disallowed by Azure Cache for Redis");
                }

                // A scale request may only be submitted while the instance is in a stable provisioning state.
                if (!IsReadyToScale)
                {
                    return new BoolResult(errorMessage: $"Redis instance `{Name}` is not ready to scale, current provisioning state is `{RedisCache.ProvisioningState}`");
                }

                var instance = RedisCache.Update();

                // Only touch the SKU when the tier actually changes.
                if (!ClusterSize.Tier.Equals(targetClusterSize.Tier))
                {
                    switch (targetClusterSize.Tier.Plan)
                    {
                    case RedisPlan.Basic:
                        instance = instance.WithBasicSku(targetClusterSize.Tier.Capacity);
                        break;

                    case RedisPlan.Standard:
                        instance = instance.WithStandardSku(targetClusterSize.Tier.Capacity);
                        break;

                    case RedisPlan.Premium:
                        instance = instance.WithPremiumSku(targetClusterSize.Tier.Capacity);
                        break;
                    }
                }

                // Only touch the shard count when it actually changes.
                if (ClusterSize.Shards != targetClusterSize.Shards)
                {
                    instance = instance.WithShardCount(targetClusterSize.Shards);
                }

                await instance.ApplyAsync(context.Token);

                return BoolResult.Success;
            },
                                                 extraStartMessage: extraMessage,
                                                 extraEndMessage: _ => extraMessage,
                                                 pendingOperationTracingInterval: TimeSpan.FromMinutes(1)));
        }
        /// <summary>
        /// Computes the shortest path from <paramref name="from"/> to <paramref name="to"/> over the implicit
        /// scaling graph. When <paramref name="vertices"/> is omitted, all known cluster sizes are considered.
        /// </summary>
        public static IReadOnlyList <RedisClusterSize> ComputeShortestPath(RedisClusterSize from, RedisClusterSize to, Func <RedisClusterSize, IEnumerable <RedisClusterSize> > neighbors, Func <RedisClusterSize, RedisClusterSize, double> weight, IReadOnlyList <RedisClusterSize>?vertices = null)
        {
            // Trivial case: already at the destination.
            if (from.Equals(to))
            {
                return Array.Empty <RedisClusterSize>();
            }

            var allVertices = vertices ?? RedisClusterSize.Instances;
            var shortestPaths = ComputeOneToAllShortestPath(allVertices, neighbors, weight, from);
            return ComputeShortestPath(shortestPaths, from, to);
        }
        /// <summary>
        /// This function embodies the concept of "how much does it cost to switch from
        /// <paramref name="current"/> to <paramref name="target"/>". At this point, we can assume that:
        ///     - The two input sizes are valid states to be in
        ///     - We can reach the target from current via some amount of autoscaling operations
        /// Hence, we're just ranking amongst the many potential states.
        /// </summary>
        public static double CostFunction(RedisClusterSize current, RedisClusterSize target, ModelContext modelContext, IReadOnlyDictionary <RedisClusterSize, RedisScalingUtilities.Node> shortestPaths)
        {
            // A no-op transition is free.
            if (current.Equals(target))
            {
                return 0;
            }

            // Sanity check: a non-trivial target must actually be reachable through the computed paths.
            var path = RedisScalingUtilities.ComputeShortestPath(shortestPaths, current, target);
            Contract.Assert(path.Count > 0);

            // Positive when the move spends more money, negative when it saves.
            return (double)(target.MonthlyCostUsd - current.MonthlyCostUsd);
        }
Beispiel #12
0
 /// <summary>
 /// Creates a read-only or writable instance wrapper from an already-fetched cache handle, deriving the
 /// cluster size from the cache; failures from size parsing propagate through the Result.
 /// </summary>
 public static Result <IRedisInstance> FromPreloaded(IAzure azure, IRedisCache redisCache, bool readOnly)
 {
     return RedisClusterSize
            .FromAzureCache(redisCache)
            .Select(clusterSize => readOnly
                ? (IRedisInstance) new ReadOnlyRedisInstance(azure, redisCache.Id, redisCache, clusterSize)
                : (IRedisInstance) new RedisInstance(azure, redisCache.Id, redisCache, clusterSize));
 }
Beispiel #13
0
        /// <summary>
        /// Validates a scale request against Azure constraints and instance state, then builds and applies the
        /// corresponding update. Invalid requests are reported as error results rather than exceptions.
        /// </summary>
        private async Task <BoolResult> RequestScaleAsync(RedisClusterSize targetClusterSize, CancellationToken cancellationToken = default)
        {
            // Scaling to the current size is surfaced as an error so callers notice the no-op.
            if (ClusterSize.Equals(targetClusterSize))
            {
                return new BoolResult(errorMessage: $"No-op scale request attempted (`{ClusterSize}` -> `{targetClusterSize}`) on instance `{Name}`");
            }

            // Azure Cache for Redis only permits certain transitions.
            if (!RedisScalingUtilities.CanScale(ClusterSize, targetClusterSize))
            {
                return new BoolResult(errorMessage: $"Scale request `{ClusterSize}` -> `{targetClusterSize}` on instance `{Name}` is disallowed by Azure Cache for Redis");
            }

            // A scale request may only be submitted while the instance is in a stable provisioning state.
            if (!IsReadyToScale)
            {
                return new BoolResult(errorMessage: $"Redis instance `{Name}` is not ready to scale, current provisioning state is `{RedisCache.ProvisioningState}`");
            }

            var update = RedisCache.Update();

            // Change the SKU only when the tier differs; unknown plans leave the update untouched.
            if (!ClusterSize.Tier.Equals(targetClusterSize.Tier))
            {
                var plan = targetClusterSize.Tier.Plan;
                var capacity = targetClusterSize.Tier.Capacity;

                if (plan == RedisPlan.Basic)
                {
                    update = update.WithBasicSku(capacity);
                }
                else if (plan == RedisPlan.Standard)
                {
                    update = update.WithStandardSku(capacity);
                }
                else if (plan == RedisPlan.Premium)
                {
                    update = update.WithPremiumSku(capacity);
                }
            }

            // Change the shard count only when it differs.
            if (ClusterSize.Shards != targetClusterSize.Shards)
            {
                update = update.WithShardCount(targetClusterSize.Shards);
            }

            await update.ApplyAsync(cancellationToken);

            return BoolResult.Success;
        }
Beispiel #14
0
        /// <summary>
        /// Pre-validates a scale request (no-op, Azure business rules, provisioning state) and delegates the
        /// actual submission to <see cref="SubmitScaleRequestAsync"/>.
        /// </summary>
        private async Task <BoolResult> RequestScaleAsync(RedisClusterSize targetClusterSize, CancellationToken cancellationToken = default)
        {
            // No-op requests are surfaced as errors so callers notice them.
            if (ClusterSize.Equals(targetClusterSize))
            {
                return new BoolResult(errorMessage: $"No-op scale request attempted (`{ClusterSize}` -> `{targetClusterSize}`) on instance `{Name}`");
            }

            // Azure Cache for Redis only permits certain transitions.
            if (!RedisScalingUtilities.CanScale(ClusterSize, targetClusterSize))
            {
                return new BoolResult(errorMessage: $"Scale request `{ClusterSize}` -> `{targetClusterSize}` on instance `{Name}` is disallowed by Azure Cache for Redis");
            }

            // Only submit while the instance is in a stable provisioning state.
            if (!IsReadyToScale)
            {
                return new BoolResult(errorMessage: $"Redis instance `{Name}` is not ready to scale, current provisioning state is `{RedisCache.ProvisioningState}`");
            }

            return await SubmitScaleRequestAsync(targetClusterSize, cancellationToken);
        }
        /// <summary>
        /// Whether Azure Cache for Redis permits a direct scaling operation from <paramref name="from"/> to
        /// <paramref name="to"/>.
        /// </summary>
        public static bool CanScale(RedisClusterSize from, RedisClusterSize to)
        {
            // Staying put is always fine.
            if (from.Equals(to))
            {
                return true;
            }

            // The tier transition itself must be permitted.
            if (!CanScale(from.Tier, to.Tier))
            {
                return false;
            }

            // Azure can't change both shards and tiers at once; they must be done one at a time.
            bool changesShards = from.Shards != to.Shards;
            bool changesTier = !from.Tier.Equals(to.Tier);
            return !(changesShards && changesTier);
        }
        /// <summary>
        /// Estimates how long the scaling operation from <paramref name="from"/> to <paramref name="to"/> will
        /// take, assuming the transition is valid per <see cref="CanScale"/>.
        /// </summary>
        public static TimeSpan ExpectedScalingDelay(RedisClusterSize from, RedisClusterSize to)
        {
            Contract.Requires(CanScale(from, to));

            // No transition, no delay.
            if (from.Equals(to))
            {
                return TimeSpan.Zero;
            }

            if (!from.Tier.Equals(to.Tier))
            {
                // Tier changed, which means the number of shards didn't. However, we will take the same amount
                // of time as the amount of shards that need to change tier.
                Contract.Assert(from.Shards == to.Shards);
                return TimeSpan.FromTicks(Constants.RedisScaleTimePerShard.Ticks * from.Shards);
            }

            // Same tier: autoscaling will add or remove shards, one unit of time per shard delta.
            var shardDelta = Math.Abs(from.Shards - to.Shards);
            return TimeSpan.FromTicks(Constants.RedisScaleTimePerShard.Ticks * shardDelta);
        }
Beispiel #17
0
 /// <summary>
 /// Creates a <see cref="RedisInstance"/> from an already-fetched cache handle, deriving the cluster size
 /// from the cache; failures from size parsing propagate through the Result.
 /// </summary>
 public static Result <RedisInstance> FromPreloaded(IAzure azure, IRedisCache redisCache)
 {
     return(RedisClusterSize
            .FromAzureCache(redisCache)
            .Select(clusterSize => new RedisInstance(azure, redisCache.Id, redisCache, clusterSize)));
 }
Beispiel #18
0
 /// <summary>
 /// Forwards construction to the base class; see the base constructor for parameter semantics.
 /// </summary>
 internal RedisInstance(IAzure azure, string resourceId, IRedisCache redisCache, RedisClusterSize clusterSize)
     : base(azure, resourceId, redisCache, clusterSize)
 {
 }
 /// <summary>
 /// Graph node wrapping a cluster size for the shortest-path computation.
 /// </summary>
 public Node(RedisClusterSize clusterSize)
 {
     ClusterSize = clusterSize;
 }
        /// <summary>
        /// Dijkstra's single-source shortest paths over the implicit graph of cluster sizes, where edges are the
        /// valid scaling transitions given by <paramref name="neighbors"/> and edge weights come from
        /// <paramref name="weight"/>. Returns a map from each vertex to its <see cref="Node"/>, which carries
        /// the shortest distance from <paramref name="from"/> and the predecessor used to reach it.
        /// </summary>
        public static Dictionary <RedisClusterSize, Node> ComputeOneToAllShortestPath(IReadOnlyList <RedisClusterSize> vertices, Func <RedisClusterSize, IEnumerable <RedisClusterSize> > neighbors, Func <RedisClusterSize, RedisClusterSize, double> weight, RedisClusterSize from)
        {
            // We need to find a valid scale order to reach the target cluster size from the current one. To find it,
            // create an implicit graph G = (V, E) where V is the set of Redis sizes, and E is the set of valid
            // scalings given by the CanScale relation. In this graph, finding a shortest path between the current and
            // target sizes is equivalent to figuring out a way to scale among them optimally, as given by whatever
            // weight function we choose.
            var translation      = new Dictionary <RedisClusterSize, Node>(capacity: vertices.Count);
            var minPriorityQueue = new SortedSet <Node>(comparer: NodeComparer.Instance);

            // Seed every vertex; only the source starts with distance 0, the rest presumably start at
            // +infinity as set by the Node itself (not visible here — confirm against Node's definition).
            foreach (var vertex in vertices)
            {
                var node = new Node(vertex);
                if (vertex.Equals(from))
                {
                    node.ShortestDistanceFromSource = 0;
                }

                minPriorityQueue.Add(node);
                translation[vertex] = node;
            }

            // Standard Dijkstra loop: repeatedly settle the closest unvisited node.
            while (minPriorityQueue.Count > 0)
            {
                var node = minPriorityQueue.Min;
                Contract.AssertNotNull(node);
                minPriorityQueue.Remove(node);

                // Defensive: skip anything already settled.
                if (node.Visited)
                {
                    continue;
                }

                node.Visited = true;
                foreach (var target in neighbors(node.ClusterSize))
                {
                    var adjacent = translation[target];
                    Contract.AssertNotNull(adjacent);

                    // Relax the edge: only update when this route is strictly shorter.
                    var distanceThroughNode = node.ShortestDistanceFromSource + weight(node.ClusterSize, target);
                    if (distanceThroughNode >= adjacent.ShortestDistanceFromSource)
                    {
                        continue;
                    }

                    // Typically, we'd like to do a decrease priority operation here. This is a work-around to avoid
                    // using a more complex data structure: remove the node, update its key, then re-insert it.
                    minPriorityQueue.Remove(adjacent);
                    adjacent.ShortestDistanceFromSource = distanceThroughNode;
                    adjacent.Predecessor = node;
                    minPriorityQueue.Add(adjacent);
                }
            }

            return(translation);
        }
        /// <summary>
        /// Reconstructs the shortest path from <paramref name="from"/> to <paramref name="to"/> by walking the
        /// predecessor chain produced by <see cref="ComputeOneToAllShortestPath"/>. Returns an empty list for a
        /// trivial path or when <paramref name="to"/> is unreachable.
        /// </summary>
        public static IReadOnlyList <RedisClusterSize> ComputeShortestPath(IReadOnlyDictionary <RedisClusterSize, Node> shortestPaths, RedisClusterSize from, RedisClusterSize to)
        {
            if (from.Equals(to))
            {
                return Array.Empty <RedisClusterSize>();
            }

            // Walk backwards from the destination, collecting sizes in reverse order.
            var reversed = new List <RedisClusterSize>();
            var cursor = shortestPaths[to];
            while (cursor.Predecessor != null)
            {
                reversed.Add(cursor.ClusterSize);
                cursor = cursor.Predecessor;
            }

            // If the walk didn't terminate at the source, the destination is unreachable.
            if (!cursor.ClusterSize.Equals(from))
            {
                return Array.Empty <RedisClusterSize>();
            }

            reversed.Reverse();
            return reversed;
        }
        /// <summary>
        /// Populates the memory-related fields of <paramref name="modelContext"/> from Azure Monitor metrics:
        /// the minimum tolerable cluster memory (peak observed usage plus a configured safety margin) and the
        /// configured maximum.
        /// </summary>
        private async Task ComputeMemoryRelatedFeaturesAsync(DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, ModelContext modelContext, CancellationToken cancellationToken = default)
        {
            var startTimeUtc = now - _configuration.UsedMemoryLookback;
            var endTimeUtc   = now;

            // This maximum is the maximum across all shards. There's no way to tell what the memory usage is for
            // everything precisely without fetching metrics for each shard individually.
            // NOTE(review): shard count is hard-coded to 10 — presumably the platform maximum; confirm it covers
            // currentClusterSize.Shards in all deployments.
            var usedMemoryTasks =
                Enumerable.Range(0, 10)
                .Select(shard =>
                        _monitorManagementClient.GetMetricAsync(
                            redisAzureId,
                            AzureRedisShardMetric.UsedMemory.ToMetricName(shard: shard),
                            startTimeUtc,
                            endTimeUtc,
                            _configuration.UsedMemoryAggregationInterval,
                            AggregationType.Maximum,
                            cancellationToken));
            var usedMemoryBytes = await Task.WhenAll(usedMemoryTasks);

            // Align the per-shard series by sample index and sum across shards (missing samples count as 0) to
            // approximate total cluster usage at each interval.
            var groupedMetrics = usedMemoryBytes
                                 .SelectMany(measurements => measurements.Select((measurement, index) => (measurement, index)))
                                 .GroupBy(entry => entry.index)
                                 .OrderBy(group => group.Key)
                                 .Select(group => group.Sum(entry => entry.measurement.Value ?? 0))
                                 .ToList();

            // Metric is reported in bytes, we use megabytes for everything.
            // NOTE(review): Max() throws on an empty list — assumes the metric query always returns samples.
            var expectedClusterMemoryUsageMb = groupedMetrics.Max() / 1e+6;

            // Require the observed peak plus the configured extra headroom.
            modelContext.MinimumAllowedClusterMemoryMb = (1 + _configuration.MinimumExtraMemoryAvailable) * expectedClusterMemoryUsageMb;

            modelContext.MaximumAllowedClusterMemoryMb = _configuration.MaximumClusterMemoryAllowedMb;
        }
        /// <summary>
        /// Populates the workload-related fields of <paramref name="modelContext"/> from Azure Monitor metrics:
        /// the minimum operations/second a candidate cluster must support (peak observed rate plus a configured
        /// margin).
        /// </summary>
        private async Task ComputeWorkloadRelatedFeaturesAsync(DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, ModelContext modelContext, CancellationToken cancellationToken)
        {
            var startTimeUtc = now - _configuration.WorkloadLookback;
            var endTimeUtc   = now;

            // The operations/second metric is reported per shard, so we fetch each shard's series individually
            // and combine them below.
            // NOTE(review): shard count is hard-coded to 10 — presumably the platform maximum; confirm it covers
            // currentClusterSize.Shards in all deployments.
            var operationsPerSecondTasks =
                Enumerable.Range(0, 10)
                .Select(shard =>
                        _monitorManagementClient.GetMetricAsync(
                            redisAzureId,
                            AzureRedisShardMetric.OperationsPerSecond.ToMetricName(shard: shard),
                            startTimeUtc,
                            endTimeUtc,
                            _configuration.WorkloadAggregationInterval,
                            AggregationType.Maximum,
                            cancellationToken));
            var operationsPerSecond = await Task.WhenAll(operationsPerSecondTasks);

            // Align the per-shard series by sample index and sum across shards (missing samples count as 0) to
            // approximate total cluster ops/s at each interval.
            var groupedMetrics = operationsPerSecond
                                 .SelectMany(measurements => measurements.Select((measurement, index) => (measurement, index)))
                                 .GroupBy(entry => entry.index)
                                 .OrderBy(group => group.Key)
                                 .Select(group => group.Sum(entry => entry.measurement.Value ?? 0))
                                 .ToList();

            // ops/s scales linearly with shards.
            // NOTE(review): Max() throws on an empty list — assumes the metric query always returns samples.
            var expectedClusterRps = groupedMetrics.Max();

            // Require the observed peak plus the configured extra percentage.
            modelContext.MinimumAllowedClusterRps = (1 + _configuration.MinimumWorkloadExtraPct) * expectedClusterRps;
        }
        /// <summary>
        /// Builds the model context by computing memory- and workload-related features from metrics.
        /// </summary>
        private async Task <ModelContext> ComputeFeaturesAsync(DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, CancellationToken cancellationToken = default)
        {
            var context = new ModelContext();

            // Both calls mutate the shared context; run them sequentially in the original order.
            await ComputeMemoryRelatedFeaturesAsync(now, redisAzureId, currentClusterSize, context, cancellationToken);
            await ComputeWorkloadRelatedFeaturesAsync(now, redisAzureId, currentClusterSize, context, cancellationToken);

            return context;
        }
        /// <summary>
        /// Computes, for every known cluster size, the best route from <paramref name="currentClusterSize"/>
        /// under the model's constraints.
        /// </summary>
        public static IReadOnlyDictionary <RedisClusterSize, RedisScalingUtilities.Node> ComputeAllowedPaths(RedisClusterSize currentClusterSize, ModelContext modelContext)
        {
            // We need to reach the target cluster size, but we can't do it in one shot because business rules won't
            // let us, so we need to compute a path to get to it. This is probably the most complex part of the
            // algorithm; there are several competing aspects we want to optimize for, in descending importance:
            //  - We want for memory to get to the target level ASAP
            //  - We want to keep the number of shards as stable as possible, given that changing them can cause
            //    build failures
            //  - We'd like to get there in the fewest amount of time possible
            //  - The route needs to be deterministic, so that if we are forced to stop and re-compute it we'll
            //    take the same route
            //  - We'd like to minimize the cost of the route
            // Multi-constraint optimization over graphs is NP-complete and algorithms are hard to come up with,
            // so we do our best.

            // Constrain paths to downscale at most one shard at a time. This only makes paths longer, so it is
            // safe; the reason behind it is that the service doesn't really tolerate big reductions. Growing the
            // shard count (or keeping it) is unconstrained.
            IEnumerable <RedisClusterSize> Neighbors(RedisClusterSize size) =>
                size.ScaleEligibleSizes.Where(candidate =>
                    candidate.Shards >= size.Shards || candidate.Shards == size.Shards - 1);

            // This weight is used to avoid transitioning through any intermediate plan that may cause a
            // production outage. Without it, we might pass through a state with less cluster memory than we
            // need. Penalizing such edges guarantees they are only taken iff there is no better path; moreover,
            // we always pick the lesser of two evils when given no choice.
            double Weight(RedisClusterSize from, RedisClusterSize to)
            {
                // How much cluster memory the intermediate state is missing, if any.
                var memoryShortfallMb = modelContext.MinimumAllowedClusterMemoryMb - to.ClusterMemorySizeMb;
                var penalization = memoryShortfallMb > 0 ? memoryShortfallMb : 0;

                // The base weight needs to be at least one so we don't pick minimum paths that are arbitrarily long.
                return 1 + penalization;
            }

            return RedisScalingUtilities.ComputeOneToAllShortestPath(vertices: RedisClusterSize.Instances, neighbors: Neighbors, weight: Weight, from: currentClusterSize);
        }