        // Checks that RedisScalingUtilities.CanScale reports the move between the two given cluster sizes as
        // possible. Both arguments are textual cluster sizes handed to RedisClusterSize.TryParse.
        public void CanScaleBetween(string fromString, string toString)
            // Unwrap the parse results; ThrowIfFailure presumably aborts the test on a malformed size string.
            var from = RedisClusterSize.TryParse(fromString).ThrowIfFailure();
            var to   = RedisClusterSize.TryParse(toString).ThrowIfFailure();

            Assert.True(RedisScalingUtilities.CanScale(from, to));
        // Checks that RedisScalingUtilities.IsDownScale classifies the move between the two given cluster sizes as
        // a downscale. Both arguments are textual cluster sizes handed to RedisClusterSize.TryParse.
        public void DownscaleManualExamples(string fromString, string toString)
            // Unwrap the parse results; ThrowIfFailure presumably aborts the test on a malformed size string.
            var from = RedisClusterSize.TryParse(fromString).ThrowIfFailure();
            var to   = RedisClusterSize.TryParse(toString).ThrowIfFailure();

            Assert.True(RedisScalingUtilities.IsDownScale(from, to));
        /// <summary>
        /// Picks the cluster size to scale to: among the sizes that are reachable from
        /// <paramref name="currentClusterSize"/> and accepted by <c>IsScalingAllowed</c>, the one with the lowest
        /// <c>CostFunction</c> value is selected.
        /// </summary>
        /// <param name="currentClusterSize">Size the cluster currently has; every path starts from it.</param>
        /// <param name="modelContext">Constraints used to compute paths, filter candidates and cost them.</param>
        /// <returns>
        /// The cheapest candidate together with its cost and the path to reach it, or a failed result carrying the
        /// message "No cluster size available for scaling" when no candidate is left.
        /// </returns>
        private static Result <ModelOutput> Predict(RedisClusterSize currentClusterSize, ModelContext modelContext)
            // TODO: autoscaler should consider the server load percentage as well. If a shard had a very high load
            // percentage, it means that it is for some reason receiving an uneven load. Hence, adding shards helps in
            // this situation. There is no easy way to add that to the current model. Ideas:
            //  - If any server reached a load >70% at any time in the period analyzed, we need to guarantee that
            //    there's at least as many shards as there were before (i.e. no downscales are allowed).
            var shortestPaths = ComputeAllowedPaths(currentClusterSize, modelContext);

            var eligibleClusterSizes = shortestPaths
                                       .Select(kvp => (Size: kvp.Key, Node: kvp.Value))
                                       // Find all plans that we can reach from the current one via scaling operations, and that we allow scaling to
                                       // (a distance of positive infinity marks a size for which no path was found)
                                       .Where(entry => entry.Node.ShortestDistanceFromSource != double.PositiveInfinity && IsScalingAllowed(currentClusterSize, entry.Size, modelContext))
                                       // Compute the cost of taking the given route
                                       .Select(entry => (entry.Size, entry.Node, Cost: CostFunction(currentClusterSize, entry.Size, modelContext, shortestPaths)))

            // Rank them by cost ascending
            var costSorted = eligibleClusterSizes
                             .OrderBy(pair => pair.Cost)

            // Nothing passed the filters above, so there is no size to recommend
            if (costSorted.Count == 0)
                return(new Result <ModelOutput>(errorMessage: "No cluster size available for scaling"));

            // The ranking is ascending, so the first entry is the cheapest candidate
            return(new ModelOutput(
                       targetClusterSize: costSorted[0].Size,
                       modelContext: modelContext,
                       cost: costSorted[0].Cost,
                       scalePath: RedisScalingUtilities.ComputeShortestPath(shortestPaths, currentClusterSize, costSorted[0].Size)));
        /// <summary>
        /// Decides whether a scaling move is allowed. At this point, we don't know if Azure Cache for Redis business
        /// rules allow scaling from the current to the target size. We just decide whether it is reasonable based on
        /// our knowledge of our production workload.
        /// The autoscaler will figure out how to reach the desired plan.
        /// </summary>
        /// <param name="current">Size the cluster currently has.</param>
        /// <param name="target">Candidate size being evaluated.</param>
        /// <param name="modelContext">
        /// Source of the limits <paramref name="target"/> is checked against: minimum cluster memory, minimum
        /// cluster requests per second and the optional maximum cluster memory.
        /// </param>
        private static bool IsScalingAllowed(
            RedisClusterSize current,
            RedisClusterSize target,
            ModelContext modelContext)
            // Cluster must be able to handle the amount of data we'll give it, with some overhead in case of
            // production issues. Notice we don't introduce a per-shard restriction; reason for this is that the shards
            // distribute keys evenly.
            if (target.ClusterMemorySizeMb < modelContext.MinimumAllowedClusterMemoryMb)

            // Cluster must be able to handle the amount of operations needed. Notice we don't introduce a per-shard
            // restriction; reason for this is that the shards distribute keys evenly.
            if (target.EstimatedRequestsPerSecond < modelContext.MinimumAllowedClusterRps)

            // Disallow going over the maximum allowed cluster memory
            // The maximum is optional: when it is null this check never triggers.
            if (modelContext.MaximumAllowedClusterMemoryMb != null && target.ClusterMemorySizeMb > modelContext.MaximumAllowedClusterMemoryMb.Value)

 /// <summary>
 /// Creates a model output by storing the four values it is given.
 /// </summary>
 /// <param name="targetClusterSize">Cluster size the model recommends moving to.</param>
 /// <param name="modelContext">Context the recommendation was computed with.</param>
 /// <param name="cost">Cost the model assigned to <paramref name="targetClusterSize"/>.</param>
 /// <param name="scalePath">Sequence of cluster sizes associated with reaching the target.</param>
 public ModelOutput(RedisClusterSize targetClusterSize, ModelContext modelContext, double cost, IReadOnlyList <RedisClusterSize> scalePath)
     TargetClusterSize = targetClusterSize;
     ModelContext      = modelContext;
     Cost      = cost;
     ScalePath = scalePath;
        // Asks for the shortest path from a cluster size to itself ("P1/1" to "P1/1").
        public void CanFindEmptyRoute()
            var from = RedisClusterSize.Parse("P1/1");
            var to   = RedisClusterSize.Parse("P1/1");
            // Neighbors are the sizes each size declares as scale-eligible; every edge weighs 1.
            var path = RedisScalingUtilities.ComputeShortestPath(from, to, size => size.ScaleEligibleSizes, (f, t) => 1);

        // Asks for the shortest path from "P1/1" to "P3/3" over a graph without edges.
        public void FailsOnNonExistantRoute()
            var from = RedisClusterSize.Parse("P1/1");
            var to   = RedisClusterSize.Parse("P3/3");
            // The neighbor function always returns an empty array, so no size has any outgoing edge.
            var path = RedisScalingUtilities.ComputeShortestPath(from, to, size => new RedisClusterSize[] { }, (f, t) => 1);

        // Expects the shortest path from "P1/1" to "P3/3" to consist of "P3/1" and "P3/3".
        public void SucceedsOnSimpleRoute()
            var from = RedisClusterSize.Parse("P1/1");
            var to   = RedisClusterSize.Parse("P3/3");
            // Neighbors are the sizes each size declares as scale-eligible; every edge weighs 1.
            var path = RedisScalingUtilities.ComputeShortestPath(from, to, size => size.ScaleEligibleSizes, (f, t) => 1);

            // The expected path does not list the starting size.
            // NOTE(review): BeEquivalentTo may not enforce element order unless strict ordering is requested;
            // confirm whether the order of the hops is meant to be asserted as well.
            path.Should().BeEquivalentTo(new RedisClusterSize[] { RedisClusterSize.Parse("P3/1"), RedisClusterSize.Parse("P3/3") });
        // Asks for the shortest path from "P1/1" to "P1/2".
        public void CanFindSingleRoute()
            var from = RedisClusterSize.Parse("P1/1");
            var to   = RedisClusterSize.Parse("P1/2");
            // Neighbors are the sizes each size declares as scale-eligible; every edge weighs 1.
            var path = RedisScalingUtilities.ComputeShortestPath(from, to, size => size.ScaleEligibleSizes, (f, t) => 1);

        /// <summary>
        /// Decides whether a scaling move is allowed. At this point, we don't know if Azure Cache for Redis business
        /// rules allow scaling from the current to the target size. We just decide whether it is reasonable based on
        /// our knowledge of our production workload.
        /// The autoscaler will figure out how to reach the desired plan.
        /// </summary>
        /// <param name="currentClusterSize">Size the cluster currently has.</param>
        /// <param name="targetClusterSize">Candidate size being evaluated.</param>
        /// <param name="modelContext">
        /// Source of the limits the target is checked against: minimum cluster memory, minimum cluster requests
        /// per second and the optional maximum cluster memory.
        /// </param>
        private bool IsScalingAllowed(
            RedisClusterSize currentClusterSize,
            RedisClusterSize targetClusterSize,
            ModelContext modelContext)
            // WARNING: order matters in the following if statements. Please be careful.

            // Cluster must be able to handle the amount of data we'll give it, with some overhead in case of
            // production issues. Notice we don't introduce a per-shard restriction; reason for this is that the shards
            // distribute keys evenly.
            if (targetClusterSize.ClusterMemorySizeMb < modelContext.MinimumAllowedClusterMemoryMb)

            // Cluster must be able to handle the amount of operations needed. Notice we don't introduce a per-shard
            // restriction; reason for this is that the shards distribute keys evenly.
            if (targetClusterSize.EstimatedRequestsPerSecond < modelContext.MinimumAllowedClusterRps)

            // Disallow going over the maximum allowed cluster memory
            // NOTE: we only constrain on the target not being over the allowed size, rather than all nodes in the
            // path. The reason for this is that our ability to reach all nodes is based on being able to scale above
            // any specific memory threshold.
            // The maximum is optional: when it is null this check never triggers.
            if (modelContext.MaximumAllowedClusterMemoryMb != null && targetClusterSize.ClusterMemorySizeMb > modelContext.MaximumAllowedClusterMemoryMb.Value)

            // Always allow not doing anything if it's available.
            // NOTE: this is here because in downscale situations we always want to ensure we have the "status quo"
            // action available.
            if (currentClusterSize.Equals(targetClusterSize))

            // Disallow downscales that don't improve cost significantly
            // The threshold is optional: when it is null this rule is skipped entirely.
            if (_configuration.MinimumCostSavingForDownScaling != null)
                // Negative when the target is cheaper than the current size, so the saving is -monthlyCostDelta
                var monthlyCostDelta = targetClusterSize.MonthlyCostUsd - currentClusterSize.MonthlyCostUsd;
                // Matches downscales that do not cost more but save less than the configured minimum
                if (RedisScalingUtilities.IsDownScale(currentClusterSize, targetClusterSize) && monthlyCostDelta <= 0 && -monthlyCostDelta < _configuration.MinimumCostSavingForDownScaling)

        /// <summary>
        /// This function embodies the concept of "how much does it cost to switch from
        /// <paramref name="current"/> to <paramref name="target"/>". At this point, we can assume that:
        ///     - The two input sizes are valid states to be in
        ///     - We can reach the target from current via some amount of autoscaling operations
        /// Hence, we're just ranking amongst the many potential states.
        /// </summary>
        /// <param name="current">Size the cluster currently has.</param>
        /// <param name="target">Candidate size being ranked.</param>
        /// <param name="modelContext">Context of the current prediction.</param>
        /// <param name="shortestPaths">
        /// Shortest-path information used to look up the path from <paramref name="current"/> to
        /// <paramref name="target"/>.
        /// </param>
        /// <returns>
        /// For distinct sizes, the monthly cost of <paramref name="target"/> minus the monthly cost of
        /// <paramref name="current"/>, in USD.
        /// </returns>
        private static double CostFunction(RedisClusterSize current, RedisClusterSize target, ModelContext modelContext, IReadOnlyDictionary <RedisClusterSize, RedisScalingUtilities.Node> shortestPaths)
            // Switching to the same size (i.e. no op) is free
            if (current.Equals(target))

            // As written, the path only feeds the sanity check below: distinct sizes must be at least one hop apart
            var shortestPath = RedisScalingUtilities.ComputeShortestPath(shortestPaths, current, target);

            Contract.Assert(shortestPath.Count > 0);

            // Positive if we are spending more money, negative if we are saving
            return((double)(target.MonthlyCostUsd - current.MonthlyCostUsd));
        // Scenario: a "P2/1" instance with a single used-memory sample of 7.4 GB is run through the agent's
        // cluster size estimation.
        public Task DisallowsLowCostDownscalesAsync()
            return(RunTestAsync(async(operationContext, redisAutoscalingAgent) =>
                var redisInstance = new MockRedisInstance(RedisClusterSize.Parse("P2/1"));
                // Sample the agent is given as the observed used memory
                redisAutoscalingAgent.UsedMemoryBytes.Add("7.4 GB".ToSize());

                var modelOutput = await redisAutoscalingAgent
                                  .EstimateBestClusterSizeAsync(operationContext, redisInstance)

        // Parametrized scenario: starting from initialClusterSize, the agent's estimation is run and a result is
        // compared against expectedPath, whose entries are textual cluster sizes.
        public Task PrefersAddingShardsWhenMemoryGrowsAsync(string initialClusterSize, IEnumerable <string> expectedPath, string usedMemoryAcrossAllShards)
            return(RunTestAsync(async(operationContext, redisAutoscalingAgent) =>

                var redisInstance = new MockRedisInstance(RedisClusterSize.Parse(initialClusterSize));
                var modelOutput = await redisAutoscalingAgent
                                  .EstimateBestClusterSizeAsync(operationContext, redisInstance)

                // NOTE(review): BeEquivalentTo may not enforce element order unless strict ordering is
                // requested; confirm whether the order of the path is meant to be asserted as well.
                .BeEquivalentTo(expectedPath.Select(size => RedisClusterSize.Parse(size)));
        /// <summary>
        /// Derives shard-count constraints from the maximum server load observed across shards and stores them in
        /// <paramref name="modelContext"/>.
        /// </summary>
        /// <param name="context">Operation context forwarded to the metrics query.</param>
        /// <param name="now">Reference time forwarded to the metrics query.</param>
        /// <param name="redisAzureId">Identifier of the Redis instance whose metrics are fetched.</param>
        /// <param name="modelContext">Receives the shard-count constraints.</param>
        /// <param name="currentClusterSize">Current size; its shard count becomes the minimum allowed.</param>
        private async Task ComputeServerLoadFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, ModelContext modelContext, RedisClusterSize currentClusterSize)
            var maximumServerLoadAcrossShardsList = await FetchMaximumServerLoadAcrossShardsAsync(context, now, redisAzureId);

            // Patch in case the list happens to be empty (might only ever happen in tests)
            var maximumServerLoadAcrossShards = maximumServerLoadAcrossShardsList.Any() ? maximumServerLoadAcrossShardsList.Max() : 0;

            // First threshold: load below the configured medium level
            if (maximumServerLoadAcrossShards < _configuration.MediumServerLoadPct)

            // Keep at least as many shards as the cluster has right now
            modelContext.MinimumNumberOfShardsAllowed = currentClusterSize.Shards;

            // Second threshold: load below the configured high level
            if (maximumServerLoadAcrossShards < _configuration.HighServerLoadPct)

            // Forbid changing the number of shards altogether
            modelContext.DisallowChangingNumberOfShards = true;
        /// <summary>
        /// Computes the memory bounds a target cluster must respect and stores them in
        /// <paramref name="modelContext"/>: the minimum is the peak observed used memory plus the configured extra
        /// margin, the maximum is taken from configuration.
        /// </summary>
        /// <param name="context">Operation context of the caller.</param>
        /// <param name="now">End of the lookback window; the window starts <c>UsedMemoryLookback</c> earlier.</param>
        /// <param name="redisAzureId">Identifier of the Redis instance being analyzed.</param>
        /// <param name="currentClusterSize">Size the cluster currently has.</param>
        /// <param name="modelContext">Receives the minimum and maximum allowed cluster memory, in megabytes.</param>
        private async Task ComputeMemoryRelatedFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, ModelContext modelContext)
            var startTimeUtc = now - _configuration.UsedMemoryLookback;
            var endTimeUtc   = now;

            var usedMemoryBytes = await _monitorManagementClient.GetMetricsWithDimensionAsync(
                new[] { AzureRedisShardMetric.UsedMemory.ToMetricName() },
                aggregations : new[] { AggregationType.Maximum },

            // NOTE: Measurement values may be null if we are querying for data that is not present (i.e. a shard that
            // has disappeared, or such).
            // Measurements that share the same position within their series are summed (missing values count as
            // 0), giving one total per position. Presumably each series belongs to one shard, so each total is the
            // cluster-wide usage at that point in time - verify against GetMetricsWithDimensionAsync.
            var groupedMetrics = usedMemoryBytes
                                 .SelectMany(kvp => kvp.Value.Select((measurement, index) => (measurement, index)))
                                 .GroupBy(entry => entry.index)
                                 .OrderBy(group => group.Key)
                                 .Select(group => group.Sum(entry => entry.measurement.Maximum ?? 0))

            // Metric is reported in bytes, we use megabytes for everything
            // NOTE(review): Max() throws on an empty sequence of non-nullable values - confirm the metrics can
            // never come back empty here.
            var expectedClusterMemoryUsageMb = groupedMetrics.Max() / 1e+6;

            // Peak usage plus the configured fraction of extra headroom
            modelContext.MinimumAllowedClusterMemoryMb = (1 + _configuration.MinimumExtraMemoryAvailable) * expectedClusterMemoryUsageMb;

            modelContext.MaximumAllowedClusterMemoryMb = _configuration.MaximumClusterMemoryAllowedMb;
        /// <summary>
        /// Computes the minimum requests per second a target cluster must sustain and stores it in
        /// <paramref name="modelContext"/>: the peak observed operations per second plus the configured extra
        /// margin.
        /// </summary>
        /// <param name="context">Operation context of the caller.</param>
        /// <param name="now">End of the lookback window; the window starts <c>WorkloadLookback</c> earlier.</param>
        /// <param name="redisAzureId">Identifier of the Redis instance being analyzed.</param>
        /// <param name="currentClusterSize">Size the cluster currently has.</param>
        /// <param name="modelContext">Receives the minimum allowed cluster requests per second.</param>
        private async Task ComputeWorkloadRelatedFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, RedisClusterSize currentClusterSize, ModelContext modelContext)
            var startTimeUtc = now - _configuration.WorkloadLookback;
            var endTimeUtc   = now;

            var operationsPerSecond = await _monitorManagementClient.GetMetricsWithDimensionAsync(
                new[] { AzureRedisShardMetric.OperationsPerSecond.ToMetricName() },
                aggregations : new[] { AggregationType.Maximum },

            // NOTE: Measurement values may be null if we are querying for data that is not present (i.e. a shard that
            // has disappeared, or such).
            // Measurements that share the same position within their series are summed (missing values count as
            // 0), giving one total per position. Presumably each series belongs to one shard, so each total is the
            // cluster-wide load at that point in time - verify against GetMetricsWithDimensionAsync.
            var groupedMetrics = operationsPerSecond
                                 .SelectMany(kvp => kvp.Value.Select((measurement, index) => (measurement, index)))
                                 .GroupBy(entry => entry.index)
                                 .OrderBy(group => group.Key)
                                 .Select(group => group.Sum(entry => entry.measurement.Maximum ?? 0))

            if (groupedMetrics.Count == 0)
                // If all metrics are missing, we won't constrain plans on having a certain minimum number of
                // operations. This is used to account for an Azure Monitor API bug whereby some metrics may not be
                // reported

            // ops/s scales linearly with shards
            var expectedClusterRps = groupedMetrics.Max();

            // Peak load plus the configured fraction of extra headroom
            modelContext.MinimumAllowedClusterRps = (1 + _configuration.MinimumWorkloadExtraPct) * expectedClusterRps;
        /// <summary>
        /// Builds the <see cref="ModelContext"/> for a prediction by running the memory and workload feature
        /// computations against a freshly created context.
        /// </summary>
        /// <param name="context">Operation context forwarded to both computations.</param>
        /// <param name="now">Reference time forwarded to both computations.</param>
        /// <param name="redisAzureId">Identifier of the Redis instance being analyzed.</param>
        /// <param name="currentClusterSize">Size the cluster currently has.</param>
        private async Task <ModelContext> ComputeFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, RedisClusterSize currentClusterSize)
            var modelContext = new ModelContext();

            // Both computations are started before either is awaited, and both write into the same modelContext
            await Task.WhenAll(
                ComputeMemoryRelatedFeaturesAsync(context, now, redisAzureId, currentClusterSize, modelContext),
                ComputeWorkloadRelatedFeaturesAsync(context, now, redisAzureId, currentClusterSize, modelContext)

        /// <summary>
        /// Computes shortest-path information from <paramref name="currentClusterSize"/> to every known cluster
        /// size, using a restricted neighbor relation and a weight function that penalizes passing through sizes
        /// with less memory than required.
        /// </summary>
        /// <param name="currentClusterSize">Size the cluster currently has; source of every path.</param>
        /// <param name="modelContext">Supplies the minimum allowed cluster memory used by the weights.</param>
        /// <returns>
        /// The result of <c>RedisScalingUtilities.ComputeOneToAllShortestPath</c> over
        /// <c>RedisClusterSize.Instances</c>, keyed by cluster size.
        /// </returns>
        private static IReadOnlyDictionary <RedisClusterSize, RedisScalingUtilities.Node> ComputeAllowedPaths(RedisClusterSize currentClusterSize, ModelContext modelContext)
            // We need to reach the target cluster size, but we can't do it in one shot because business rules won't
            // let us, so we need to compute a path to get to it. This is probably the most complex part of the
            // algorithm, there are several competing aspects we want to optimize for, in descending importance:
            //  - We want for memory to get to the target level ASAP
            //  - We want to keep the number of shards as stable as possible, given that changing them can cause build
            //    failures
            //  - We'd like to get there in the fewest amount of time possible
            //  - The route needs to be deterministic, so that if we are forced to stop and re-compute it we'll take
            //    the same route.
            //  - We'd like to minimize the cost of the route
            // Multi-constraint optimization over graphs is NP-complete and algorithms are hard to come up with, so we
            // do our best.

            // NOTE: the lambda parameter below shadows the method parameter of the same name; inside the lambda,
            // currentClusterSize is the node being expanded, not the size the cluster currently has.
            Func <RedisClusterSize, IEnumerable <RedisClusterSize> > neighbors =
                currentClusterSize => currentClusterSize.ScaleEligibleSizes.Where(targetClusterSize =>
                // Constrain paths to downscale at most one shard at the time. This only makes paths longer, so it
                // is safe. The reason behind this is that the service doesn't really tolerate big reductions.
                if (targetClusterSize.Shards < currentClusterSize.Shards)
                    return(targetClusterSize.Shards == currentClusterSize.Shards - 1);


            Func <RedisClusterSize, RedisClusterSize, double> weight =
                (from, to) =>
                // This factor is used to avoid transitioning to any kind of intermediate plan that may cause a
                // production outage. If we don't have it, we may transition into a state in which we have less
                // cluster memory available than we need. By adjusting the weight function, we guarantee that
                // this only happens iff there is no better path; moreover, we will always choose the lesser of
                // two evils if given no choice.
                double clusterMemoryPenalization = 0;

                // Negative when the destination of the edge has less memory than the minimum we need
                var delta = to.ClusterMemorySizeMb - modelContext.MinimumAllowedClusterMemoryMb;
                if (delta < 0)
                    // The amount of cluster memory is less than we need, so we penalize taking this path by
                    // adding the amount of memory that keeps us away from the target.
                    clusterMemoryPenalization = -delta;

                // This needs to be at least one so we don't pick minimum paths that are arbitrarily long
                return(1 + clusterMemoryPenalization);

            return(RedisScalingUtilities.ComputeOneToAllShortestPath(vertices: RedisClusterSize.Instances, neighbors: neighbors, weight: weight, from: currentClusterSize));
 /// <summary>
 /// Creates the instance by forwarding every argument, unchanged, to the base class constructor.
 /// </summary>
 internal ReadOnlyRedisInstance(IAzure azure, string resourceId, IRedisCache redisCache, RedisClusterSize clusterSize)
     : base(azure, resourceId, redisCache, clusterSize)
 /// <summary>
 /// Creates a mock instance that reports <paramref name="clusterSize"/> as its cluster size.
 /// </summary>
 public MockRedisInstance(RedisClusterSize clusterSize)
     ClusterSize = clusterSize;