Example #1
0
        /// <summary>
        /// Answers "how much does it cost to switch from <paramref name="current"/> to
        /// <paramref name="target"/>". At this point, we can assume that:
        ///     - The two input sizes are valid states to be in
        ///     - We can reach the target from current via some amount of autoscaling operations
        /// Hence, we're just ranking amongst the many potential states.
        /// </summary>
        private static double CostFunction(RedisClusterSize current, RedisClusterSize target, ModelContext modelContext, IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> shortestPaths)
        {
            // A no-op transition (staying on the same size) costs nothing.
            if (current.Equals(target))
            {
                return 0;
            }

            // Sanity check: reachability is a precondition, so a path must exist.
            var path = RedisScalingUtilities.ComputeShortestPath(shortestPaths, current, target);
            Contract.Assert(path.Count > 0);

            // Positive if we are spending more money, negative if we are saving
            var monthlyCostDeltaUsd = target.MonthlyCostUsd - current.MonthlyCostUsd;
            return (double)monthlyCostDeltaUsd;
        }
Example #2
0
        /// <summary>
        /// Constrains the model's shard count based on how loaded the busiest shard has been:
        /// at medium load the shard count may not shrink; at high load it may not change at all.
        /// </summary>
        private async Task ComputeServerLoadFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, ModelContext modelContext, RedisClusterSize currentClusterSize)
        {
            var serverLoadSamples = await FetchMaximumServerLoadAcrossShardsAsync(context, now, redisAzureId);

            // Default to 0 when there are no samples at all (might only ever happen in tests)
            var peakServerLoad = serverLoadSamples.Any() ? serverLoadSamples.Max() : 0;

            if (peakServerLoad >= _configuration.MediumServerLoadPct)
            {
                // Medium load or above: don't allow scaling below the current shard count.
                modelContext.MinimumNumberOfShardsAllowed = currentClusterSize.Shards;

                if (peakServerLoad >= _configuration.HighServerLoadPct)
                {
                    // High load: pin the shard count entirely.
                    modelContext.DisallowChangingNumberOfShards = true;
                }
            }
        }
Example #3
0
        /// <summary>
        /// Constrains the model with a minimum allowed cluster RPS derived from the recently
        /// observed per-shard operation rate, padded by the configured extra-workload percentage.
        /// </summary>
        private async Task ComputeWorkloadFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, ModelContext modelContext)
        {
            var operationsPerShard = await FetchOperationsPerSecondPerShardAsync(context, now, redisAzureId);

            if (operationsPerShard.Count > 0)
            {
                // ops/s scales linearly with shards
                var expectedClusterRps = operationsPerShard.Max();

                modelContext.MinimumAllowedClusterRps = (1 + _configuration.MinimumWorkloadExtraPct) * expectedClusterRps;
            }

            // When all metrics are missing we deliberately leave the RPS floor unconstrained:
            // an Azure Monitor API bug can cause some metrics not to be reported at all.
        }
Example #4
0
        /// <summary>
        /// Constrains the model's cluster memory bounds: the minimum is derived from the peak
        /// observed per-shard memory usage (padded by the configured extra-memory percentage),
        /// and the maximum comes straight from configuration.
        /// </summary>
        private async Task ComputeMemoryFeaturesAsync(OperationContext context, DateTime now, string redisAzureId, ModelContext modelContext)
        {
            var groupedMetrics = await FetchMemoryUsedPerShardAsync(context, now, redisAzureId);

            // The maximum is a pure configuration cap, so it applies regardless of metric availability.
            modelContext.MaximumAllowedClusterMemoryMb = _configuration.MaximumClusterMemoryAllowedMb;

            // Fix: Max() throws InvalidOperationException on an empty sequence. Mirror the
            // missing-metrics handling of the sibling Compute*FeaturesAsync methods (an Azure
            // Monitor API bug can cause metrics not to be reported, and tests may yield no
            // samples) by leaving the memory floor unconstrained instead of crashing.
            if (!groupedMetrics.Any())
            {
                return;
            }

            // Metric is reported in bytes, we use megabytes for everything
            var expectedClusterMemoryUsageMb = groupedMetrics.Max() / 1e+6;

            modelContext.MinimumAllowedClusterMemoryMb = (1 + _configuration.MinimumExtraMemoryAvailable) * expectedClusterMemoryUsageMb;
        }
Example #5
0
        /// <summary>
        /// Computes, for every reachable cluster size, the allowed path from the current size,
        /// restricted by the model's shard constraints and weighted to avoid memory-starved
        /// intermediate states.
        /// </summary>
        private static IReadOnlyDictionary<RedisClusterSize, RedisScalingUtilities.Node> ComputeAllowedPaths(RedisClusterSize currentClusterSize, ModelContext modelContext)
        {
            // We need to reach the target cluster size, but we can't do it in one shot because business rules won't
            // let us, so we need to compute a path to get to it. This is probably the most complex part of the
            // algorithm, there are several competing aspects we want to optimize for, in descending importance:
            //  - We want for memory to get to the target level ASAP
            //  - We want to keep the number of shards as stable as possible, given that changing them can cause build
            //    failures
            //  - We'd like to get there in the fewest amount of time possible
            //  - The route needs to be deterministic, so that if we are forced to stop and re-compute it we'll take
            //    the same route.
            //  - We'd like to minimize the cost of the route
            // Multi-constraint optimization over graphs is NP-complete and algorithms are hard to come up with, so we
            // do our best.

            Func<RedisClusterSize, IEnumerable<RedisClusterSize>> neighbors = source =>
                source.ScaleEligibleSizes.Where(candidate =>
                {
                    // The shard count is pinned while the model disallows changing it.
                    if (modelContext.DisallowChangingNumberOfShards && candidate.Shards != source.Shards)
                    {
                        return false;
                    }

                    // Respect the model's lower bound on shard count, when one is set.
                    if (modelContext.MinimumNumberOfShardsAllowed != null && candidate.Shards < modelContext.MinimumNumberOfShardsAllowed.Value)
                    {
                        return false;
                    }

                    // Keeping or growing the shard count is always permitted here.
                    if (candidate.Shards >= source.Shards)
                    {
                        return true;
                    }

                    // Constrain paths to downscale at most one shard at the time. This only makes paths longer, so it
                    // is safe. The reason behind this is that the service doesn't really tolerate big reductions.
                    return candidate.Shards == source.Shards - 1;
                });

            Func<RedisClusterSize, RedisClusterSize, double> weight = (from, to) =>
            {
                // This factor is used to avoid transitioning to any kind of intermediate plan that may cause a
                // production outage. If we don't have it, we may transition into a state in which we have less
                // cluster memory available than we need. By adjusting the weight function, we guarantee that
                // this only happens iff there is no better path; moreover, we will always choose the lesser of
                // two evils if given no choice.
                var memoryShortfallMb = modelContext.MinimumAllowedClusterMemoryMb - to.ClusterMemorySizeMb;

                // Penalize edges whose destination has less cluster memory than we need, by the size of the gap.
                double clusterMemoryPenalization = memoryShortfallMb > 0 ? memoryShortfallMb : 0;

                // This needs to be at least one so we don't pick minimum paths that are arbitrarily long
                return 1 + clusterMemoryPenalization;
            };

            return RedisScalingUtilities.ComputeOneToAllShortestPath(vertices: RedisClusterSize.Instances, neighbors: neighbors, weight: weight, from: currentClusterSize);
        }