/// <summary>
/// Finds regressions for a given source.
/// Steps:
/// - Query the table for the latest rows specified in the source
/// - Group records by Scenario + Description (descriptor)
/// - For each unique descriptor
///   - Find matching rules from the source
///   - Evaluate the source's probes for each record
///   - Calculate the standard deviation
///   - Look for 2 consecutive deviations
/// </summary>
/// <param name="source">
/// The source to analyze. Its table name, day ranges (DaysToLoad, DaysToAnalyze, DaysToSkip),
/// rules and regression probes drive the detection. Nothing is yielded when
/// <c>source.Regressions</c> is null.
/// </param>
/// <returns>
/// An asynchronous sequence with one <see cref="Regression"/> per detected regression,
/// carrying the previous and current results, the magnitude of the change, the standard
/// deviation and average of the probe values, the labels and owners from the matching rules
/// and from the source, and the recovered result when a later value is back within limits.
/// </returns>
private static async IAsyncEnumerable<Regression> FindRegression(Source source)
{
    if (source.Regressions == null)
    {
        yield break;
    }

    var detectionMinDateTimeUtc = DateTime.UtcNow.AddDays(0 - source.DaysToAnalyze);
    var detectionMaxDateTimeUtc = DateTime.UtcNow.AddDays(0 - source.DaysToSkip);

    var allResults = new List<BenchmarksResult>();

    // Load latest records

    Console.Write("Loading records... ");

    using (var connection = new SqlConnection(_options.ConnectionString))
    {
        // NOTE(review): the table name is formatted into the query text; this presumably comes
        // from trusted configuration - confirm.
        using (var command = new SqlCommand(String.Format(Queries.Latest, source.Table), connection))
        {
            // Load more days than are analyzed (e.g. 14 days of data to measure 7 days of
            // standard deviation prior to detection)
            command.Parameters.AddWithValue("@startDate", DateTime.UtcNow.AddDays(0 - source.DaysToLoad));

            await connection.OpenAsync();

            // The reader is disposed explicitly instead of relying on the connection's disposal.
            using (var reader = await command.ExecuteReaderAsync())
            {
                while (await reader.ReadAsync())
                {
                    allResults.Add(new BenchmarksResult
                    {
                        Id = Convert.ToInt32(reader["Id"]),
                        Excluded = Convert.ToBoolean(reader["Excluded"]),
                        DateTimeUtc = (DateTimeOffset)reader["DateTimeUtc"],
                        Session = Convert.ToString(reader["Session"]),
                        Scenario = Convert.ToString(reader["Scenario"]),
                        Description = Convert.ToString(reader["Description"]),
                        Document = Convert.ToString(reader["Document"]),
                    });
                }
            }
        }
    }

    Console.WriteLine($"{allResults.Count} found");

    // Reorder results chronologically
    // (presumably the query returns the most recent rows first - verify against Queries.Latest)
    allResults.Reverse();

    // Group results by descriptor ("Scenario:Description")
    var resultsByScenario = allResults
        .GroupBy(x => x.Scenario + ":" + x.Description)
        .ToDictionary(x => x.Key, x => x.ToArray());

    foreach (var descriptor in resultsByScenario.Keys)
    {
        // Does the descriptor match a rule?
        if (!source.Include(descriptor))
        {
            continue;
        }

        var rules = source.Match(descriptor);

        // Should regressions be ignored for this descriptor?
        // The last rule that sets IgnoreRegressions wins.
        var lastIgnoreRegressionRule = rules.LastOrDefault(x => x.IgnoreRegressions != null);

        if (lastIgnoreRegressionRule != null && lastIgnoreRegressionRule.IgnoreRegressions.Value)
        {
            if (_options.Verbose)
            {
                Console.WriteLine("Regressions ignored");
            }

            continue;
        }

        var results = resultsByScenario[descriptor];

        foreach (var probe in source.Regressions.Probes)
        {
            if (_options.Verbose)
            {
                Console.WriteLine($"Evaluating probe {probe.Path} for {results.Length} benchmarks");
            }

            // Resolve the path for the metric; results without a matching token are dropped
            var resultSet = results
                .Select(x => new { Result = x, Token = x.Data.SelectTokens(probe.Path).FirstOrDefault() })
                .Where(x => x.Token != null)
                .Select(x => new { Result = x.Result, Value = Convert.ToDouble(x.Token) })
                .ToArray();

            // Find regressions

            // Can't find a regression if there are less than 5 values
            if (resultSet.Length < 5)
            {
                if (_options.Verbose)
                {
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    Console.WriteLine($"Not enough data ({resultSet.Length})");
                    Console.ResetColor();
                }

                continue;
            }

            // Calculate the (population) standard deviation over all the values of the probe
            var values = resultSet.Select(x => x.Value).ToArray();

            if (_options.Verbose)
            {
                Console.WriteLine($"Values: [{String.Join(",", values)}]");
            }

            double average = values.Average();
            double sumOfSquaresOfDifferences = values.Sum(val => (val - average) * (val - average));
            double standardDeviation = Math.Sqrt(sumOfSquaresOfDifferences / values.Length);

            // Look for 2 consecutive values that are outside of the threshold,
            // subsequent to 3 consecutive values that are inside the threshold.

            // "<=" so that the final 5-value window is inspected too: the highest index read
            // below is i + 4, which is the last element when i == resultSet.Length - 5.
            for (var i = 0; i <= resultSet.Length - 5; i++)
            {
                // Ignore results before the searched date and after the skipped dates
                if (resultSet[i].Result.DateTimeUtc < detectionMinDateTimeUtc
                    || resultSet[i].Result.DateTimeUtc >= detectionMaxDateTimeUtc)
                {
                    continue;
                }

                // value1/value2: how far the two values after values[i] drift from it (stable part).
                // value3/value4: how far the next two values drift from values[i + 2] (changed part).
                var value1 = Math.Abs(values[i + 1] - values[i]);
                var value2 = Math.Abs(values[i + 2] - values[i]);

                // Keep the signed deltas: the direction check must run on them, since the sign
                // of an absolute value carries no direction.
                var delta3 = values[i + 3] - values[i + 2];
                var delta4 = values[i + 4] - values[i + 2];
                var value3 = Math.Abs(delta3);
                var value4 = Math.Abs(delta4);

                // Both deviating values have to move in the same direction
                var sameDirection = Math.Sign(delta3) == Math.Sign(delta4);

                if (_options.Verbose)
                {
                    Console.WriteLine($"{descriptor} {probe.Path} {resultSet[i+2].Result.DateTimeUtc} {values[i+0]} {values[i+1]} {values[i+2]} {values[i+3]} ({value3}) {values[i+4]} ({value4}) / {standardDeviation * probe.Threshold:n0}");
                }

                var hasRegressed = false;

                switch (probe.Unit)
                {
                    case ThresholdUnits.StDev:
                        // factor of standard deviation
                        hasRegressed = value1 < standardDeviation
                            && value2 < standardDeviation
                            && value3 >= probe.Threshold * standardDeviation
                            && value4 >= probe.Threshold * standardDeviation
                            && sameDirection;

                        break;

                    case ThresholdUnits.Percent:
                        // percentage of the average of values
                        hasRegressed = value1 < average * (probe.Threshold / 100)
                            && value2 < average * (probe.Threshold / 100)
                            && value3 >= average * (probe.Threshold / 100)
                            && value4 >= average * (probe.Threshold / 100)
                            && sameDirection;

                        break;

                    case ThresholdUnits.Absolute:
                        // absolute deviation
                        hasRegressed = value1 < probe.Threshold
                            && value2 < probe.Threshold
                            && value3 >= probe.Threshold
                            && value4 >= probe.Threshold
                            && sameDirection;

                        break;

                    default:
                        break;
                }

                if (hasRegressed)
                {
                    if (_options.Verbose)
                    {
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine("Regression");
                        Console.ResetColor();
                    }

                    var regression = new Regression
                    {
                        PreviousResult = resultSet[i + 2].Result,
                        CurrentResult = resultSet[i + 3].Result,
                        // Reported as a magnitude, as before
                        Change = value3,
                        StandardDeviation = standardDeviation,
                        Average = average
                    };

                    // Labels and owners come from every matching rule, then from the source itself
                    foreach (var rule in rules)
                    {
                        foreach (var label in rule.Labels)
                        {
                            regression.Labels.Add(label);
                        }

                        foreach (var owner in rule.Owners)
                        {
                            regression.Owners.Add(owner);
                        }
                    }

                    foreach (var label in source.Regressions.Labels)
                    {
                        regression.Labels.Add(label);
                    }

                    foreach (var owner in source.Regressions.Owners)
                    {
                        regression.Owners.Add(owner);
                    }

                    // If there are subsequent measurements, detect if the benchmark has
                    // recovered by searching for a value in the limits

                    for (var j = i + 5; j < resultSet.Length; j++)
                    {
                        // NOTE(review): nextValue and value4 are both magnitudes here, so the sign
                        // comparison below only fails when nextValue is exactly 0 (assuming a
                        // positive threshold). Left unchanged - confirm the intended recovery rule.
                        var nextValue = Math.Abs(values[j] - values[i + 2]);

                        var hasRecovered = false;

                        switch (probe.Unit)
                        {
                            case ThresholdUnits.StDev:
                                // factor of standard deviation
                                hasRecovered = nextValue < probe.Threshold * standardDeviation
                                    && Math.Sign(nextValue) == Math.Sign(value4);

                                break;

                            case ThresholdUnits.Percent:
                                // percentage of the average of values
                                hasRecovered = nextValue < average * (probe.Threshold / 100)
                                    && Math.Sign(nextValue) == Math.Sign(value4);

                                break;

                            case ThresholdUnits.Absolute:
                                // absolute deviation
                                hasRecovered = nextValue < probe.Threshold
                                    && Math.Sign(nextValue) == Math.Sign(value4);

                                break;

                            default:
                                break;
                        }

                        if (hasRecovered)
                        {
                            regression.RecoveredResult = resultSet[j].Result;

                            Console.ForegroundColor = ConsoleColor.Green;
                            Console.WriteLine($"Recovered on {regression.RecoveredResult.DateTimeUtc}");
                            Console.ResetColor();

                            break;
                        }
                    }

                    yield return regression;
                }
            }
        }
    }
}
/// <summary>
/// Finds regressions for a given source.
/// Steps:
/// - Query the table for the latest rows specified in the source
/// - Group records by Scenario + Description (descriptor)
/// - For each unique descriptor
///   - Find matching rules from the source
///   - Evaluate the source's probes for each record
///   - Calculate the standard deviation from the values preceding each checked window
///   - Look for 2 consecutive deviations
/// </summary>
/// <param name="source">
/// The source to analyze. Its table name, day ranges (DaysToLoad, DaysToSkip), StdevCount,
/// rules and regression probes drive the detection. Nothing is yielded when
/// <c>source.Regressions</c> is null.
/// </param>
/// <returns>
/// An asynchronous sequence with one <see cref="Regression"/> per detected regression,
/// carrying the previous and current results, the signed change, the standard deviation and
/// average used for the check, the labels and owners from the matching rules and from the
/// source, and the recovered result when a later value is back within limits.
/// </returns>
private static async IAsyncEnumerable<Regression> FindRegression(Source source)
{
    if (source.Regressions == null)
    {
        yield break;
    }

    var loadStartDateTimeUtc = DateTime.UtcNow.AddDays(0 - source.DaysToLoad);
    var detectionMaxDateTimeUtc = DateTime.UtcNow.AddDays(0 - source.DaysToSkip);

    var allResults = new List<BenchmarksResult>();

    // Load latest records

    Console.Write("Loading records... ");

    using (var connection = new SqlConnection(_options.ConnectionString))
    {
        // NOTE(review): the table name is formatted into the query text; this presumably comes
        // from trusted configuration - confirm.
        using (var command = new SqlCommand(String.Format(Queries.Latest, source.Table), connection))
        {
            command.Parameters.AddWithValue("@startDate", loadStartDateTimeUtc);

            await connection.OpenAsync();

            // The reader is disposed explicitly instead of relying on the connection's disposal.
            using (var reader = await command.ExecuteReaderAsync())
            {
                while (await reader.ReadAsync())
                {
                    allResults.Add(new BenchmarksResult
                    {
                        Id = Convert.ToInt32(reader["Id"]),
                        Excluded = Convert.ToBoolean(reader["Excluded"]),
                        DateTimeUtc = (DateTimeOffset)reader["DateTimeUtc"],
                        Session = Convert.ToString(reader["Session"]),
                        Scenario = Convert.ToString(reader["Scenario"]),
                        Description = Convert.ToString(reader["Description"]),
                        Document = Convert.ToString(reader["Document"]),
                    });
                }
            }
        }
    }

    Console.WriteLine($"{allResults.Count} found");

    // Reorder results chronologically
    // (presumably the query returns the most recent rows first - verify against Queries.Latest)
    allResults.Reverse();

    // Group results by descriptor ("Scenario:Description")
    var resultsByScenario = allResults
        .GroupBy(x => x.Scenario + ":" + x.Description)
        .ToDictionary(x => x.Key, x => x.ToArray());

    foreach (var descriptor in resultsByScenario.Keys)
    {
        // Does the descriptor match a rule?
        if (!source.Include(descriptor))
        {
            continue;
        }

        var rules = source.Match(descriptor);

        // Should regressions be ignored for this descriptor?
        // The last rule that sets IgnoreRegressions wins.
        var lastIgnoreRegressionRule = rules.LastOrDefault(x => x.IgnoreRegressions != null);

        if (lastIgnoreRegressionRule != null && lastIgnoreRegressionRule.IgnoreRegressions.Value)
        {
            if (_options.Verbose)
            {
                Console.WriteLine("Regressions ignored");
            }

            continue;
        }

        var results = resultsByScenario[descriptor];

        foreach (var probe in source.Regressions.Probes)
        {
            if (_options.Verbose)
            {
                Console.WriteLine();
                Console.WriteLine($"Evaluating probe {descriptor} {probe.Path} with {results.Length} results");
                Console.WriteLine("=============================================================================================");
                Console.WriteLine();
            }

            // Resolve the path for the metric; results without a matching token are dropped
            var resultSet = results
                .Select(x => new { Result = x, Token = x.Data.SelectTokens(probe.Path).FirstOrDefault() })
                .Where(x => x.Token != null)
                .Select(x => new { Result = x.Result, Value = Convert.ToDouble(x.Token) })
                .ToArray();

            // Find regressions

            // Can't find a regression if there are less than 5 values
            if (resultSet.Length < 5)
            {
                if (_options.Verbose)
                {
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    Console.WriteLine($"Not enough data ({resultSet.Length})");
                    Console.ResetColor();
                }

                continue;
            }

            // Project the values once; they are used for logging and for the detection below
            var values = resultSet.Select(x => x.Value).ToArray();

            if (_options.Verbose)
            {
                Console.WriteLine($"Values: {JsonConvert.SerializeObject(values)}");
            }

            // Look for 2 consecutive values that are outside of the threshold,
            // subsequent to 3 consecutive values that are inside the threshold.

            // 5 is the number of data points necessary to detect a threshold.
            // "<=" so that the final 5-value window is inspected too: the highest index read
            // below is i + 4, which is the last element when i == resultSet.Length - 5.
            for (var i = 0; i <= resultSet.Length - 5; i++)
            {
                // Skip the measurement if it's too recent
                if (resultSet[i].Result.DateTimeUtc >= detectionMaxDateTimeUtc)
                {
                    continue;
                }

                if (_options.Verbose)
                {
                    Console.WriteLine($"Checking {resultSet[i + 3].Value} at {resultSet[i + 3].Result.DateTimeUtc} with values {JsonConvert.SerializeObject(values.Skip(i).Take(5).ToArray())}");
                }

                // Measure stdev by picking the StdevCount results up to and including values[i]
                var stdevs = values.Take(i + 1).TakeLast(source.StdevCount).ToArray();

                if (stdevs.Length < source.StdevCount && probe.Unit == ThresholdUnits.StDev)
                {
                    Console.WriteLine($"Not enough values to build a standard deviation: {JsonConvert.SerializeObject(stdevs)}");
                    continue;
                }

                // Calculate the average and (population) stdev from the selected values
                double average = stdevs.Average();
                double sumOfSquaresOfDifferences = stdevs.Sum(val => (val - average) * (val - average));
                double standardDeviation = Math.Sqrt(sumOfSquaresOfDifferences / stdevs.Length);

                if (_options.Verbose)
                {
                    Console.WriteLine($"Building stdev ({standardDeviation}) from last {source.StdevCount} values {JsonConvert.SerializeObject(stdevs)}");
                }

                /*                      checked value (included in stdev)
                 *                      ^                          ______/i+3---------i+4---------
                 *  (stdev results) ----i---------i+1---------i+2/
                 *
                 *                      <- value1 ->           <- value3 ->
                 *                      <------- value2 -------><------- value4 ------>
                 */

                if (standardDeviation == 0)
                {
                    // We skip measurement with stdev of zero since it could induce divisions by zero, and any change will trigger
                    // a regression
                    // NOTE(review): this skip also applies to the Percent and Absolute units,
                    // whose thresholds do not use the standard deviation - confirm this is intended.
                    Console.WriteLine($"Ignoring measurement with stdev = 0");
                    continue;
                }

                // Signed deltas: value1/value2 relative to values[i], value3/value4 relative to values[i + 2]
                var value1 = values[i + 1] - values[i];
                var value2 = values[i + 2] - values[i];
                var value3 = values[i + 3] - values[i + 2];
                var value4 = values[i + 4] - values[i + 2];

                if (_options.Verbose)
                {
                    Console.WriteLine($"Next values: {values[i + 0]} {values[i + 1]} {values[i + 2]} {values[i + 3]} {values[i + 4]}");
                    Console.WriteLine($"Deviations: {value1:n0} {value2:n0} {value3:n0} {value4:n0} Allowed deviation: {standardDeviation * probe.Threshold:n0}");
                }

                var hasRegressed = false;

                switch (probe.Unit)
                {
                    case ThresholdUnits.StDev:
                        // factor of standard deviation
                        hasRegressed = Math.Abs(value1) < probe.Threshold * standardDeviation
                            && Math.Abs(value2) < probe.Threshold * standardDeviation
                            && Math.Abs(value3) >= probe.Threshold * standardDeviation
                            && Math.Abs(value4) >= probe.Threshold * standardDeviation
                            && Math.Sign(value3) == Math.Sign(value4);

                        break;

                    case ThresholdUnits.Percent:
                        // percentage of the average of values
                        hasRegressed = Math.Abs(value1) < average * (probe.Threshold / 100)
                            && Math.Abs(value2) < average * (probe.Threshold / 100)
                            && Math.Abs(value3) >= average * (probe.Threshold / 100)
                            && Math.Abs(value4) >= average * (probe.Threshold / 100)
                            && Math.Sign(value3) == Math.Sign(value4);

                        break;

                    case ThresholdUnits.Absolute:
                        // absolute deviation
                        hasRegressed = Math.Abs(value1) < probe.Threshold
                            && Math.Abs(value2) < probe.Threshold
                            && Math.Abs(value3) >= probe.Threshold
                            && Math.Abs(value4) >= probe.Threshold
                            && Math.Sign(value3) == Math.Sign(value4);

                        break;

                    default:
                        break;
                }

                if (hasRegressed)
                {
                    var regression = new Regression
                    {
                        PreviousResult = resultSet[i + 2].Result,
                        CurrentResult = resultSet[i + 3].Result,
                        Change = value3,
                        StandardDeviation = standardDeviation,
                        Average = average
                    };

                    if (_options.Verbose)
                    {
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine($"Regression detected: {values[i + 2]:n0} to {values[i + 3]:n0} for {regression.Identifier}");
                        Console.ResetColor();
                    }

                    // Labels and owners come from every matching rule, then from the source itself
                    foreach (var rule in rules)
                    {
                        foreach (var label in rule.Labels)
                        {
                            regression.Labels.Add(label);
                        }

                        foreach (var owner in rule.Owners)
                        {
                            regression.Owners.Add(owner);
                        }
                    }

                    foreach (var label in source.Regressions.Labels)
                    {
                        regression.Labels.Add(label);
                    }

                    foreach (var owner in source.Regressions.Owners)
                    {
                        regression.Owners.Add(owner);
                    }

                    // If there are subsequent measurements, detect if the benchmark has
                    // recovered by searching for a value in the limits

                    for (var j = i + 5; j < resultSet.Length; j++)
                    {
                        // Signed distance from the pre-regression value
                        // NOTE(review): recovery requires the value to stay on the same side of
                        // values[i + 2] as the regression; a value exactly at, or past, the
                        // baseline is not counted as recovered - confirm this is intended.
                        var nextValue = values[j] - values[i + 2];

                        var hasRecovered = false;

                        switch (probe.Unit)
                        {
                            case ThresholdUnits.StDev:
                                // factor of standard deviation
                                hasRecovered = Math.Abs(nextValue) < probe.Threshold * standardDeviation
                                    && Math.Sign(nextValue) == Math.Sign(value4);

                                break;

                            case ThresholdUnits.Percent:
                                // percentage of the average of values
                                hasRecovered = Math.Abs(nextValue) < average * (probe.Threshold / 100)
                                    && Math.Sign(nextValue) == Math.Sign(value4);

                                break;

                            case ThresholdUnits.Absolute:
                                // absolute deviation
                                hasRecovered = Math.Abs(nextValue) < probe.Threshold
                                    && Math.Sign(nextValue) == Math.Sign(value4);

                                break;

                            default:
                                break;
                        }

                        if (hasRecovered)
                        {
                            regression.RecoveredResult = resultSet[j].Result;

                            Console.ForegroundColor = ConsoleColor.Green;
                            Console.WriteLine($"Recovered on {regression.RecoveredResult.DateTimeUtc}");
                            Console.ResetColor();

                            break;
                        }
                    }

                    yield return regression;
                }
            }
        }
    }
}