Beispiel #1
0
        // TODO: Nothing in this implementation considers anything outside of pull requests, but often commits are made directly to main
        /// <summary>
        /// Groups the supplied pull request metrics by <c>ClosedAt.Date</c> and builds one
        /// <see cref="MetricSnapshot"/> per date from the <c>CalculateMedian</c> and
        /// <c>CalculatePopulationVariance</c> results for that group.
        /// </summary>
        /// <param name="prMetrics">Per-pull-request metrics to aggregate.</param>
        /// <returns>
        /// One snapshot per distinct close date, in the order the dates first appear in
        /// <paramref name="prMetrics"/>. Every snapshot carries the same <c>UpdatedAt</c> value (the clock is
        /// read once, before grouping) and a dictionary of that date's pull request metrics keyed by <c>Number</c>.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Raised by <c>ToDictionary</c> when two entries closed on the same date share a <c>Number</c>.
        /// </exception>
        public List<MetricSnapshot> CalculateRepositoryMetricsOverTime(IList<PullRequestMetrics> prMetrics)
        {
            var computedAt = _clock.DateTimeOffsetUtcNow();
            var snapshots  = new List<MetricSnapshot>();

            foreach (var closedSameDay in prMetrics.GroupBy(metric => metric.ClosedAt.Date))
            {
                // Lazy projections shared by the median and variance calculations below.
                var commentCounts     = closedSameDay.Select(pr => pr.CommentCount);
                var commentWordCounts = closedSameDay.Select(pr => pr.CommentWordCount);

                var commentCountMedian     = commentCounts.CalculateMedian();
                var commentWordCountMedian = commentWordCounts.CalculateMedian();
                var openForMedian          = closedSameDay.Select(pr => pr.OpenFor).CalculateMedian();
                var businessDaysOpenMedian = closedSameDay.Select(pr => pr.BusinessDaysOpen).CalculateMedian();

                // The variance helper is fed doubles, so the counts are widened first.
                var commentCountVariance     = commentCounts.Select(count => (double)count).CalculatePopulationVariance();
                var commentWordCountVariance = commentWordCounts.Select(count => (double)count).CalculatePopulationVariance();

                snapshots.Add(new MetricSnapshot
                {
                    UpdatedAt = computedAt,
                    Date      = closedSameDay.Key,
                    // Fractional seconds are truncated by the cast.
                    MedianSecondsToPullRequestClosure      = (int)openForMedian.TotalSeconds,
                    MedianBusinessDaysToPullRequestClosure = businessDaysOpenMedian,
                    MedianCommentCountPerPullRequest       = commentCountMedian,
                    MedianWordsPerComment                  = commentWordCountMedian,
                    CommentCountPopulationVariance         = commentCountVariance,
                    CommentWordCountVariance               = commentWordCountVariance,
                    PullRequestMetrics                     = closedSameDay.ToDictionary(pr => pr.Number),
                });
            }

            return snapshots;
        }
Beispiel #2
0
        /// <summary>
        /// Runs one pass of the work loop: refreshes closed pull requests from upstream, computes per-pull-request
        /// metrics, rolls them up into repository snapshots, stamps those snapshots with the repository identity
        /// and the current scorer set, and saves them through the analysis manager.
        /// </summary>
        /// <remarks>
        /// Returns without saving anything when the refresh yields no pull requests. The timestamp written to
        /// <c>CreatedAt</c>/<c>UpdatedAt</c> is read from the clock once, before the upstream refresh starts.
        /// </remarks>
        /// <returns>A task that completes when the snapshots have been saved, or immediately after the refresh if there is nothing to analyze.</returns>
        public async Task ExecuteAsync()
        {
            var startedAt = _clock.DateTimeOffsetUtcNow();

            _logger.LogInformation($"{Name} work loop starting");
            var loopStopwatch = Stopwatch.StartNew();

            var refreshedPrs = await _repoManager.RefreshFromUpstreamAsync(ItemState.Closed);
            if (!refreshedPrs.Any())
            {
                // NOTE(review): the message text looks like it is missing the word "to"; left as-is because
                // log consumers may match on it.
                _logger.LogInformation($"{Name} has no pull requests analyze.");
                return;
            }

            // Phase 1: per-pull-request metrics.
            _logger.LogInformation($"{Name} comment analysis starting for {refreshedPrs.Count:N0} pull requests");
            var prPhaseStopwatch = Stopwatch.StartNew();
            var prMetrics = refreshedPrs
                            .Select(_prAnalyzer.CalculatePullRequestMetrics)
                            .ToList();
            prPhaseStopwatch.Stop();
            _logger.LogInformation($"{Name} comment analysis completed for {refreshedPrs.Count:N0} pull requests in {prPhaseStopwatch.Elapsed.ToMicroseconds():N0} microseconds");

            // Phase 2: repository-level snapshots, timed together with the stamping loop.
            _logger.LogInformation($"{Name} repository analysis starting for {refreshedPrs.Count:N0} pull requests");
            var repoPhaseStopwatch = Stopwatch.StartNew();

            var snapshots = _repoAnalyzer.CalculateRepositoryMetricsOverTime(prMetrics);
            foreach (var snapshot in snapshots)
            {
                snapshot.Owner     = _repoManager.RepoOwner;
                snapshot.Name      = _repoManager.RepoName;
                snapshot.Url       = _repoManager.RepoUrl;
                snapshot.CreatedAt = startedAt;
                snapshot.UpdatedAt = startedAt;
                // Each snapshot receives its own copy of the scorer set.
                snapshot.Scorers   = _prAnalyzer.Scorers.ToHashSet();
            }

            repoPhaseStopwatch.Stop();
            _logger.LogInformation($"{Name} repository analysis completed for {refreshedPrs.Count:N0} pull requests in {repoPhaseStopwatch.Elapsed.ToMicroseconds():N0} microseconds");

            await _analysisManager.SaveAsync(snapshots);

            loopStopwatch.Stop();
            _logger.LogInformation($"{Name} work loop completed in {loopStopwatch.ElapsedMilliseconds:N0}ms");
        }
Beispiel #3
0
        /// <summary>
        /// Creates a <see cref="RepoWorker"/> for the repository behind <paramref name="repoManager"/> and, before
        /// returning it, brings the stored analysis history in line with the cached pull requests and the
        /// analyzer's current scorer set.
        /// </summary>
        /// <remarks>
        /// Two kinds of catch-up work are performed:
        /// <list type="number">
        /// <item><description>Stored snapshots whose <c>Scorers</c> set differs from <c>prAnalyzer.Scorers</c> are recomputed from the cached pull requests; the replacements keep the original <c>CreatedAt</c>.</description></item>
        /// <item><description>Cached pull requests that do not appear in any stored snapshot are analyzed into new snapshots.</description></item>
        /// </list>
        /// Both result sets are written with a single <c>SaveAsync</c> call. When neither kind of work exists,
        /// nothing is saved.
        /// </remarks>
        /// <param name="repoManager">Source of the cached pull requests and of the repository owner, name and URL.</param>
        /// <param name="prAnalyzer">Computes per-pull-request metrics and exposes the current scorer set.</param>
        /// <param name="repoAnalyzer">Rolls per-pull-request metrics up into snapshots.</param>
        /// <param name="analysisManager">Loads the stored history and saves new or replacement snapshots.</param>
        /// <param name="clock">Supplies the timestamps written to the snapshots.</param>
        /// <param name="logger">Passed through to the worker's constructor.</param>
        /// <returns>The constructed worker.</returns>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        public static async Task<RepoWorker> InitializeAsync(
            IRepoManager repoManager,
            IPullRequestAnalyzer prAnalyzer,
            IRepositoryAnalyzer repoAnalyzer,
            IHistoricalAnalysisManager analysisManager,
            IClock clock,
            ILogger logger)
        {
            _ = repoManager ?? throw new ArgumentNullException(nameof(repoManager));
            _ = prAnalyzer ?? throw new ArgumentNullException(nameof(prAnalyzer));
            _ = repoAnalyzer ?? throw new ArgumentNullException(nameof(repoAnalyzer));
            _ = analysisManager ?? throw new ArgumentNullException(nameof(analysisManager));
            _ = clock ?? throw new ArgumentNullException(nameof(clock));
            _ = logger ?? throw new ArgumentNullException(nameof(logger));

            var workerName = $"{repoManager.RepoOwner}:{repoManager.RepoName}";

            // Plan:
            //   1) snapshots computed with a different scorer set -> recompute and overwrite
            //   2) cached pull requests that were never analyzed  -> analyze and record

            var cachedPullRequests = await repoManager.GetPullRequestsAsync();
            var cacheByNumber      = cachedPullRequests.ToDictionary(pr => pr.Number);

            // A null history is treated the same as an empty one.
            var history = (await analysisManager.LoadHistoryAsync(repoManager.RepoOwner, repoManager.RepoName))
                          ?? new List<MetricSnapshot>();

            // NOTE(review): ToDictionary throws if the same pull request number is present in more than one
            // stored snapshot.
            var analyzedByNumber = history
                                   .SelectMany(snapshot => snapshot.PullRequestMetrics.Values)
                                   .ToDictionary(prMetric => prMetric.Number);

            // 1) Stored snapshots whose scorer set no longer matches the analyzer's.
            var staleSnapshots = history
                                 .Where(snapshot => !snapshot.Scorers.SetEquals(prAnalyzer.Scorers))
                                 .ToList();

            // 2) Cached pull request numbers that no stored snapshot mentions.
            var unanalyzedNumbers = cacheByNumber.Keys
                                    .Where(number => !analyzedByNumber.ContainsKey(number))
                                    .ToList();

            var worker = new RepoWorker(repoManager, workerName, prAnalyzer, repoAnalyzer, analysisManager, clock, logger);

            if (staleSnapshots.Count == 0 && unanalyzedNumbers.Count == 0)
            {
                return worker;
            }

            var pendingSave = new List<MetricSnapshot>();

            if (staleSnapshots.Count > 0)
            {
                var recomputedAt   = clock.DateTimeOffsetUtcNow();
                var currentScorers = prAnalyzer.Scorers.ToHashSet();

                foreach (var stale in staleSnapshots)
                {
                    // NOTE(review): the indexer throws KeyNotFoundException when a previously analyzed pull
                    // request is no longer in the cache.
                    var recomputedPrMetrics = stale.PullRequestMetrics.Values
                                              .Select(prMetric => cacheByNumber[prMetric.Number])
                                              .Select(prAnalyzer.CalculatePullRequestMetrics)
                                              .ToList();
                    var replacements = repoAnalyzer.CalculateRepositoryMetricsOverTime(recomputedPrMetrics);

                    foreach (var replacement in replacements)
                    {
                        // Keep the original creation time; only the update time moves.
                        replacement.CreatedAt = stale.CreatedAt;
                        replacement.UpdatedAt = recomputedAt;
                        replacement.Scorers   = currentScorers;
                        replacement.Owner     = repoManager.RepoOwner;
                        replacement.Name      = repoManager.RepoName;
                        replacement.Url       = repoManager.RepoUrl;
                    }

                    pendingSave.AddRange(replacements);
                }
            }

            if (unanalyzedNumbers.Count > 0)
            {
                var analyzedAt     = clock.DateTimeOffsetUtcNow();
                var freshPrMetrics = unanalyzedNumbers
                                     .Select(number => prAnalyzer.CalculatePullRequestMetrics(cacheByNumber[number]))
                                     .ToList();
                var freshSnapshots = repoAnalyzer.CalculateRepositoryMetricsOverTime(freshPrMetrics);
                var scorersForNew  = prAnalyzer.Scorers.ToHashSet();

                foreach (var fresh in freshSnapshots)
                {
                    fresh.CreatedAt = analyzedAt;
                    fresh.UpdatedAt = analyzedAt;
                    fresh.Scorers   = scorersForNew;
                    fresh.Owner     = repoManager.RepoOwner;
                    fresh.Name      = repoManager.RepoName;
                    fresh.Url       = repoManager.RepoUrl;
                }

                pendingSave.AddRange(freshSnapshots);
            }

            // Replacement and brand-new snapshots are written in one call.
            await analysisManager.SaveAsync(pendingSave);

            return worker;
        }
 /// <summary>
 /// Builds an <c>updated_on&gt;…</c> filter clause for items updated after
 /// <paramref name="updatedAfter"/>.
 /// </summary>
 /// <param name="updatedAfter">Lower bound for the update time; may carry any UTC offset.</param>
 /// <param name="clock">Clock whose current UTC time is compared against <paramref name="updatedAfter"/>.</param>
 /// <returns>
 /// <c>null</c> when <paramref name="updatedAfter"/> is later than the clock's current time; otherwise
 /// <c>updated_on&gt;</c> followed by the instant expressed in UTC using the sortable
 /// (<c>yyyy-MM-ddTHH:mm:ss</c>) format.
 /// </returns>
 /// <exception cref="ArgumentNullException"><paramref name="clock"/> is <c>null</c>.</exception>
 public static string GetBbCloudUpdatedAtFilter(this DateTimeOffset updatedAfter, IClock clock)
 {
     if (clock is null)
     {
         throw new ArgumentNullException(nameof(clock));
     }

     if (updatedAfter > clock.DateTimeOffsetUtcNow())
     {
         return null;
     }

     // The "s" specifier prints only the date and time and drops the offset. Formatting the
     // DateTimeOffset directly would therefore emit the local wall-clock time of whatever offset the
     // value carries, which names a different instant than the one compared above. Normalising to UTC
     // keeps the text and the comparison in agreement; UTC inputs produce exactly the same string as before.
     // NOTE(review): this relies on the receiving API reading offset-less timestamps as UTC - confirm
     // against the Bitbucket Cloud filtering documentation.
     return $"updated_on>{updatedAfter.UtcDateTime:s}";
 }