private void SaveOpenReviews(KnowledgeDistributionMap knowledgeMap, LossSimulation lossSimulation)
{
    var bulkDeveloperOpenReviews = new List<DeveloperReview>();

    foreach (var pullRequestReviewerItem in knowledgeMap.PullRequestSimulatedRecommendationMap)
    {
        var pullRequestNumber = pullRequestReviewerItem.Key;
        var pull = _dbContext.PullRequests.FirstOrDefault(a => a.Number == pullRequestNumber);

        // Skip recommendations whose pull request is missing from the database.
        if (pull == null)
            continue;

        foreach (var reviewer in pullRequestReviewerItem.Value.SelectedReviewers)
        {
            var startDate = pull.CreatedAtDateTime ?? DateTime.MinValue;
            var endDate = pull.ClosedAtDateTime ?? DateTime.MinValue;

            // Emit one row per selected reviewer for every day the pull request stayed open.
            while (startDate < endDate)
            {
                bulkDeveloperOpenReviews.Add(new DeveloperReview()
                {
                    NormalizedName = reviewer,
                    DateTime = startDate,
                    SimulationId = lossSimulation.Id,
                    PullRequestId = pull.Number,
                });

                startDate = startDate.AddDays(1);
            }
        }
    }

    _dbContext.BulkInsert(bulkDeveloperOpenReviews, new BulkConfig { BatchSize = 50000, BulkCopyTimeout = 0 });
}
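// Example: for a pull request created at 2020-01-01 00:00 and closed at
// 2020-01-04 00:00, the loop above emits rows dated Jan 1, 2, and 3 for each
// selected reviewer; the closing day is excluded because the loop condition is
// strict (startDate < endDate), and pull requests missing either timestamp
// produce no rows at all.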
private void SavePullRequestReviewers(KnowledgeDistributionMap knowledgeMap, LossSimulation lossSimulation)
{
    var bulkEntities = new List<RecommendedPullRequestReviewer>();

    foreach (var pullRequestReviewerItem in knowledgeMap.PullRequestSimulatedRecommendationMap)
    {
        // Tag every recommended reviewer with the owning simulation before saving.
        foreach (var reviewer in pullRequestReviewerItem.Value.RecommendedPullRequestReviewers)
        {
            reviewer.LossSimulationId = lossSimulation.Id;
            bulkEntities.Add(reviewer);
        }
    }

    _dbContext.BulkInsert(bulkEntities, new BulkConfig { BulkCopyTimeout = 0 });
}
private void SavePullRequestSimulatedRecommendationResults(KnowledgeDistributionMap knowledgeDistributionMap, LossSimulation lossSimulation)
{
    var results = knowledgeDistributionMap.PullRequestSimulatedRecommendationMap.Values;
    var bulkPullRequestSimulatedRecommendationResults = new List<Data.PullRequestRecommendationResult>();
    var bulkRecommendedPullRequestCandidates = new List<RecommendedPullRequestCandidate>(results.Count() * 5);

    foreach (var result in results)
    {
        // Materialize the top ten candidates once instead of re-enumerating them.
        var topCandidates = result.SortedCandidates?.Take(10).ToArray();

        bulkPullRequestSimulatedRecommendationResults.Add(new Data.PullRequestRecommendationResult()
        {
            ActualReviewers = result.ActualReviewers?.Length > 0
                ? string.Join(", ", result.ActualReviewers)
                : null,
            SelectedReviewers = result.SelectedReviewers?.Length > 0
                ? string.Join(", ", result.SelectedReviewers)
                : null,
            SortedCandidates = topCandidates?.Length > 0
                ? string.Join(", ", topCandidates.Select(q => q.DeveloperName))
                : null,
            ActualReviewersLength = result.ActualReviewers.Length,
            SelectedReviewersLength = result.SelectedReviewers.Length,
            SortedCandidatesLength = result.SortedCandidates?.Length,
            PullRequestNumber = result.PullRequestNumber,
            MeanReciprocalRank = result.MeanReciprocalRank,
            TopFiveIsAccurate = result.TopFiveIsAccurate,
            TopTenIsAccurate = result.TopTenIsAccurate,
            IsSimulated = result.IsSimulated,
            LossSimulationId = lossSimulation.Id,
            Expertise = result.Expertise,
            IsRisky = result.IsRisky,
            Features = result.Features
        });

        // Persist the top candidates together with their one-based ranks and scores.
        for (var i = 0; topCandidates != null && i < topCandidates.Length; i++)
        {
            bulkRecommendedPullRequestCandidates.Add(new RecommendedPullRequestCandidate(
                lossSimulationId: lossSimulation.Id,
                rank: i + 1,
                normalizedReviewerName: topCandidates[i].DeveloperName,
                score: topCandidates[i].Score,
                pullRequestNumber: result.PullRequestNumber));
        }
    }

    _dbContext.BulkInsert(bulkRecommendedPullRequestCandidates, new BulkConfig { BatchSize = 50000, BulkCopyTimeout = 0 });
    _dbContext.BulkInsert(bulkPullRequestSimulatedRecommendationResults, new BulkConfig { BatchSize = 50000, BulkCopyTimeout = 0 });
}
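// MeanReciprocalRank, TopFiveIsAccurate, and TopTenIsAccurate arrive precomputed
// on the recommendation results. As a hedged sketch of the standard definition of
// reciprocal rank -- an illustrative assumption, not necessarily the computation
// used upstream -- the metric is the inverse of the one-based rank of the first
// actual reviewer found among the sorted candidates:
private static double ReciprocalRankSketch(string[] actualReviewers, string[] sortedCandidateNames)
{
    for (var rank = 1; rank <= sortedCandidateNames.Length; rank++)
    {
        // The first hit determines the score; 1.0 means the top candidate was an actual reviewer.
        if (actualReviewers.Contains(sortedCandidateNames[rank - 1]))
            return 1.0 / rank;
    }

    // No actual reviewer appears among the candidates.
    return 0.0;
}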
private void SaveLeaversAndFilesAtRisk(LossSimulation lossSimulation, KnowledgeDistributionMap knowledgeDistributionMap, Dictionary<long, IEnumerable<SimulatedLeaver>> leavers)
{
    var bulkEntities = new List<SimulatedAbondonedFile>();

    foreach (var period in _periods)
    {
        _logger.LogInformation("{datetime}: computing knowledge loss for period {pid}.", DateTime.Now, period.Id);

        var availableDevelopers = GetAvailableDevelopersOfPeriod(period);
        _dbContext.AddRange(leavers[period.Id]);

        var abandonedFiles = GetAbandonedFiles(period, leavers[period.Id], availableDevelopers, knowledgeDistributionMap, lossSimulation);
        bulkEntities.AddRange(abandonedFiles);

        _logger.LogInformation("{datetime}: computing knowledge loss for period {pid} is done.", DateTime.Now, period.Id);
    }

    _dbContext.BulkInsert(bulkEntities);
}
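// Note: the leaver entities go through the EF change tracker (AddRange) and are
// presumably persisted by a SaveChanges call elsewhere, while the abandoned-file
// rows bypass the tracker and are written directly via BulkInsert.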
private void SaveFileTouches(KnowledgeDistributionMap knowledgeMap, LossSimulation lossSimulation)
{
    var bulkEntities = new List<FileTouch>();

    foreach (var detail in knowledgeMap.CommitBasedKnowledgeMap.Details)
    {
        foreach (var period in detail.Periods)
        {
            bulkEntities.Add(new FileTouch()
            {
                NormalizeDeveloperName = detail.Developer.NormalizedName,
                PeriodId = period.Id,
                CanonicalPath = detail.FilePath,
                TouchType = "commit",
                LossSimulationId = lossSimulation.Id
            });
        }
    }

    foreach (var detail in knowledgeMap.ReviewBasedKnowledgeMap.Details)
    {
        // Distinct() guards against a file being reviewed more than once in the same period.
        foreach (var period in detail.Periods.Distinct())
        {
            bulkEntities.Add(new FileTouch()
            {
                NormalizeDeveloperName = detail.Developer.NormalizedName,
                PeriodId = period.Id,
                CanonicalPath = detail.FilePath,
                TouchType = "review",
                LossSimulationId = lossSimulation.Id
            });
        }
    }

    _dbContext.BulkInsert(bulkEntities, new BulkConfig { BatchSize = 50000 });
}
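// Example: a developer who committed to "src/core/engine.cs" (a hypothetical
// path) in periods 1 and 2 and reviewed it in period 2 yields three FileTouch
// rows: two with TouchType = "commit" and one with TouchType = "review".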
private IEnumerable<SimulatedAbondonedFile> GetAbandonedFiles(Period period, IEnumerable<SimulatedLeaver> leavers, IEnumerable<Developer> availableDevelopers, KnowledgeDistributionMap knowledgeMap, LossSimulation lossSimulation)
{
    var leaversDic = leavers.ToDictionary(q => q.Developer.NormalizedName);
    var availableDevelopersDic = availableDevelopers.ToDictionary(q => q.NormalizedName);
    var authorsFileBlames = knowledgeMap.BlameBasedKnowledgeMap.GetSnapshopOfPeriod(period.Id);

    foreach (var filePath in authorsFileBlames.FilePaths)
    {
        // A file whose knowledge is preserved through reviews is not at risk.
        if (IsFileSavedByReview(filePath, knowledgeMap.ReviewBasedKnowledgeMap, period))
            continue;

        var fileTotalLines = (double)authorsFileBlames[filePath].Sum(q => q.Value.TotalAuditedLines);

        var remainingBlames = authorsFileBlames[filePath].Values
            .Where(q => availableDevelopersDic.ContainsKey(q.NormalizedDeveloperName))
            .OrderByDescending(q => q.TotalAuditedLines)
            .ToArray();

        var abandonedBlames = remainingBlames
            .Where(q => leaversDic.ContainsKey(q.NormalizedDeveloperName))
            .ToArray();

        var remainingTotalLines = remainingBlames.Sum(q => q.TotalAuditedLines);
        var abandonedTotalLines = abandonedBlames.Sum(q => q.TotalAuditedLines);

        var remainingPercentage = remainingTotalLines / fileTotalLines;
        var abandonedPercentage = abandonedTotalLines / fileTotalLines;

        // Share of the file's knowledge that is no longer held by a staying, available developer.
        var leftKnowledgePercentage = 1 - (remainingPercentage - abandonedPercentage);

        if (leftKnowledgePercentage >= lossSimulation.FilesAtRiksOwnershipThreshold)
        {
            yield return new SimulatedAbondonedFile()
            {
                FilePath = filePath,
                PeriodId = period.Id,
                TotalLinesInPeriod = remainingTotalLines,
                LossSimulationId = lossSimulation.Id,
                AbandonedLinesInPeriod = abandonedTotalLines,
                SavedLinesInPeriod = remainingTotalLines - abandonedTotalLines,
                RiskType = "abandoned"
            };
        }

        // A file is "hoarded" when its remaining knowledge is concentrated in the top few staying developers.
        var nonAbandonedBlames = remainingBlames
            .Where(q => !leaversDic.ContainsKey(q.NormalizedDeveloperName))
            .ToArray();
        var totalNonAbandonedLines = (double)nonAbandonedBlames.Sum(q => q.TotalAuditedLines);

        var topOwnedPortion = 0.0;
        for (var i = 0; i < nonAbandonedBlames.Length && i < lossSimulation.FilesAtRiksOwnersThreshold; i++)
        {
            topOwnedPortion += nonAbandonedBlames[i].TotalAuditedLines / totalNonAbandonedLines;
        }

        if (topOwnedPortion >= lossSimulation.FilesAtRiksOwnershipThreshold)
        {
            yield return new SimulatedAbondonedFile()
            {
                FilePath = filePath,
                PeriodId = period.Id,
                TotalLinesInPeriod = remainingTotalLines,
                LossSimulationId = lossSimulation.Id,
                AbandonedLinesInPeriod = abandonedTotalLines,
                SavedLinesInPeriod = remainingTotalLines - abandonedTotalLines,
                RiskType = "hoarded"
            };
        }
    }
}
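// Worked example for the "abandoned" arithmetic above, with illustrative numbers:
// for a 100-line file where available developers still account for 80 lines
// (remainingPercentage = 0.8) and 30 of those lines belong to leavers
// (abandonedPercentage = 0.3), the knowledge left without a staying owner is
// 1 - (0.8 - 0.3) = 0.5, so with FilesAtRiksOwnershipThreshold = 0.5 the file
// would be flagged as "abandoned".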
private void SaveOwnershipDistribution(KnowledgeDistributionMap knowledgeDistributionMap, LossSimulation lossSimulation, Dictionary<long, IEnumerable<SimulatedLeaver>> leavers)
{
    var bulkFileTouches = new List<FileTouch>();
    var bulkFileKnowledgeable = new List<FileKnowledgeable>();

    foreach (var period in _periods)
    {
        // Get the final list of files by the end of the period, together with their blame information up to that point in time.
        var blameSnapshot = knowledgeDistributionMap.BlameBasedKnowledgeMap.GetSnapshopOfPeriod(period.Id);

        // Skip periods for which no blame information has been extracted.
        if (blameSnapshot == null)
            continue;

        // The developers who were active in this period and those who have left the project by its end.
        var availableDevelopersOfPeriod = GetAvailableDevelopersOfPeriod(period).Select(q => q.NormalizedName).ToHashSet();
        var leaversOfPeriod = leavers[period.Id].Select(q => q.NormalizedName).ToHashSet();

        foreach (var filePath in blameSnapshot.FilePaths)
        {
            // Count the developers whose ownership of the file exceeds the threshold, excluding mega developers.
            var committers = blameSnapshot[filePath]
                .Where(q => q.Value.OwnedPercentage > lossSimulation.FilesAtRiksOwnershipThreshold)
                .Select(q => q.Value.NormalizedDeveloperName)
                .Where(q => !lossSimulation.MegaDevelopers.Contains(q))
                .ToHashSet();

            var fileReviewDetails = knowledgeDistributionMap.ReviewBasedKnowledgeMap[filePath]
                ?.Where(q => q.Value.Periods.Any(p => p.Id <= period.Id));

            // Reviewers shouldn't be null; the empty fallback is just for convenience.
            var reviewers = fileReviewDetails?.Select(q => q.Value.Developer.NormalizedName).ToHashSet()
                ?? new HashSet<string>();

            var availableCommitters = committers.Where(q => availableDevelopersOfPeriod.Contains(q) && !leaversOfPeriod.Contains(q)).ToArray();
            var availableReviewers = reviewers.Where(q => availableDevelopersOfPeriod.Contains(q) && !leaversOfPeriod.Contains(q)).ToArray();
            var knowledgeables = availableReviewers.Union(availableCommitters).ToArray();

            var totalPullRequests = fileReviewDetails
                ?.SelectMany(q => q.Value.PullRequests.Where(pr => pr.CreatedAtDateTime.Value < period.ToDateTime))
                .Select(q => q.Number)
                .Distinct()
                .Count();

            bulkFileKnowledgeable.Add(new FileKnowledgeable()
            {
                CanonicalPath = filePath,
                PeriodId = period.Id,
                TotalAvailableCommitters = availableCommitters.Length,
                TotalAvailableReviewers = availableReviewers.Length,
                TotalAvailableReviewOnly = availableReviewers.Count(q => !availableCommitters.Contains(q)),
                TotalAvailableCommitOnly = availableCommitters.Count(q => !availableReviewers.Contains(q)),
                TotalKnowledgeables = knowledgeables.Length,
                Knowledgeables = knowledgeables.Length > 0 ? string.Join(",", knowledgeables) : null,
                AvailableCommitters = availableCommitters.Length > 0 ? string.Join(",", availableCommitters) : null,
                AvailableReviewers = availableReviewers.Length > 0 ? string.Join(",", availableReviewers) : null,
                LossSimulationId = lossSimulation.Id,
                HasReviewed = reviewers.Count > 0,
                TotalReviewers = reviewers.Count,
                TotalCommitters = committers.Count,
                TotalPullRequests = totalPullRequests.GetValueOrDefault(0)
            });

            /* Per-developer touch rows are left disabled:
            bulkFileTouches.AddRange(availableCommitters.Select(q => new FileTouch()
            {
                CanonicalPath = filePath,
                LossSimulationId = lossSimulation.Id,
                NormalizeDeveloperName = q,
                PeriodId = period.Id,
                TouchType = "commit",
            }));

            bulkFileTouches.AddRange(availableReviewers.Select(q => new FileTouch()
            {
                CanonicalPath = filePath,
                LossSimulationId = lossSimulation.Id,
                NormalizeDeveloperName = q,
                PeriodId = period.Id,
                TouchType = "review",
            }));
            */
        }
    }

    _dbContext.BulkInsert(bulkFileTouches, new BulkConfig { BatchSize = 50000, BulkCopyTimeout = 0 });
    _dbContext.BulkInsert(bulkFileKnowledgeable, new BulkConfig { BatchSize = 50000, BulkCopyTimeout = 0 });
}
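// A hedged sketch of how these persistence helpers might be driven once a
// simulation run has produced its knowledge map. The method name and call order
// are illustrative assumptions, not the repository's actual driver code:
private void PersistSimulationOutputsSketch(KnowledgeDistributionMap knowledgeMap, LossSimulation lossSimulation, Dictionary<long, IEnumerable<SimulatedLeaver>> leavers)
{
    SaveLeaversAndFilesAtRisk(lossSimulation, knowledgeMap, leavers);
    SaveFileTouches(knowledgeMap, lossSimulation);
    SavePullRequestReviewers(knowledgeMap, lossSimulation);
    SavePullRequestSimulatedRecommendationResults(knowledgeMap, lossSimulation);
    SaveOwnershipDistribution(knowledgeMap, lossSimulation, leavers);
    SaveOpenReviews(knowledgeMap, lossSimulation);

    // The Bulk* calls write directly to the database; SaveChanges flushes the
    // leaver entities queued through the change tracker in SaveLeaversAndFilesAtRisk.
    _dbContext.SaveChanges();
}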