/// <summary>
/// Reads the per-bug results stored in <c>stats_{algorithmId}.txt</c>, keeps the entries whose bug id is
/// reported by <paramref name="source"/>, and writes three files next to the input: the batch statistics
/// (<c>-batch.csv</c>), the batch derivation statistics (<c>-deriv-batch.csv</c>) and a textual summary
/// of how often an expert was found and at which position (<c>.txt</c>).
/// </summary>
/// <param name="algorithmId">Id of the algorithm whose stats file is analyzed; part of all file names.</param>
/// <param name="source">Supplies the bug ids to keep and the postfix used in the output file names.</param>
private void ComputeStatisticsForAlgorithmAndActualReviews(int algorithmId, AbstractSourceOfBugs source)
{
    string[] originalData = File.ReadAllLines($"{basepath}stats_{algorithmId}.txt");

    // Ask the source for its bug ids once and keep them in a set, instead of calling
    // findBugs() and scanning its result linearly for every CSV line.
    HashSet<int> relevantBugIds = new HashSet<int>(source.findBugs());

    List<StatisticsResult> workingSet = originalData
        .Select(StatisticsResult.FromCSVLine)
        .Where(sr => relevantBugIds.Contains(sr.BugId))
        .ToList();
    int count = workingSet.Count;

    StringBuilder batchSb = new StringBuilder();
    StringBuilder batchDerivationSb = new StringBuilder();
    for (int i = 0; i < count; i += BATCHSIZE)
    {
        // Cumulative view: everything before the current batch (empty on the first pass).
        ComputeBatchStatistic(batchSb, workingSet.Take(i).ToList());

        // Per-batch view: only the current batch. Take() stops at the end of the list,
        // so the last, possibly shorter, batch needs no manual clamping.
        ComputeBatchDerivationStatistic(batchDerivationSb, workingSet.Skip(i).Take(BATCHSIZE).ToList());
    }

    // Final cumulative entry over the complete working set (passed as a copy).
    ComputeBatchStatistic(batchSb, workingSet.ToList());

    File.WriteAllText($"{basepath}stats_{algorithmId}_analyzed{source.Postfix}-batch.csv", batchSb.ToString());
    File.WriteAllText($"{basepath}stats_{algorithmId}_analyzed{source.Postfix}-deriv-batch.csv", batchDerivationSb.ToString());

    int foundNo = workingSet.Count(sr => sr.IsMatch);

    // expertPlacements[k] = number of entries whose Matches flag at position k is set.
    int[] expertPlacements = new int[StatisticsResult.NUMBER_OF_EXPERTS];
    for (int i = 0; i < expertPlacements.Length; i++)
    {
        int position = i;
        expertPlacements[i] = workingSet.Count(sr => sr.Matches[position]);
    }

    // Note: with an empty working set the ratios below are 0/0 and are printed as NaN.
    StringBuilder sb = new StringBuilder();
    sb.AppendLine($"Expert was found: {foundNo} / {count} ({(double)foundNo / count:P})");
    for (int i = 0; i < expertPlacements.Length; i++)
    {
        sb.AppendLine($"Expert was No {i+1}: {expertPlacements[i]} / {count} ({(double)expertPlacements[i] / (double)count:P})");
    }

    // Take(n).Sum() gives the same totals as indexing positions 0..n-1 directly, but does not
    // throw when NUMBER_OF_EXPERTS is smaller than n.
    int top1 = expertPlacements.Take(1).Sum();
    int top3 = expertPlacements.Take(3).Sum();
    int top5 = expertPlacements.Take(5).Sum();

    sb.AppendLine().AppendLine($"Top-1: {(double)top1 / (double)count:P}");
    sb.AppendLine($"Top-3: {(double)top3 / (double)count:P}");
    sb.AppendLine($"Top-5: {(double)top5 / (double)count:P}");

    File.WriteAllText($"{basepath}stats_{algorithmId}_analyzed{source.Postfix}.txt", sb.ToString());
}
/// <summary>
/// Iterates through the list of bugs and checks for each bug which algorithms suggested correct reviewers and which did not.
/// Results are written to stats_x.txt: A CSV with bugId and correctly predicted reviewers in each entry (StatisticsResult strings)
/// </summary>
/// <param name="sourceOfActualReviewers">Supplies the ids of the bugs to analyze.</param>
/// <exception cref="InvalidOperationException">The source does not report any bug.</exception>
/// <exception cref="Exception">More than 10 bugs could not be processed; the last failure is the inner exception.</exception>
public void AnalyzeActualReviews(AbstractSourceOfBugs sourceOfActualReviewers)
{
    const int MaxToleratedErrors = 10;
    const int ProgressLogInterval = 1000;

    // Materialize once: the ids are needed for the probe below, for the total count and for
    // the main loop, and re-enumerating the source each time would repeat its work.
    List<int> allBugIds = sourceOfActualReviewers.findBugs().ToList();
    if (allBugIds.Count == 0)
    {
        throw new InvalidOperationException("The source of actual reviewers did not return any bugs to analyze.");
    }

    // One bug is used as a probe to find out for which algorithms results have been computed.
    int someRandomBugId = allBugIds[0];

    List<int> algorithmIds;
    using (ExpertiseDBEntities context = new ExpertiseDBEntities())
    {
        algorithmIds = context.Algorithms
            .Select(a => a.AlgorithmId)
            .Where(algoId => context.ComputedReviewers.Any(cr => cr.BugId == someRandomBugId && cr.AlgorithmId == algoId)) // filter algorithms for which no calculation has been done
            .ToList();
    }

    int count = 0;
    int errorCount = 0;
    double elapsed = 0d;
    int maxCount = allBugIds.Count;

    Stopwatch sw = new Stopwatch();
    sw.Start();

    IDictionary<int, List<StatisticsResult>> output =
        algorithmIds.ToDictionary(algorithmId => algorithmId, algorithmId => new List<StatisticsResult>());

    foreach (int bugId in allBugIds)
    {
        // Periodic progress report with an estimate of the remaining time.
        if (++count % ProgressLogInterval == 0)
        {
            sw.Stop();
            elapsed += sw.Elapsed.TotalSeconds;
            double avg = elapsed / count;
            TimeSpan remaining = TimeSpan.FromSeconds(avg * (maxCount - count));
            log.DebugFormat("Now at: {0} - (act: {1} | avg: {2:N}s | remaining: {3})", count, sw.Elapsed, avg, remaining);
            sw.Restart();
        }

        try
        {
            List<int> actualReviewerIds;
            using (ExpertiseDBEntities context = new ExpertiseDBEntities())
            {
                // Map each actual reviewer name of this bug to the developer id within this repository.
                actualReviewerIds = context.ActualReviewers
                    .Where(ar => ar.BugId == bugId)
                    .Select(ar => context.Developers.FirstOrDefault(dev => dev.Name == ar.Reviewer && dev.RepositoryId == RepositoryId).DeveloperId)
                    .ToList();
            }

            Debug.Assert(actualReviewerIds.Count > 0); // All bugs must have reviewers

            foreach (int algorithmId in algorithmIds)
            {
                output[algorithmId].Add(CalculateResultForOneAlgorithmAndBug(algorithmId, bugId, actualReviewerIds));
            }
        }
        catch (Exception ex)
        {
            // Individual failures are logged and skipped; only once more than the tolerated
            // number of bugs has failed is the whole run aborted.
            if (++errorCount > MaxToleratedErrors)
            {
                log.Fatal("10 errors while computing statistics, BugID=" + bugId + ", giving up.", ex);
                throw new Exception("10 errors while computing statistics, giving up.", ex);
            }

            log.Error("Error #" + errorCount + " on BugID " + bugId, ex);
        }
    }

    foreach (KeyValuePair<int, List<StatisticsResult>> algorithmOutput in output)
    {
        var sb = new StringBuilder();
        foreach (StatisticsResult statisticsResult in algorithmOutput.Value)
        {
            sb.AppendLine(statisticsResult.ToCSV());
        }

        // Interpolation keeps basepath out of the format string, so braces in the path are
        // not misread as format items (string.Format would throw or mis-substitute).
        File.WriteAllText($"{basepath}stats_{algorithmOutput.Key}.txt", sb.ToString());
    }
}