Пример #1
0
        /// <summary>
        /// Analyzes the per-bug results of one algorithm, restricted to the bugs reported by <paramref name="source"/>.
        /// Reads <c>stats_{algorithmId}.txt</c> (one <see cref="StatisticsResult"/> CSV line per bug) and writes three files
        /// next to it: a cumulative batch statistic (<c>-batch.csv</c>), a per-batch statistic (<c>-deriv-batch.csv</c>)
        /// and a plain-text summary with hit rates per expert position and Top-1/3/5 rates (<c>.txt</c>).
        /// </summary>
        /// <param name="algorithmId">Id of the algorithm; selects the input file and is part of the output file names.</param>
        /// <param name="source">Supplies the ids of the bugs to include and the postfix used in the output file names.</param>
        private void ComputeStatisticsForAlgorithmAndActualReviews(int algorithmId, AbstractSourceOfBugs source)
        {
            string[] originalData = File.ReadAllLines($"{basepath}stats_{algorithmId}.txt");

            // Ask the source for its bug ids once and index them; calling findBugs() inside the
            // filter would re-evaluate it (and scan its result linearly) for every single input line.
            HashSet<int> relevantBugIds = new HashSet<int>(source.findBugs());

            List<StatisticsResult> workingSet = originalData.Select(StatisticsResult.FromCSVLine)
                                                            .Where(sr => relevantBugIds.Contains(sr.BugId))
                                                            .ToList();

            int count = workingSet.Count;

            StringBuilder batchSb           = new StringBuilder();
            StringBuilder batchDerivationSb = new StringBuilder();

            for (int batchStart = 0; batchStart < count; batchStart += BATCHSIZE)
            {
                // Cumulative statistic over everything before this batch (empty on the first iteration).
                ComputeBatchStatistic(batchSb, workingSet.Take(batchStart).ToList());

                // Statistic over this batch only; Take() stops at the end of the list, so the last batch may be shorter.
                ComputeBatchDerivationStatistic(batchDerivationSb, workingSet.Skip(batchStart).Take(BATCHSIZE).ToList());
            }

            // Final cumulative statistic over the complete working set (passed as a copy, as before).
            ComputeBatchStatistic(batchSb, workingSet.ToList());

            File.WriteAllText($"{basepath}stats_{algorithmId}_analyzed{source.Postfix}-batch.csv", batchSb.ToString());
            File.WriteAllText($"{basepath}stats_{algorithmId}_analyzed{source.Postfix}-deriv-batch.csv", batchDerivationSb.ToString());

            int foundNo = workingSet.Count(sr => sr.IsMatch);

            // expertPlacements[i] = number of bugs whose Matches flag at position i is set.
            int[] expertPlacements = new int[StatisticsResult.NUMBER_OF_EXPERTS];

            for (int i = 0; i < StatisticsResult.NUMBER_OF_EXPERTS; i++)
            {
                expertPlacements[i] = workingSet.Count(sr => sr.Matches[i]);
            }

            StringBuilder sb = new StringBuilder();

            // NOTE: with an empty working set the ratios below are 0/0 in floating point, i.e. NaN, not an exception.
            sb.AppendLine($"Expert was found: {foundNo} / {count} ({(double)foundNo / count:P})");
            for (int i = 0; i < StatisticsResult.NUMBER_OF_EXPERTS; i++)
            {
                sb.AppendLine($"Expert was No {i+1}:  {expertPlacements[i]} / {count} ({(double)expertPlacements[i] / (double)count:P})");
            }

            // NOTE(review): the indices below require NUMBER_OF_EXPERTS >= 5 - confirm against StatisticsResult.
            int top1 = expertPlacements[0];
            int top3 = top1 + expertPlacements[1] + expertPlacements[2];
            int top5 = top3 + expertPlacements[3] + expertPlacements[4];

            sb.AppendLine().AppendLine($"Top-1:  {(double)top1 / (double)count:P}");
            sb.AppendLine($"Top-3:  {(double)top3 / (double)count:P}");
            sb.AppendLine($"Top-5:  {(double)top5 / (double)count:P}");

            File.WriteAllText($"{basepath}stats_{algorithmId}_analyzed{source.Postfix}.txt", sb.ToString());
        }
Пример #2
0
        /// <summary>
        /// Iterates through the list of bugs and checks for each bug which algorithms suggested correct reviewers and which did not.
        /// Results are written to stats_x.txt (x = algorithm id): A CSV with bugId and correctly predicted reviewers in each entry
        /// (StatisticsResult strings).
        /// </summary>
        /// <param name="sourceOfActualReviewers">Supplies the ids of the bugs to analyze.</param>
        /// <exception cref="Exception">Thrown when more than 10 bugs fail to be processed; the last failure is the inner exception.</exception>
        public void AnalyzeActualReviews(AbstractSourceOfBugs sourceOfActualReviewers)
        {
            // Materialize once: the sequence is needed for a probe id, a total count and the main loop,
            // and re-enumerating the source each time could repeat the underlying lookup.
            List<int> allBugIds = sourceOfActualReviewers.findBugs().ToList();

            if (allBugIds.Count == 0)
            {
                // Nothing to analyze; without this guard, picking the probe id below would throw.
                log.Warn("No bugs found, skipping analysis of actual reviews.");
                return;
            }

            // Any bug id will do: it is only used to detect which algorithms have computed results at all.
            int       someRandomBugId = allBugIds[0];
            List<int> algorithmIds;

            using (ExpertiseDBEntities context = new ExpertiseDBEntities())
            {
                algorithmIds = context.Algorithms
                               .Select(a => a.AlgorithmId)
                               .Where(algoId => context.ComputedReviewers.Any(cr => cr.BugId == someRandomBugId && cr.AlgorithmId == algoId)) // filter algorithms for which no calculation has been done
                               .ToList();
            }

            int       count      = 0;
            int       errorCount = 0;
            double    elapsed    = 0d;
            int       maxCount   = allBugIds.Count;
            Stopwatch sw         = new Stopwatch();

            sw.Start();

            // One result list per algorithm, filled bug by bug.
            IDictionary<int, List<StatisticsResult>> output = algorithmIds.ToDictionary(algorithmId => algorithmId, algorithmId => new List<StatisticsResult>());

            foreach (int bugId in allBugIds)
            {
                // Progress report every 1000 bugs, with an estimate of the remaining time based on the running average.
                if (++count % 1000 == 0)
                {
                    sw.Stop();
                    elapsed += sw.Elapsed.TotalSeconds;
                    double   avg       = elapsed / count;
                    TimeSpan remaining = TimeSpan.FromSeconds(avg * (maxCount - count));
                    log.DebugFormat("Now at: {0} - (act: {1} | avg: {2:N}s | remaining: {3})", count, sw.Elapsed, avg, remaining);
                    sw.Restart();
                }

                try
                {
                    List<int> actualReviewerIds;
                    using (ExpertiseDBEntities context = new ExpertiseDBEntities())
                    {
                        // NOTE(review): a reviewer name without a matching developer makes FirstOrDefault yield null;
                        // any resulting exception is handled by the catch block below.
                        actualReviewerIds = context.ActualReviewers
                                            .Where(ar => ar.BugId == bugId)
                                            .Select(ar => context.Developers.FirstOrDefault(dev => dev.Name == ar.Reviewer && dev.RepositoryId == RepositoryId).DeveloperId)
                                            .ToList();
                    }

                    Debug.Assert(actualReviewerIds.Count > 0);  // All bugs must have reviewers

                    foreach (int algorithmId in algorithmIds)
                    {
                        output[algorithmId].Add(CalculateResultForOneAlgorithmAndBug(algorithmId, bugId, actualReviewerIds));
                    }
                }
                catch (Exception ex)
                {
                    // Up to 10 failing bugs are logged and skipped; the 11th failure aborts the whole run.
                    if (++errorCount > 10)
                    {
                        log.Fatal("10 errors while computing statistics, BugID=" + bugId + ", giving up.", ex);
                        throw new Exception("10 errors while computing statistics, giving up.", ex);
                    }

                    log.Error("Error #" + errorCount + " on BugID " + bugId, ex);
                }
            }

            foreach (KeyValuePair<int, List<StatisticsResult>> algorithmStats in output)
            {
                StringBuilder sb = new StringBuilder();
                foreach (StatisticsResult statisticsResult in algorithmStats.Value)
                {
                    sb.AppendLine(statisticsResult.ToCSV());
                }

                // Interpolation instead of string.Format(basepath + ...): the path must not be parsed as a
                // format string, otherwise a brace in basepath would raise a FormatException.
                File.WriteAllText($"{basepath}stats_{algorithmStats.Key}.txt", sb.ToString());
            }
        }