Beispiel #1
0
        public void SkippedBinsAreIgnoredInStats()
        {
            var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo();
            var alternatingBins = ImmutableList <(uint size, bool skip)> .Empty
                                  .Add((1, true))
                                  .Add((100000, false))
                                  .Add((200000, true))
                                  .Add((300000, false));

            var inputSpecs = InputSpec.GenerateDefaultInputSpecs(true)
                             .Select(i => InputSpec.Create(i.VariantType, alternatingBins,
                                                           1, 1, i.ExcludedFilters, i.IncludedFilters, i.IncludedRegions))
                             .ToDictionary(i => i.VariantType, i => i);
            var wittyerSettings = WittyerSettings.Create(outputDirectory, TinyTruth, TinyQuery,
                                                         ImmutableList <ISamplePair> .Empty, EvaluationMode.SimpleCounting,
                                                         inputSpecs);

            var(_, query, truth) = MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses().First();
            var results = MainLauncher.GenerateSampleMetrics(truth, query, false, inputSpecs);

            Assert.Equal(1U, results.OverallStats[StatsType.Event].QueryStats.TrueCount);
            Assert.Equal(1U, results.OverallStats[StatsType.Event].QueryStats.FalseCount);
            Assert.Equal(0.5, results.EventLevelRecallOverall.First(typeRecallTuple => typeRecallTuple.type == WittyerType.CopyNumberGain).recall);

            var numberOfBinsReportedOn = results.EventLevelRecallPerBin.First().perBinRecall.Count();

            Assert.Equal(2, numberOfBinsReportedOn);
        }
Beispiel #2
0
        public void CrossType_Works()
        {
            var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo();
            var inputSpecs      = InputSpec.GenerateDefaultInputSpecs(false).Select(i => InputSpec.Create(i.VariantType, i.BinSizes,
                                                                                                          10000, i.PercentDistance, i.ExcludedFilters, i.IncludedFilters, i.IncludedRegions))
                                  .ToDictionary(i => i.VariantType, i => i);
            var wittyerSettings = WittyerSettings.Create(outputDirectory, TinyTruth, TinyQuery,
                                                         ImmutableList <ISamplePair> .Empty, EvaluationMode.CrossTypeAndSimpleCounting,
                                                         inputSpecs);

            var(_, query, truth) = MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses().First();
            var results = MainLauncher.GenerateSampleMetrics(truth, query, false, inputSpecs);

            Assert.Equal(4U, results.OverallStats[StatsType.Event].QueryStats.TrueCount);
        }
Beispiel #3
0
        public void SvWorksWithDefault()
        {
            if (MiscUtils.IsRunningAnyLinux)
            {
                return;                              // currently failing on linux :(
            }
            var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo();
            var wittyerSettings = WittyerSettings.Create(outputDirectory, GermlineTruth, GermlineQuery,
                                                         ImmutableList <ISamplePair> .Empty, EvaluationMode.Default, InputSpecs);

            var json = MainLauncher.GenerateJson(wittyerSettings, MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses(), EmptyCmd);
            //var str = JsonConvert.SerializeObject(json, Formatting.Indented);
            var stats         = json.GetOrThrow().PerSampleStats.First();
            var expectedStats = JsonConvert.DeserializeObject <GeneralStats>(File.ReadAllText(SvJsonGt.FullName))
                                .PerSampleStats.First();

            var expectedOverallEventStats = expectedStats.OverallStats.Single(x => x.StatsType.Equals(StatsType.Event));
            var actualOverallEventStats   = stats.OverallStats.Single(x => x.StatsType.Equals(StatsType.Event));

            MultiAssert.Equal(expectedOverallEventStats, actualOverallEventStats);

            foreach (var type in WittyerType.AllTypes)
            {
                var typeString         = type.Name;
                var expectedTypedStats = expectedStats.DetailedStats.Single(x => x.VariantType.Equals(typeString));
                var actualTypedStats   = stats.DetailedStats.Single(x => x.VariantType.Equals(typeString));

                var expectedTypedEventStats =
                    expectedTypedStats.OverallStats.Single(x => x.StatsType == StatsType.Event);
                var actualTypedEventStats = actualTypedStats.OverallStats.Single(x => x.StatsType == StatsType.Event);
                MultiAssert.Equal(expectedTypedEventStats, actualTypedEventStats);

                if (!type.HasBaseLevelStats)
                {
                    continue;
                }

                var expectedTypedBaseStats = expectedTypedStats.OverallStats.Single(x => x.StatsType == StatsType.Base);
                var actualTypedBaseStats   = actualTypedStats.OverallStats.Single(x => x.StatsType == StatsType.Base);
                MultiAssert.Equal(expectedTypedBaseStats, actualTypedBaseStats);
            }

            MultiAssert.AssertAll();
        }
Beispiel #4
0
        public void CnvWorksWithCrossType()
        {
            if (MiscUtils.IsRunningAnyLinux)
            {
                return;                              // currently failing on linux :(
            }
            var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo();
            var wittyerSettings = WittyerSettings.Create(outputDirectory, CnvTruth, CnvQuery,
                                                         ImmutableList <ISamplePair> .Empty, EvaluationMode.CrossTypeAndSimpleCounting, InputSpecs);

            var results = MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses().ToList();

            var(_, query, truth) = results.First();
            var testStrings = WittyerVcfWriter.GenerateVcfStrings(query, null, null).Where(line => !line.StartsWith(VcfConstants.Header.Prefix));

            MultiAssert.True(testStrings.All(s => ParseVariantGetTag(s, WitDecision.FalsePositive)));
            testStrings = WittyerVcfWriter.GenerateVcfStrings(null, truth, null).Where(line => !line.StartsWith(VcfConstants.Header.Prefix));
            MultiAssert.True(testStrings.All(s => ParseVariantGetTag(s, WitDecision.FalseNegative)));


            var stats = MainLauncher
                        .GenerateJson(wittyerSettings, results,
                                      EmptyCmd).GetOrThrow().PerSampleStats.First();

            // make sure to check for null
            MultiAssert.True(stats.QuerySampleName != null);
            MultiAssert.True(stats.TruthSampleName != null);
            var expectedStats = JsonConvert.DeserializeObject <GeneralStats>(File.ReadAllText(CnvJsonCts.FullName))
                                .PerSampleStats.First();

            var expectedOverallEventStats = expectedStats.OverallStats.Single(x => x.StatsType.Equals(StatsType.Event));
            var actualOverallEventStats   = stats.OverallStats.Single(x => x.StatsType.Equals(StatsType.Event));

            MultiAssert.Equal(expectedOverallEventStats.QueryFpCount, actualOverallEventStats.QueryFpCount);
            MultiAssert.Equal(expectedOverallEventStats.QueryTpCount, actualOverallEventStats.QueryTpCount);
            MultiAssert.Equal(expectedOverallEventStats.QueryTotalCount, actualOverallEventStats.QueryTotalCount);
            MultiAssert.Equal(expectedOverallEventStats.TruthTpCount, actualOverallEventStats.TruthTpCount);
            MultiAssert.Equal(expectedOverallEventStats.TruthFnCount, actualOverallEventStats.TruthFnCount);
            MultiAssert.Equal(expectedOverallEventStats.TruthTotalCount, actualOverallEventStats.TruthTotalCount);

            MultiAssert.AssertAll();
        }
Beispiel #5
0
        public void Bed_Counts_Bases_Of_FP_Events()
        {
            var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo();
            var inputSpecs      = InputSpec.GenerateDefaultInputSpecs(false).Select(i => InputSpec.Create(i.VariantType, i.BinSizes,
                                                                                                          10000, i.PercentDistance, i.ExcludedFilters, i.IncludedFilters, IncludeBedFile.CreateFromBedFile(Bed)))
                                  .ToDictionary(i => i.VariantType, i => i);
            var wittyerSettings = WittyerSettings.Create(outputDirectory, Truth, Query,
                                                         ImmutableList <ISamplePair> .Empty, EvaluationMode.CrossTypeAndSimpleCounting,
                                                         inputSpecs);

            var(_, query, truth) = MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses().First();
            var results = MainLauncher.GenerateSampleMetrics(truth, query, false, inputSpecs);

            // should be end of bed - start of query + 1 = 149835000 - 145395620 + 1 = 4439381
            MultiAssert.Equal(4439381U, results.OverallStats[StatsType.Base].QueryStats.TrueCount);
            MultiAssert.Equal(4439381U, results.OverallStats[StatsType.Base].TruthStats.TrueCount);
            MultiAssert.Equal(0U, results.OverallStats[StatsType.Event].QueryStats.TrueCount);
            MultiAssert.Equal(0U, results.OverallStats[StatsType.Event].TruthStats.TrueCount);
            MultiAssert.AssertAll();
        }
Beispiel #6
0
        public void CrossType_ComplexBed_Works()
        {
            var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo();
            var inputSpecs      = InputSpec.CreateSpecsFromString(
                File.ReadAllText(Config.FullName), IncludeBedFile.CreateFromBedFile(Bed))
                                  ?.ToDictionary(i => i.VariantType, i => i)
                                  ?? new Dictionary <WittyerType, InputSpec>();
            var wittyerSettings = WittyerSettings.Create(outputDirectory, Truth, Query,
                                                         ImmutableList <ISamplePair> .Empty, EvaluationMode.CrossTypeAndSimpleCounting,
                                                         inputSpecs);

            var(_, query, truth) = MainLauncher.GenerateResults(wittyerSettings)
                                   .EnumerateSuccesses().First();
            var results = MainLauncher
                          .GenerateSampleMetrics(truth, query, false, inputSpecs);
            var baseStats = results.DetailedStats[WittyerType.Deletion].OverallStats[StatsType.Base];

            MultiAssert.Equal(206678U, baseStats.QueryStats.TrueCount);
            MultiAssert.Equal(206678U, baseStats.TruthStats.TrueCount);
            MultiAssert.AssertAll();
        }
Beispiel #7
0
        public void SvWorksWithSimpleCount()
        {
            if (MiscUtils.IsRunningAnyLinux)
            {
                return;                              // currently failing on linux :(
            }
            //var insertionSpec = InputSpecs[WittyerVariantType.Insertion];
            var insertionSpec = InputSpec.Create(WittyerType.Insertion,
                                                 WittyerConstants.DefaultBins.SetItem(0, (50, false)),
                                                 WittyerConstants.DefaultBpOverlap, 0.05, WittyerConstants.DefaultExcludeFilters,
                                                 WittyerConstants.DefaultIncludeFilters, null);
            var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo();
            var wittyerSettings = WittyerSettings.Create(outputDirectory, SomaticTruth,
                                                         SomaticQuery, ImmutableList <ISamplePair> .Empty, EvaluationMode.SimpleCounting,
                                                         InputSpecs.SetItem(WittyerType.Insertion, insertionSpec));

            var results = MainLauncher.GenerateResults(wittyerSettings).Select(i => i.GetOrThrow()).ToList();

            var(_, query, truth) = results.First();

            MultiAssert.Equal(((IMutableWittyerResult)query).NumEntries,
                              (uint)WittyerVcfWriter.ProcessVariants(query, false).Count());
            MultiAssert.Equal(((IMutableWittyerResult)truth).NumEntries,
                              (uint)WittyerVcfWriter.ProcessVariants(truth, true).Count());

            var testStrings = WittyerVcfWriter.GenerateVcfStrings(query, null, null).Where(line => !line.StartsWith(VcfConstants.Header.Prefix));

            MultiAssert.True(testStrings.All(s => ParseVariantGetTag(s, WitDecision.FalsePositive)));
            testStrings = WittyerVcfWriter.GenerateVcfStrings(null, truth, null).Where(line => !line.StartsWith(VcfConstants.Header.Prefix));
            MultiAssert.True(testStrings.All(s => ParseVariantGetTag(s, WitDecision.FalseNegative)));

            var actualStats = GeneralStats.Create(results,
                                                  wittyerSettings.Mode == EvaluationMode.Default, wittyerSettings.InputSpecs, EmptyCmd).PerSampleStats
                              .First();
            var expectedStats = JsonConvert.DeserializeObject <GeneralStats>(File.ReadAllText(SvJsonSc.FullName))
                                .PerSampleStats.First();

            var expectedOverallEventStats = expectedStats.OverallStats.Single(x => x.StatsType.Equals(StatsType.Event));
            var actualOverallEventStats   = actualStats.OverallStats.Single(x => x.StatsType.Equals(StatsType.Event));

            MultiAssert.Equal(expectedOverallEventStats.QueryFpCount, actualOverallEventStats.QueryFpCount);
            MultiAssert.Equal(expectedOverallEventStats.QueryTpCount, actualOverallEventStats.QueryTpCount);
            MultiAssert.Equal(expectedOverallEventStats.QueryTotalCount, actualOverallEventStats.QueryTotalCount);
            MultiAssert.Equal(expectedOverallEventStats.TruthTpCount, actualOverallEventStats.TruthTpCount);
            MultiAssert.Equal(expectedOverallEventStats.TruthFnCount, actualOverallEventStats.TruthFnCount);
            MultiAssert.Equal(expectedOverallEventStats.TruthTotalCount, actualOverallEventStats.TruthTotalCount);

            var expectedInsertionStats = expectedStats.DetailedStats
                                         .Single(s => s.VariantType == WittyerType.Insertion.Name).PerBinStats;
            var actualInsertionStats = actualStats.DetailedStats
                                       .Single(s => s.VariantType == WittyerType.Insertion.Name).PerBinStats;

            foreach (var(expectedInsBinStat, actualInsBinStat) in expectedInsertionStats.Zip(actualInsertionStats,
                                                                                             (a, b) => (a, b)))
            {
                var expectedSingleStat = expectedInsBinStat.Stats.Single();
                var actualSingleStat   = actualInsBinStat.Stats.Single();
                if (!expectedSingleStat.Equals(actualSingleStat))
                {
                    MultiAssert.Equal(string.Empty, expectedInsBinStat.Bin);
                }
                MultiAssert.Equal(expectedSingleStat, actualSingleStat);
            }

            MultiAssert.AssertAll();
        }
Beispiel #8
0
        public void CnvWorksWithSimpleCounting()
        {
            if (MiscUtils.IsRunningAnyLinux)
            {
                return;                              // currently failing on linux :(
            }
            var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo();
            var wittyerSettings = WittyerSettings.Create(outputDirectory, CnvTruth, CnvQuery,
                                                         ImmutableList <ISamplePair> .Empty, EvaluationMode.SimpleCounting, InputSpecs);

            var actualStats = MainLauncher
                              .GenerateJson(wittyerSettings, MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses(),
                                            EmptyCmd).GetOrThrow().PerSampleStats.First();
            //var str = JsonConvert.SerializeObject(actualStats, Formatting.Indented);
            var jsonText      = File.ReadAllText(CnvJsonSc.FullName);
            var expectedStats = JsonConvert.DeserializeObject <GeneralStats>(jsonText)
                                .PerSampleStats.First();

            var expectedOverall           = expectedStats.OverallStats;
            var actualOverall             = actualStats.OverallStats;
            var expectedOverallEventStats = expectedOverall.Single(x => x.StatsType.Equals(StatsType.Event));
            var actualOverallEventStats   = actualOverall.Single(x => x.StatsType.Equals(StatsType.Event));

            MultiAssert.Equal(expectedOverallEventStats.QueryFpCount, actualOverallEventStats.QueryFpCount);
            MultiAssert.Equal(expectedOverallEventStats.QueryTpCount, actualOverallEventStats.QueryTpCount);
            MultiAssert.Equal(expectedOverallEventStats.QueryTotalCount, actualOverallEventStats.QueryTotalCount);
            MultiAssert.Equal(expectedOverallEventStats.TruthTpCount, actualOverallEventStats.TruthTpCount);
            MultiAssert.Equal(expectedOverallEventStats.TruthFnCount, actualOverallEventStats.TruthFnCount);
            MultiAssert.Equal(expectedOverallEventStats.TruthTotalCount, actualOverallEventStats.TruthTotalCount);

            var expectedCnvTypeOverallStats = expectedStats.DetailedStats
                                              .Single(x => x.VariantType == WittyerType.CopyNumberGain.Name).OverallStats.Concat(expectedStats
                                                                                                                                 .DetailedStats.Single(x => x.VariantType == WittyerType.CopyNumberLoss.Name).OverallStats)
                                              .ToReadOnlyList();
            var actualCnvTypeOverallStats = actualStats.DetailedStats
                                            .Single(x => x.VariantType == WittyerType.CopyNumberGain.Name).OverallStats.Concat(actualStats
                                                                                                                               .DetailedStats.Single(x => x.VariantType == WittyerType.CopyNumberLoss.Name).OverallStats)
                                            .ToReadOnlyList();
            var expectedOverallCnvEventStats = expectedCnvTypeOverallStats
                                               .Where(x => x.StatsType.Equals(StatsType.Event))
                                               .Aggregate(BasicJsonStats.Create(StatsType.Event, 0, 0, 0, 0), (acc, target) => acc + target);
            var actualOverallCnvEventStats = actualCnvTypeOverallStats.Where(x => x.StatsType.Equals(StatsType.Event))
                                             .Aggregate(BasicJsonStats.Create(StatsType.Event, 0, 0, 0, 0), (acc, target) => acc + target);

            MultiAssert.Equal(expectedOverallCnvEventStats.QueryFpCount, actualOverallCnvEventStats.QueryFpCount);
            MultiAssert.Equal(expectedOverallCnvEventStats.QueryTpCount, actualOverallCnvEventStats.QueryTpCount);
            MultiAssert.Equal(expectedOverallCnvEventStats.QueryTotalCount, actualOverallCnvEventStats.QueryTotalCount);
            MultiAssert.Equal(expectedOverallCnvEventStats.TruthTpCount, actualOverallCnvEventStats.TruthTpCount);
            MultiAssert.Equal(expectedOverallCnvEventStats.TruthFnCount, actualOverallCnvEventStats.TruthFnCount);
            MultiAssert.Equal(expectedOverallCnvEventStats.TruthTotalCount, actualOverallCnvEventStats.TruthTotalCount);

            var expectedOverallCnvBaseStats = expectedCnvTypeOverallStats.Where(x => x.StatsType.Equals(StatsType.Base))
                                              .Aggregate(BasicJsonStats.Create(StatsType.Base, 0, 0, 0, 0), (acc, target) => acc + target);
            var actualOverallCnvBaseStats = actualCnvTypeOverallStats.Where(x => x.StatsType.Equals(StatsType.Base))
                                            .Aggregate(BasicJsonStats.Create(StatsType.Base, 0, 0, 0, 0), (acc, target) => acc + target);

            MultiAssert.Equal(expectedOverallCnvBaseStats.QueryFpCount, actualOverallCnvBaseStats.QueryFpCount);
            MultiAssert.Equal(expectedOverallCnvBaseStats.QueryTpCount, actualOverallCnvBaseStats.QueryTpCount);
            MultiAssert.Equal(expectedOverallCnvBaseStats.QueryTotalCount, actualOverallCnvBaseStats.QueryTotalCount);
            MultiAssert.Equal(expectedOverallCnvBaseStats.TruthTpCount, actualOverallCnvBaseStats.TruthTpCount);
            MultiAssert.Equal(expectedOverallCnvBaseStats.TruthFnCount, actualOverallCnvBaseStats.TruthFnCount);
            MultiAssert.Equal(expectedOverallCnvBaseStats.TruthTotalCount, actualOverallCnvBaseStats.TruthTotalCount);

            #region test CNVs w/o Refs

            // ReSharper disable ConditionIsAlwaysTrueOrFalse
            var refs = false;
            var(expectedOrthogonalTruthBaseTotal, expectedOrthogonalTruthEventTotal) = GetTotalCnvs(CnvTruth, refs);
            var(expectedOrthogonalQueryBaseTotal, expectedOrthogonalQueryEventTotal) = GetTotalCnvs(CnvQuery, refs);

            var actualCnvBaseStatsBinned   = GetCnvStats(actualStats, refs);
            var expectedCnvBaseStatsBinned = GetCnvStats(expectedStats, refs);

            var(actualTruthBaseTotal, actualQueryBaseTotal, actualTruthEventTotal, actualQueryEventTotal) =
                GetActualTotalStatsFromBins(expectedCnvBaseStatsBinned, actualCnvBaseStatsBinned, refs);

            // expected truth is off by 6, five of which is because of overlap. Last off by 1 is unexplained.  Not sure why, but there could be a hidden bug somewhere.
            MultiAssert.Equal(expectedOrthogonalTruthBaseTotal, actualTruthBaseTotal - 6);
            MultiAssert.Equal(expectedOrthogonalQueryBaseTotal, actualQueryBaseTotal);

            MultiAssert.Equal(expectedOrthogonalTruthEventTotal, actualTruthEventTotal);
            MultiAssert.Equal(expectedOrthogonalQueryEventTotal, actualQueryEventTotal);

            MultiAssert.Equal(expectedOverallCnvBaseStats.TruthTotalCount, actualTruthBaseTotal - 5);
            MultiAssert.Equal(expectedOverallCnvBaseStats.QueryTotalCount, actualQueryBaseTotal);

            MultiAssert.Equal(expectedOrthogonalTruthBaseTotal, actualOverallCnvBaseStats.TruthTotalCount - 1);
            MultiAssert.Equal(expectedOrthogonalQueryBaseTotal, actualOverallCnvBaseStats.QueryTotalCount);

            #endregion

            #region test CNVs w/ Refs

            refs = true;
            (expectedOrthogonalTruthBaseTotal, expectedOrthogonalTruthEventTotal) = GetTotalCnvs(CnvTruth, refs);
            (expectedOrthogonalQueryBaseTotal, expectedOrthogonalQueryEventTotal) = GetTotalCnvs(CnvQuery, refs);

            actualCnvBaseStatsBinned   = GetCnvStats(actualStats, refs);
            expectedCnvBaseStatsBinned = GetCnvStats(expectedStats, refs);

            (actualTruthBaseTotal, actualQueryBaseTotal, actualTruthEventTotal, actualQueryEventTotal) =
                GetActualTotalStatsFromBins(expectedCnvBaseStatsBinned, actualCnvBaseStatsBinned, refs);
            // ReSharper restore ConditionIsAlwaysTrueOrFalse

            // expected truth is off by 6, five of which is because of overlap. Last off by 1 is unexplained.  Not sure why, but there could be a hidden bug somewhere.
            MultiAssert.Equal(expectedOrthogonalTruthBaseTotal, actualTruthBaseTotal - 6);
            MultiAssert.Equal(expectedOrthogonalQueryBaseTotal, actualQueryBaseTotal);

            MultiAssert.Equal(expectedOverall.Single(j => j.StatsType == StatsType.Base).TruthTotalCount,
                              actualTruthBaseTotal - 6);
            MultiAssert.Equal(expectedOverall.Single(j => j.StatsType == StatsType.Base).QueryTotalCount,
                              actualQueryBaseTotal);

            MultiAssert.Equal(expectedOrthogonalTruthEventTotal, actualTruthEventTotal);
            MultiAssert.Equal(expectedOrthogonalQueryEventTotal, actualQueryEventTotal);

            MultiAssert.Equal(expectedOrthogonalTruthBaseTotal,
                              actualOverall.Single(s => s.StatsType == StatsType.Base).TruthTotalCount);
            MultiAssert.Equal(expectedOrthogonalQueryBaseTotal,
                              actualOverall.Single(s => s.StatsType == StatsType.Base).QueryTotalCount);

            #endregion

            MultiAssert.AssertAll();

            Dictionary <string, Dictionary <StatsType, BasicJsonStats> > GetCnvStats(SampleStats sampleStats,
                                                                                     bool includeRef)
            => sampleStats.DetailedStats
            .Where(x => x.VariantType.Equals(WittyerType.CopyNumberGain.Name) ||
                   x.VariantType.Equals(WittyerType.CopyNumberLoss.Name) ||
                   includeRef && x.VariantType.Equals(WittyerType.CopyNumberReference.Name))
            .SelectMany(v => v.PerBinStats)
            .GroupBy(s => s.Bin).ToDictionary(binGroups => binGroups.Key,
                                              binGroups => binGroups.SelectMany(binStats => binStats.Stats).GroupBy(s => s.StatsType)
                                              .ToDictionary(statsGroup => statsGroup.Key,
                                                            statsGroup => statsGroup.Aggregate(BasicJsonStats.Create(statsGroup.Key, 0, 0, 0, 0),
                                                                                               (acc, stat) => acc + stat)));

            (ulong totalLength, uint numEvents) GetTotalCnvs(FileInfo vcf, bool includeRefs)
            {
                var trees = new ConcurrentDictionary <IContigInfo, MergedIntervalTree <uint> >();

                // DO NOT delete: the line below are left there case we want to test without overlapping variants for test tweaking etc. i.e. it's debug code.
                // IContigAndInterval lastInterval = null;
                var numEvents = 0U;

                foreach (var variant in VcfReader.TryCreate(vcf).GetOrThrow().Select(v => v.GetOrThrow()))
                {
                    if (!IsCountedCnv(variant))
                    {
                        continue;
                    }
                    numEvents++;
                    var tree = trees.GetOrAdd(variant.Contig, _ => MergedIntervalTree.Create <uint>());

                    IContigAndInterval interval;
                    var start = variant.Position;
                    if (variant.Info.TryGetValue(VcfConstants.EndTagKey, out var end))
                    {
                        if (uint.TryParse(end, out var endVal))
                        {
                            interval = GetInterval(variant.Contig, start, endVal);
                        }
                        else
                        {
                            throw new ParserException($"couldn't parse {end} into END!");
                        }
                    }
                    else if (variant.Info.TryGetValue(VcfConstants.SvLenKey, out var svLen))
                    {
                        if (int.TryParse(svLen, out var len))
                        {
                            interval = len < 0
                                ? GetInterval(variant.Contig, start - (uint)-len, start)
                                : GetInterval(variant.Contig, start, start + (uint)len);
                        }
                        else
                        {
                            throw new ParserException($"couldn't parse {svLen} into svLen!");
                        }
                    }
                    else
                    {
                        throw new NotImplementedException(
                                  "Parsing using anything but svlen is not supported, should probably add it.");
                    }

                    tree.Add(interval);

                    // DO NOT delete: the line below are left there case we want to test without overlapping variants for test tweaking etc. i.e. it's debug code.
                    //lastInterval = interval;
                }

                return(trees.GetTotalLength(), numEvents);

                IContigAndInterval GetInterval(IContigInfo contig, uint position, uint end)
                {
                    var interval = ContigAndInterval.Create(contig, position, end);

                    return(interval);

                    // DO NOT delete: the remaining lines below are left there case we want to test without overlapping variants for test tweaking etc. i.e. it's debug code.
                    //if (lastInterval == null || !interval.Contig.Equals(lastInterval.Contig)) return interval;

                    //// adjust for possible overlaps between bins. (see https://jira.illumina.com/browse/WIT-84)
                    //var overlap = interval.TryGetOverlap(lastInterval).Select(o => o.GetLength()).GetOrDefault();
                    //if (overlap > 0)
                    //    interval = ContigAndInterval.Create(interval.Contig, interval.Start + overlap,
                    //        interval.Stop + overlap);

                    //return interval;
                }

                bool IsCountedCnv(IVcfVariant variant)
                {
                    if (variant.Filters.SingleOrDefault() != VcfConstants.PassFilter)
                    {
                        // if not single or not PASS return false
                        return(false);
                    }
                    var isRef = variant.Alts.SingleOrDefault() == VcfConstants.MissingValueString;

                    if (isRef)
                    {
                        return(includeRefs);
                    }

                    if (variant.Samples.Count == 0)
                    {
                        return(false);
                    }

                    var hasCn = variant.Samples[0].SampleDictionary
                                .TryGetValue(VcfConstants.CnSampleFieldKey, out var cn);

                    var hasGt = variant.Samples[0].SampleDictionary.TryGetValue(VcfConstants.GenotypeKey, out var gt);

                    if (!hasCn)
                    {
                        return(includeRefs && // no cn means only true if we include Refs
                               hasGt && gt.Split(VcfConstants.GtPhasedValueDelimiter[0],
                                                 VcfConstants.GtUnphasedValueDelimiter[0])
                               .All(x => x == "0"));
                    }

                    if (!variant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svType) ||
                        !WittyerConstants.BaseLevelStatsTypeStrings.Contains(svType))
                    {
                        return(false);
                    }
                    if (!int.TryParse(cn, out var ploidy))
                    {
                        return(false);
                    }

                    isRef = (hasGt
                                ? gt.Split(VcfConstants.GtPhasedValueDelimiter[0],
                                           VcfConstants.GtUnphasedValueDelimiter[0]).Length
                                : 2) == ploidy;
                    return(!isRef || includeRefs);
                }
            }

            (ulong, ulong, ulong, ulong) GetActualTotalStatsFromBins(
                Dictionary <string, Dictionary <StatsType, BasicJsonStats> > expectedBinned,
                Dictionary <string, Dictionary <StatsType, BasicJsonStats> > actualBinned, bool includeRefs)
            {
                var actualTruthBase  = 0UL;
                var actualQueryBase  = 0UL;
                var actualTruthEvent = 0UL;
                var actualQueryEvent = 0UL;

                foreach (var(bin, binStats) in expectedBinned)
                {
                    foreach (var(type, expectedCnvStats) in binStats)
                    {
                        var actualCnvStats = actualBinned[bin][type];
                        if (type == StatsType.Base)
                        {
                            actualTruthBase += actualCnvStats.TruthTotalCount;
                            actualQueryBase += actualCnvStats.QueryTotalCount;
                        }
                        else
                        {
                            actualTruthEvent += actualCnvStats.TruthTotalCount;
                            actualQueryEvent += actualCnvStats.QueryTotalCount;
                        }

                        if (!expectedCnvStats.Equals(actualCnvStats))
                        {
                            MultiAssert.Equal(expectedCnvStats, actualCnvStats);
                            MultiAssert.Equal("Expected ", includeRefs.ToString());
                        }
                    }
                }

                return(actualTruthBase, actualQueryBase, actualTruthEvent, actualQueryEvent);
            }
        }