/// <summary>
/// Runs witty.er with <c>EvaluationMode.Default</c> on the germline truth/query inputs and compares the
/// stats of the first sample pair with the expected JSON stored in <c>SvJsonGt</c>: the overall event
/// stats first, then the per-type event stats (and base stats for types that have them).
/// </summary>
public void SvWorksWithDefault()
{
    // currently failing on linux :(
    if (MiscUtils.IsRunningAnyLinux)
    {
        return;
    }

    var outDir = Path.GetRandomFileName().ToDirectoryInfo();
    var settings = WittyerSettings.Create(outDir, GermlineTruth, GermlineQuery,
        ImmutableList<ISamplePair>.Empty, EvaluationMode.Default, InputSpecs);

    var successes = MainLauncher.GenerateResults(settings).EnumerateSuccesses();
    var generated = MainLauncher.GenerateJson(settings, successes, EmptyCmd);
    // debugging aid: var str = JsonConvert.SerializeObject(generated, Formatting.Indented);
    var actual = generated.GetOrThrow().PerSampleStats.First();

    var expectedJson = File.ReadAllText(SvJsonGt.FullName);
    var expected = JsonConvert.DeserializeObject<GeneralStats>(expectedJson).PerSampleStats.First();

    // Overall (all variant types together) event-level stats.
    var expectedEvent = expected.OverallStats.Single(s => s.StatsType.Equals(StatsType.Event));
    var actualEvent = actual.OverallStats.Single(s => s.StatsType.Equals(StatsType.Event));
    MultiAssert.Equal(expectedEvent, actualEvent);

    foreach (var wittyerType in WittyerType.AllTypes)
    {
        var typeName = wittyerType.Name;
        var expectedForType = expected.DetailedStats.Single(d => d.VariantType.Equals(typeName));
        var actualForType = actual.DetailedStats.Single(d => d.VariantType.Equals(typeName));

        MultiAssert.Equal(
            expectedForType.OverallStats.Single(s => s.StatsType == StatsType.Event),
            actualForType.OverallStats.Single(s => s.StatsType == StatsType.Event));

        // Base-level stats are only compared for the types that report them.
        if (wittyerType.HasBaseLevelStats)
        {
            MultiAssert.Equal(
                expectedForType.OverallStats.Single(s => s.StatsType == StatsType.Base),
                actualForType.OverallStats.Single(s => s.StatsType == StatsType.Base));
        }
    }

    MultiAssert.AssertAll();
}
/// <summary>
/// Runs witty.er with <c>EvaluationMode.CrossTypeAndSimpleCounting</c> on the CNV truth/query inputs.
/// Checks the VCF lines generated for the query alone and for the truth alone via
/// <c>ParseVariantGetTag</c>, then compares the overall event counts of the first sample pair with the
/// expected JSON stored in <c>CnvJsonCts</c>.
/// </summary>
public void CnvWorksWithCrossType()
{
    // currently failing on linux :(
    if (MiscUtils.IsRunningAnyLinux)
    {
        return;
    }

    var outDir = Path.GetRandomFileName().ToDirectoryInfo();
    var settings = WittyerSettings.Create(outDir, CnvTruth, CnvQuery,
        ImmutableList<ISamplePair>.Empty, EvaluationMode.CrossTypeAndSimpleCounting, InputSpecs);

    // Materialized once because the results feed both the VCF checks and the JSON generation below.
    var allResults = MainLauncher.GenerateResults(settings).EnumerateSuccesses().ToList();
    var (_, queryResult, truthResult) = allResults.First();

    // Query-only output: every non-header line is checked against WitDecision.FalsePositive.
    var queryLines = WittyerVcfWriter.GenerateVcfStrings(queryResult, null, null)
        .Where(line => !line.StartsWith(VcfConstants.Header.Prefix));
    MultiAssert.True(queryLines.All(line => ParseVariantGetTag(line, WitDecision.FalsePositive)));

    // Truth-only output: every non-header line is checked against WitDecision.FalseNegative.
    var truthLines = WittyerVcfWriter.GenerateVcfStrings(null, truthResult, null)
        .Where(line => !line.StartsWith(VcfConstants.Header.Prefix));
    MultiAssert.True(truthLines.All(line => ParseVariantGetTag(line, WitDecision.FalseNegative)));

    var actual = MainLauncher.GenerateJson(settings, allResults, EmptyCmd)
        .GetOrThrow()
        .PerSampleStats
        .First();

    // make sure to check for null
    MultiAssert.True(actual.QuerySampleName != null);
    MultiAssert.True(actual.TruthSampleName != null);

    var expectedJson = File.ReadAllText(CnvJsonCts.FullName);
    var expected = JsonConvert.DeserializeObject<GeneralStats>(expectedJson).PerSampleStats.First();

    var expectedEvent = expected.OverallStats.Single(s => s.StatsType.Equals(StatsType.Event));
    var actualEvent = actual.OverallStats.Single(s => s.StatsType.Equals(StatsType.Event));

    // Only the raw counts are compared here, not the whole stats object.
    MultiAssert.Equal(expectedEvent.QueryFpCount, actualEvent.QueryFpCount);
    MultiAssert.Equal(expectedEvent.QueryTpCount, actualEvent.QueryTpCount);
    MultiAssert.Equal(expectedEvent.QueryTotalCount, actualEvent.QueryTotalCount);
    MultiAssert.Equal(expectedEvent.TruthTpCount, actualEvent.TruthTpCount);
    MultiAssert.Equal(expectedEvent.TruthFnCount, actualEvent.TruthFnCount);
    MultiAssert.Equal(expectedEvent.TruthTotalCount, actualEvent.TruthTotalCount);

    MultiAssert.AssertAll();
}
/// <summary>
/// Parses the command-line arguments into settings, makes sure the output directory is usable, writes one
/// merged VCF per result plus <c>Wittyer.Stats.json</c> and <c>Wittyer.ConfigFileUsed.json</c> into it, and
/// prints the overall, per-sample and per-variant-type stats to the console as tab-separated tables.
/// </summary>
/// <param name="args">Raw command-line arguments handed to <c>WittyerSettings.Parser</c>.</param>
/// <remarks>
/// Calls <c>Environment.Exit(1)</c> after printing an error when the output directory exists and is not empty.
/// </remarks>
private static void LaunchWittyerMain(string[] args)
{
    const string Separator = "--------------------------------";
    const string EventHeader =
        "QuerySample\tTruthSample\tQueryTotal\tQueryTp\tQueryFp\tPrecision\tTruthTotal\tTruthTp\tTruthFn\tRecall\tFscore\t";
    const string BaseHeader =
        "BaseQueryTotal\tBaseQueryTp\tBaseQueryFp\tBasePrecision\tBaseTruthTotal\tBaseTruthTp\tBaseTruthFn\tBaseRecall\tBaseFscore\n";

    var settings = WittyerSettings.Parser.Parse(args);
    var outputDir = settings.OutputDirectory;
    if (outputDir.ExistsNow())
    {
        if (outputDir.EnumerateFileSystemInfosSafe().Any())
        {
            Console.Error.WriteLine(
                $"Specified a output directory that's not empty: {settings.OutputDirectory.FullName}\n, witty.er requires an empty and clean output folder!");
            Environment.Exit(1);
        }
    }
    else
    {
        outputDir.Create();
    }

    var commandLine = Environment.CommandLine;

    // NOTE(review): if Select here is the standard lazy LINQ operator, the VCF files are written while
    // GenerateJson consumes this sequence, not at this statement - confirm before reordering.
    var results = MainLauncher.GenerateResults(settings).Select(attempt =>
    {
        var pair = attempt.GetOrThrow();
        var vcfFile = Path.Combine(settings.OutputDirectory.FullName,
                GenerateOutputFile(pair.query, pair.truth))
            .ToFileInfo();
        MainLauncher.GenerateMergedVcfStrings(pair.query, pair.truth, commandLine)
            .WriteToCompressedFile(vcfFile);

        // The outcome of the tabix step is deliberately ignored.
        _ = vcfFile.TryEnsureTabixed().GetOrDefault();
        return pair;
    });

    var result = MainLauncher.GenerateJson(settings, results, commandLine).GetOrThrow();

    // Each writer is opened before its payload is produced, same as the single-statement using form.
    using (var statsWriter =
        new StreamWriter(Path.Combine(settings.OutputDirectory.FullName, "Wittyer.Stats.json")))
    {
        statsWriter.Write(JsonConvert.SerializeObject(result, Formatting.Indented));
    }

    using (var configWriter =
        new StreamWriter(Path.Combine(settings.OutputDirectory.FullName, "Wittyer.ConfigFileUsed.json")))
    {
        configWriter.Write(settings.InputSpecs.Values.SerializeToString());
    }

    Console.WriteLine(Separator);
    Console.WriteLine("Overall Stats:");
    Console.WriteLine($"Overall EventPrecision: {result.EventPrecision:P3}");
    Console.WriteLine($"Overall EventRecall: {result.EventRecall:P3}");
    Console.WriteLine($"Overall EventFscore: {result.EventFscore:P3}");
    Console.WriteLine(Separator);
    Console.WriteLine(EventHeader + BaseHeader);

    foreach (var stats in result.PerSampleStats)
    {
        var sampleEvent = stats.OverallStats.Single(x => x.StatsType.Equals(StatsType.Event));
        var sampleBase = stats.OverallStats.Single(x => x.StatsType.Equals(StatsType.Base));

        var eventColumns =
            $"{stats.QuerySampleName}\t{stats.TruthSampleName}\t{sampleEvent.QueryTotalCount}\t{sampleEvent.QueryTpCount}\t" +
            $"{sampleEvent.QueryFpCount}\t{sampleEvent.Precision:P3}\t{sampleEvent.TruthTotalCount}\t" +
            $"{sampleEvent.TruthTpCount}\t{sampleEvent.TruthFnCount}\t{sampleEvent.Recall:P3}\t" +
            $"{sampleEvent.Fscore:P3}\t";
        var baseColumns =
            $"{sampleBase.QueryTotalCount}\t{sampleBase.QueryTpCount}\t" +
            $"{sampleBase.QueryFpCount}\t{sampleBase.Precision:P3}\t{sampleBase.TruthTotalCount}\t" +
            $"{sampleBase.TruthTpCount}\t{sampleBase.TruthFnCount}\t{sampleBase.Recall:P3}\t" +
            $"{sampleBase.Fscore:P3}";
        Console.WriteLine(eventColumns + baseColumns);
        Console.WriteLine();
        Console.WriteLine();

        // One event-level table per variant type of this sample pair.
        foreach (var typeStats in stats.DetailedStats)
        {
            Console.WriteLine(Separator);
            Console.WriteLine(typeStats.VariantType);
            Console.WriteLine(Separator);
            Console.WriteLine(EventHeader);

            var typeEvent = typeStats.OverallStats.Single(x => x.StatsType.Equals(StatsType.Event));
            var typeRow =
                $"{stats.QuerySampleName}\t{stats.TruthSampleName}\t{typeEvent.QueryTotalCount}\t{typeEvent.QueryTpCount}\t" +
                $"{typeEvent.QueryFpCount}\t{typeEvent.Precision:P3}\t{typeEvent.TruthTotalCount}\t" +
                $"{typeEvent.TruthTpCount}\t{typeEvent.TruthFnCount}\t{typeEvent.Recall:P3}\t" +
                $"{typeEvent.Fscore:P3}\t";
            Console.WriteLine(typeRow);
            Console.WriteLine();
            Console.WriteLine();
        }
    }

    // Builds "Wittyer.<sample(s)><vcf.gz suffix>"; uses whichever sample exists when one side is null.
    string GenerateOutputFile(IWittyerResult queryResult, IWittyerResult truthResult)
    {
        string samplePart;
        if (truthResult == null)
        {
            samplePart = queryResult.SampleName;
        }
        else if (queryResult == null)
        {
            samplePart = truthResult.SampleName;
        }
        else
        {
            samplePart = $"{truthResult.SampleName}.Vs.{queryResult.SampleName}";
        }

        return "Wittyer." + samplePart + WittyerConstants.VcfGzSuffix;
    }
}
/// <summary>
/// Runs witty.er with <c>EvaluationMode.SimpleCounting</c> on the CNV truth/query inputs and compares the
/// first sample pair's stats with the expected JSON stored in <c>CnvJsonSc</c>. It then cross-checks the
/// per-bin totals against totals recomputed directly from the input VCFs, once without and once with
/// reference calls included.
/// </summary>
public void CnvWorksWithSimpleCounting()
{
    if (MiscUtils.IsRunningAnyLinux)
    {
        return; // currently failing on linux :(
    }

    var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo();
    var wittyerSettings = WittyerSettings.Create(outputDirectory, CnvTruth, CnvQuery,
        ImmutableList<ISamplePair>.Empty, EvaluationMode.SimpleCounting, InputSpecs);

    var actualStats = MainLauncher
        .GenerateJson(wittyerSettings, MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses(),
            EmptyCmd).GetOrThrow().PerSampleStats.First();
    //var str = JsonConvert.SerializeObject(actualStats, Formatting.Indented);

    var jsonText = File.ReadAllText(CnvJsonSc.FullName);
    var expectedStats = JsonConvert.DeserializeObject<GeneralStats>(jsonText)
        .PerSampleStats.First();

    // --- overall event counts (all variant types together) ---
    var expectedOverall = expectedStats.OverallStats;
    var actualOverall = actualStats.OverallStats;

    var expectedOverallEventStats = expectedOverall.Single(x => x.StatsType.Equals(StatsType.Event));
    var actualOverallEventStats = actualOverall.Single(x => x.StatsType.Equals(StatsType.Event));

    MultiAssert.Equal(expectedOverallEventStats.QueryFpCount, actualOverallEventStats.QueryFpCount);
    MultiAssert.Equal(expectedOverallEventStats.QueryTpCount, actualOverallEventStats.QueryTpCount);
    MultiAssert.Equal(expectedOverallEventStats.QueryTotalCount, actualOverallEventStats.QueryTotalCount);
    MultiAssert.Equal(expectedOverallEventStats.TruthTpCount, actualOverallEventStats.TruthTpCount);
    MultiAssert.Equal(expectedOverallEventStats.TruthFnCount, actualOverallEventStats.TruthFnCount);
    MultiAssert.Equal(expectedOverallEventStats.TruthTotalCount, actualOverallEventStats.TruthTotalCount);

    // --- copy-number gain + loss stats, concatenated and then summed per stats type ---
    var expectedCnvTypeOverallStats = expectedStats.DetailedStats
        .Single(x => x.VariantType == WittyerType.CopyNumberGain.Name).OverallStats.Concat(expectedStats
            .DetailedStats.Single(x => x.VariantType == WittyerType.CopyNumberLoss.Name).OverallStats)
        .ToReadOnlyList();

    var actualCnvTypeOverallStats = actualStats.DetailedStats
        .Single(x => x.VariantType == WittyerType.CopyNumberGain.Name).OverallStats.Concat(actualStats
            .DetailedStats.Single(x => x.VariantType == WittyerType.CopyNumberLoss.Name).OverallStats)
        .ToReadOnlyList();

    var expectedOverallCnvEventStats = expectedCnvTypeOverallStats
        .Where(x => x.StatsType.Equals(StatsType.Event))
        .Aggregate(BasicJsonStats.Create(StatsType.Event, 0, 0, 0, 0), (acc, target) => acc + target);
    var actualOverallCnvEventStats = actualCnvTypeOverallStats
        .Where(x => x.StatsType.Equals(StatsType.Event))
        .Aggregate(BasicJsonStats.Create(StatsType.Event, 0, 0, 0, 0), (acc, target) => acc + target);

    MultiAssert.Equal(expectedOverallCnvEventStats.QueryFpCount, actualOverallCnvEventStats.QueryFpCount);
    MultiAssert.Equal(expectedOverallCnvEventStats.QueryTpCount, actualOverallCnvEventStats.QueryTpCount);
    MultiAssert.Equal(expectedOverallCnvEventStats.QueryTotalCount, actualOverallCnvEventStats.QueryTotalCount);
    MultiAssert.Equal(expectedOverallCnvEventStats.TruthTpCount, actualOverallCnvEventStats.TruthTpCount);
    MultiAssert.Equal(expectedOverallCnvEventStats.TruthFnCount, actualOverallCnvEventStats.TruthFnCount);
    MultiAssert.Equal(expectedOverallCnvEventStats.TruthTotalCount, actualOverallCnvEventStats.TruthTotalCount);

    var expectedOverallCnvBaseStats = expectedCnvTypeOverallStats
        .Where(x => x.StatsType.Equals(StatsType.Base))
        .Aggregate(BasicJsonStats.Create(StatsType.Base, 0, 0, 0, 0), (acc, target) => acc + target);
    var actualOverallCnvBaseStats = actualCnvTypeOverallStats
        .Where(x => x.StatsType.Equals(StatsType.Base))
        .Aggregate(BasicJsonStats.Create(StatsType.Base, 0, 0, 0, 0), (acc, target) => acc + target);

    MultiAssert.Equal(expectedOverallCnvBaseStats.QueryFpCount, actualOverallCnvBaseStats.QueryFpCount);
    MultiAssert.Equal(expectedOverallCnvBaseStats.QueryTpCount, actualOverallCnvBaseStats.QueryTpCount);
    MultiAssert.Equal(expectedOverallCnvBaseStats.QueryTotalCount, actualOverallCnvBaseStats.QueryTotalCount);
    MultiAssert.Equal(expectedOverallCnvBaseStats.TruthTpCount, actualOverallCnvBaseStats.TruthTpCount);
    MultiAssert.Equal(expectedOverallCnvBaseStats.TruthFnCount, actualOverallCnvBaseStats.TruthFnCount);
    MultiAssert.Equal(expectedOverallCnvBaseStats.TruthTotalCount, actualOverallCnvBaseStats.TruthTotalCount);

    #region test CNVs w/o Refs

    // ReSharper disable ConditionIsAlwaysTrueOrFalse
    var refs = false;
    var (expectedOrthogonalTruthBaseTotal, expectedOrthogonalTruthEventTotal) = GetTotalCnvs(CnvTruth, refs);
    var (expectedOrthogonalQueryBaseTotal, expectedOrthogonalQueryEventTotal) = GetTotalCnvs(CnvQuery, refs);

    var actualCnvBaseStatsBinned = GetCnvStats(actualStats, refs);
    var expectedCnvBaseStatsBinned = GetCnvStats(expectedStats, refs);

    var (actualTruthBaseTotal, actualQueryBaseTotal, actualTruthEventTotal, actualQueryEventTotal) =
        GetActualTotalStatsFromBins(expectedCnvBaseStatsBinned, actualCnvBaseStatsBinned, refs);

    // expected truth is off by 6, five of which is because of overlap. Last off by 1 is unexplained. Not sure why, but there could be a hidden bug somewhere.
    MultiAssert.Equal(expectedOrthogonalTruthBaseTotal, actualTruthBaseTotal - 6);
    MultiAssert.Equal(expectedOrthogonalQueryBaseTotal, actualQueryBaseTotal);
    MultiAssert.Equal(expectedOrthogonalTruthEventTotal, actualTruthEventTotal);
    MultiAssert.Equal(expectedOrthogonalQueryEventTotal, actualQueryEventTotal);
    MultiAssert.Equal(expectedOverallCnvBaseStats.TruthTotalCount, actualTruthBaseTotal - 5);
    MultiAssert.Equal(expectedOverallCnvBaseStats.QueryTotalCount, actualQueryBaseTotal);
    MultiAssert.Equal(expectedOrthogonalTruthBaseTotal, actualOverallCnvBaseStats.TruthTotalCount - 1);
    MultiAssert.Equal(expectedOrthogonalQueryBaseTotal, actualOverallCnvBaseStats.QueryTotalCount);

    #endregion

    #region test CNVs w/ Refs

    refs = true;
    (expectedOrthogonalTruthBaseTotal, expectedOrthogonalTruthEventTotal) = GetTotalCnvs(CnvTruth, refs);
    (expectedOrthogonalQueryBaseTotal, expectedOrthogonalQueryEventTotal) = GetTotalCnvs(CnvQuery, refs);

    actualCnvBaseStatsBinned = GetCnvStats(actualStats, refs);
    expectedCnvBaseStatsBinned = GetCnvStats(expectedStats, refs);

    (actualTruthBaseTotal, actualQueryBaseTotal, actualTruthEventTotal, actualQueryEventTotal) =
        GetActualTotalStatsFromBins(expectedCnvBaseStatsBinned, actualCnvBaseStatsBinned, refs);
    // ReSharper restore ConditionIsAlwaysTrueOrFalse

    // expected truth is off by 6, five of which is because of overlap. Last off by 1 is unexplained. Not sure why, but there could be a hidden bug somewhere.
    MultiAssert.Equal(expectedOrthogonalTruthBaseTotal, actualTruthBaseTotal - 6);
    MultiAssert.Equal(expectedOrthogonalQueryBaseTotal, actualQueryBaseTotal);
    MultiAssert.Equal(expectedOverall.Single(j => j.StatsType == StatsType.Base).TruthTotalCount,
        actualTruthBaseTotal - 6);
    MultiAssert.Equal(expectedOverall.Single(j => j.StatsType == StatsType.Base).QueryTotalCount,
        actualQueryBaseTotal);
    MultiAssert.Equal(expectedOrthogonalTruthEventTotal, actualTruthEventTotal);
    MultiAssert.Equal(expectedOrthogonalQueryEventTotal, actualQueryEventTotal);
    MultiAssert.Equal(expectedOrthogonalTruthBaseTotal,
        actualOverall.Single(s => s.StatsType == StatsType.Base).TruthTotalCount);
    MultiAssert.Equal(expectedOrthogonalQueryBaseTotal,
        actualOverall.Single(s => s.StatsType == StatsType.Base).QueryTotalCount);

    #endregion

    MultiAssert.AssertAll();

    // Sums the per-bin stats of the copy-number gain/loss types (plus the copy-number reference type when
    // includeRef is set) into a bin -> stats type -> summed stats lookup.
    Dictionary<string, Dictionary<StatsType, BasicJsonStats>> GetCnvStats(SampleStats sampleStats,
        bool includeRef)
        => sampleStats.DetailedStats
            .Where(x => x.VariantType.Equals(WittyerType.CopyNumberGain.Name)
                        || x.VariantType.Equals(WittyerType.CopyNumberLoss.Name)
                        || (includeRef && x.VariantType.Equals(WittyerType.CopyNumberReference.Name)))
            .SelectMany(v => v.PerBinStats)
            .GroupBy(s => s.Bin).ToDictionary(binGroups => binGroups.Key,
                binGroups => binGroups.SelectMany(binStats => binStats.Stats).GroupBy(s => s.StatsType)
                    .ToDictionary(statsGroup => statsGroup.Key,
                        statsGroup => statsGroup.Aggregate(BasicJsonStats.Create(statsGroup.Key, 0, 0, 0, 0),
                            (acc, stat) => acc + stat)));

    // Re-reads a VCF and returns the total length of the per-contig merged interval trees together with
    // the number of records accepted by IsCountedCnv.
    // Throws ParserException when END or SVLEN cannot be parsed, and NotImplementedException when a
    // counted record has neither of them.
    (ulong totalLength, uint numEvents) GetTotalCnvs(FileInfo vcf, bool includeRefs)
    {
        var trees = new ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>();

        // DO NOT delete: the line below are left there case we want to test without overlapping variants for test tweaking etc. i.e. it's debug code.
        // IContigAndInterval lastInterval = null;
        var numEvents = 0U;
        foreach (var variant in VcfReader.TryCreate(vcf).GetOrThrow().Select(v => v.GetOrThrow()))
        {
            if (!IsCountedCnv(variant))
            {
                continue;
            }

            numEvents++;
            var tree = trees.GetOrAdd(variant.Contig, _ => MergedIntervalTree.Create<uint>());

            IContigAndInterval interval;
            var start = variant.Position;
            if (variant.Info.TryGetValue(VcfConstants.EndTagKey, out var end))
            {
                // END takes precedence over SVLEN when both are present.
                if (uint.TryParse(end, out var endVal))
                {
                    interval = GetInterval(variant.Contig, start, endVal);
                }
                else
                {
                    throw new ParserException($"couldn't parse {end} into END!");
                }
            }
            else if (variant.Info.TryGetValue(VcfConstants.SvLenKey, out var svLen))
            {
                if (int.TryParse(svLen, out var len))
                {
                    // A negative SVLEN extends to the left of the position, a positive one to the right.
                    interval = len < 0
                        ? GetInterval(variant.Contig, start - (uint)-len, start)
                        : GetInterval(variant.Contig, start, start + (uint)len);
                }
                else
                {
                    throw new ParserException($"couldn't parse {svLen} into svLen!");
                }
            }
            else
            {
                throw new NotImplementedException(
                    "Parsing using anything but END or SVLEN is not supported, should probably add it.");
            }

            tree.Add(interval);

            // DO NOT delete: the line below are left there case we want to test without overlapping variants for test tweaking etc. i.e. it's debug code.
            //lastInterval = interval;
        }

        return (trees.GetTotalLength(), numEvents);

        IContigAndInterval GetInterval(IContigInfo contig, uint position, uint end)
        {
            var interval = ContigAndInterval.Create(contig, position, end);
            return interval;

            // DO NOT delete: the remaining lines below are left there case we want to test without overlapping variants for test tweaking etc. i.e. it's debug code.
            //if (lastInterval == null || !interval.Contig.Equals(lastInterval.Contig)) return interval;

            //// adjust for possible overlaps between bins. (see https://jira.illumina.com/browse/WIT-84)
            //var overlap = interval.TryGetOverlap(lastInterval).Select(o => o.GetLength()).GetOrDefault();
            //if (overlap > 0)
            //    interval = ContigAndInterval.Create(interval.Contig, interval.Start + overlap,
            //        interval.Stop + overlap);
            //return interval;
        }

        // Returns the only element of the sequence, or default(T) when it holds zero or several elements.
        // Unlike Enumerable.SingleOrDefault, it never throws for sequences with more than one element.
        T SoleOrDefault<T>(IEnumerable<T> source)
        {
            var firstTwo = source.Take(2).ToList();
            return firstTwo.Count == 1 ? firstTwo[0] : default(T);
        }

        // Decides whether a VCF record contributes to the recomputed totals.
        bool IsCountedCnv(IVcfVariant variant)
        {
            if (SoleOrDefault(variant.Filters) != VcfConstants.PassFilter)
            {
                // if not single or not PASS return false
                return false;
            }

            // Only a record whose single ALT is the missing value counts as a reference call here;
            // several ALTs mean "not a reference call" rather than an exception.
            var isRef = SoleOrDefault(variant.Alts) == VcfConstants.MissingValueString;
            if (isRef)
            {
                return includeRefs;
            }

            if (variant.Samples.Count == 0)
            {
                return false;
            }

            var hasCn = variant.Samples[0].SampleDictionary
                .TryGetValue(VcfConstants.CnSampleFieldKey, out var cn);
            var hasGt = variant.Samples[0].SampleDictionary.TryGetValue(VcfConstants.GenotypeKey, out var gt);
            if (!hasCn)
            {
                return includeRefs && // no cn means only true if we include Refs
                       hasGt && gt.Split(VcfConstants.GtPhasedValueDelimiter[0],
                               VcfConstants.GtUnphasedValueDelimiter[0])
                           .All(x => x == "0");
            }

            if (!variant.Info.TryGetValue(VcfConstants.SvTypeKey, out var svType) ||
                !WittyerConstants.BaseLevelStatsTypeStrings.Contains(svType))
            {
                return false;
            }

            if (!int.TryParse(cn, out var ploidy))
            {
                return false;
            }

            // A call is treated as reference when CN equals the number of GT alleles (2 when GT is absent).
            isRef = (hasGt
                        ? gt.Split(VcfConstants.GtPhasedValueDelimiter[0],
                            VcfConstants.GtUnphasedValueDelimiter[0]).Length
                        : 2) == ploidy;
            return !isRef || includeRefs;
        }
    }

    // Walks every (bin, stats type) entry of the expected lookup, sums the matching ACTUAL truth/query
    // totals, and records a MultiAssert failure for every entry whose stats differ.
    // Returns (truth base, query base, truth event, query event).
    // The indexer throws if the actual lookup lacks a bin or stats type that the expected one has.
    (ulong, ulong, ulong, ulong) GetActualTotalStatsFromBins(
        Dictionary<string, Dictionary<StatsType, BasicJsonStats>> expectedBinned,
        Dictionary<string, Dictionary<StatsType, BasicJsonStats>> actualBinned, bool includeRefs)
    {
        var actualTruthBase = 0UL;
        var actualQueryBase = 0UL;
        var actualTruthEvent = 0UL;
        var actualQueryEvent = 0UL;
        foreach (var (bin, binStats) in expectedBinned)
        {
            foreach (var (type, expectedCnvStats) in binStats)
            {
                var actualCnvStats = actualBinned[bin][type];
                if (type == StatsType.Base)
                {
                    actualTruthBase += actualCnvStats.TruthTotalCount;
                    actualQueryBase += actualCnvStats.QueryTotalCount;
                }
                else
                {
                    actualTruthEvent += actualCnvStats.TruthTotalCount;
                    actualQueryEvent += actualCnvStats.QueryTotalCount;
                }

                if (!expectedCnvStats.Equals(actualCnvStats))
                {
                    MultiAssert.Equal(expectedCnvStats, actualCnvStats);
                    // Never equal ("Expected " vs. "True"/"False"): it only tags the failure above with
                    // the includeRefs pass it happened in.
                    MultiAssert.Equal("Expected ", includeRefs.ToString());
                }
            }
        }

        return (actualTruthBase, actualQueryBase, actualTruthEvent, actualQueryEvent);
    }
}