// Builds a WittyerResult holding a single intra-chromosome breakend made from Bnd1/Bnd2,
// renders it with WittyerVcfWriter.GenerateVcfStrings, and checks how many leading lines
// start with the VCF header prefix.
public void GenerateVcfStrings_IncludeHeaders()
{
    // currently failing on linux :(
    if (MiscUtils.IsRunningAnyLinux)
    {
        return;
    }

    var parserSettings = VcfVariantParserSettings.Create(ImmutableList.Create(SampleName));
    var parsedBnds = VcfVariant.TryParse(Bnd1, parserSettings)
        .FollowedBy(VcfVariant.TryParse(Bnd2, parserSettings))
        .EnumerateSuccesses()
        .ToList();

    // The first parsed breakend supplies the sample; the second one is handed over as the
    // last argument of WittyerBndInternal.Create.
    var firstSample = parsedBnds[0]
        .ToTryOfGenotypedVcfVariant(VariantNormalizer.TrimCommonBases)
        .GetOrThrow()
        .Samples.Values.First();
    var bndPair = WittyerBndInternal.Create(
        parsedBnds[0],
        firstSample,
        WittyerType.IntraChromosomeBreakend,
        new List<uint>(),
        uint.MinValue,
        null,
        parsedBnds[1]);

    var bareHeader = VcfHeader.CreateBuilder(VcfVersion.FourPointOne).Build();
    var contigs = parsedBnds.Select(bnd => bnd.Contig).Distinct().ToList();
    var noVariants = new Dictionary<WittyerType, IReadOnlyList<IWittyerVariant>>();
    var bndsByType = new Dictionary<WittyerType, IReadOnlyList<IWittyerBnd>>
    {
        { WittyerType.IntraChromosomeBreakend, new List<IWittyerBnd> { bndPair } }
    };
    var result = WittyerResult.Create(
        bareHeader,
        SampleName,
        contigs,
        false,
        noVariants,
        bndsByType,
        new List<IVcfVariant>());

    // Only the leading run of header-prefixed lines is kept.
    var headerLines = WittyerVcfWriter.GenerateVcfStrings(result, null, null)
        .TakeWhile(line => line.StartsWith(VcfConstants.Header.Prefix))
        .ToList();

    // 11 = VcfVersion, WHO, WHAT, WHERE, WHY, WIT, WIN, WOW, date, version, column names
    Assert.Equal(11, headerLines.Count);
}
// Parses one truth and one query VCF line, overlaps the query against a truth forest that
// holds the truth variant, finalizes both, and checks the WIT decision on each sample.
// When isTp is true both sides are expected to be TruePositive; otherwise the query is
// expected to be FalsePositive and the truth FalseNegative.
public void OverlapWorks_DupDel([NotNull] string truthVar, [NotNull] string queryVar, string type, bool isTp)
{
    const string sampleName = "blah";
    const bool isCrossTypeOn = true;

    var parserSettings = VcfVariantParserSettings.Create(new List<string> { sampleName });
    var truthVcf = VcfVariant.TryParse(truthVar, parserSettings).GetOrThrow();

    var wittyerType = WittyerType.Parse(type);
    var specsByType = InputSpec
        .GenerateCustomInputSpecs(!isCrossTypeOn, new[] { wittyerType }, percentDistance: PercentDistance)
        .ToDictionary(spec => spec.VariantType, spec => spec);

    // Shared by both CreateVariant calls below.
    var bndSet = new Dictionary<IGeneralBnd, IVcfVariant>();
    var errorList = new List<string>();

    var truthV = (IMutableWittyerSimpleVariant)WittyerVcfReader.CreateVariant(
        truthVcf,
        truthVcf.Samples.First().Value,
        true,
        sampleName,
        specsByType,
        bndSet,
        errorList,
        isCrossTypeOn);

    var queryVcf = VcfVariant.TryParse(queryVar, parserSettings).GetOrThrow();
    var queryV = (IMutableWittyerVariant)WittyerVcfReader.CreateVariant(
        queryVcf,
        queryVcf.Samples.First().Value,
        false,
        sampleName,
        specsByType,
        bndSet,
        errorList,
        isCrossTypeOn);

    var forest = TruthForest.Create(sampleName, VcfHeader.CreateBuilder(VcfVersion.FourPointOne).Build());
    forest.AddTarget(truthV);

    OverlappingUtils.DoOverlapping(
        forest.VariantTrees,
        queryV,
        OverlappingUtils.IsVariantAlleleMatch,
        isCrossTypeOn,
        true);

    // Query is finalized before truth, each with the decision passed in here.
    queryV.Finalize(WitDecision.FalsePositive, EvaluationMode.CrossTypeAndSimpleCounting, null);
    truthV.Finalize(WitDecision.FalseNegative, EvaluationMode.CrossTypeAndSimpleCounting, null);

    var expectedQueryWit = isTp ? WitDecision.TruePositive : WitDecision.FalsePositive;
    var expectedTruthWit = isTp ? WitDecision.TruePositive : WitDecision.FalseNegative;
    Assert.Equal(expectedQueryWit, queryV.Sample.Wit);
    Assert.Equal(expectedTruthWit, truthV.Sample.Wit);
}
// Merges the column meta-info lines of the truth and query headers into one sequence.
//
// truthHeader: header whose ALT, FILTER, contig, INFO, FORMAT and sample-meta lines are merged.
// cmdLine:     passed through to GenerateWittyerLines; may be null.
// queryHeader: header merged with truthHeader.
//
// Yields the merged ALT lines, then FILTER, contig and sample-meta lines, followed by whatever
// GenerateWittyerLines produces from the merged INFO lines, merged FORMAT lines and cmdLine.
//
// This is an iterator method, so none of the merging runs until the result is enumerated.
// An earlier version also built a VcfHeader builder here that was never read; that dead
// local has been removed.
internal static IEnumerable<IBasicMetaLine> MergeMetaLines([NotNull] this IVcfHeader truthHeader, [CanBeNull] string cmdLine, [NotNull] IVcfHeader queryHeader)
{
    var truthMetaLines = truthHeader.ColumnMetaInfoLines;
    var queryMetaLines = queryHeader.ColumnMetaInfoLines;

    var altLines = MergeMetaInfoLines(truthMetaLines.AltLines, Alt, queryMetaLines.AltLines);
    var filterLines = MergeMetaInfoLines(truthMetaLines.FilterLines, Filter, queryMetaLines.FilterLines);

    // Unlike the other merges, the contig merge takes the query lines first.
    var contigLines = MergeContigLines(queryMetaLines.ContigLines, truthMetaLines.ContigLines);

    var infoLines = MergeTypedMetaLines(truthMetaLines.InfoLines, Info, queryMetaLines.InfoLines);
    var sampleFormatLines = MergeTypedMetaLines(truthMetaLines.SampleFormatLines, SampleFormat, queryMetaLines.SampleFormatLines);
    var sampleMetaLines = MergeTypedMetaLines(truthMetaLines.SampleMetaLines, Meta, queryMetaLines.SampleMetaLines);

    // INFO and FORMAT lines are not emitted directly; they only reach the output through
    // GenerateWittyerLines.
    var mergedLines = altLines
        .Concat(filterLines)
        .Concat(contigLines)
        .Concat(sampleMetaLines)
        .Concat(GenerateWittyerLines(infoLines, sampleFormatLines, cmdLine));

    foreach (var line in mergedLines)
    {
        yield return line;
    }
}
// Merges the truth and query "wit-141" resource headers, re-parses the merged lines as a
// header, and checks that the Wittyer INFO/FORMAT keys are present and that each input
// header has exactly one prefix that does not appear in the merged header.
public void MergeHeaderWorks()
{
    // Splits a line into at most 4 pieces on KeyValueDelimiter[0], keeps only pieces longer
    // than 2 characters, and returns the third such piece (null when there are fewer).
    string GetPrefix(string line)
    {
        var separators = new[] { VcfConstants.Header.MetaInfoLines.KeyValueDelimiter[0] };
        return line.Split(separators, 4)
            .Where(piece => piece.Length > 2)
            .Skip(2)
            .FirstOrDefault();
    }

    var queryFile = Path.Combine("Resources", "VcfHeaders", "query.wit-141.vcf").ToFileInfo();
    var queryHeader = VcfHeader.TryCreate(queryFile).GetOrThrow();
    var truthFile = Path.Combine("Resources", "VcfHeaders", "truth.wit-141.vcf").ToFileInfo();
    var truthHeader = VcfHeader.TryCreate(truthFile).GetOrThrow();

    var mergedLines = truthHeader.MergedWith(queryHeader, SamplePair.Default, null).ToList();
    var merged = VcfHeader.TryCreate(mergedLines).GetOrThrow();

    // FORMAT keys expected in the merged header.
    var mergedFormatLines = merged.ColumnMetaInfoLines.SampleFormatLines;
    MultiAssert.True(mergedFormatLines.ContainsKey(WittyerConstants.WittyerMetaInfoLineKeys.What));
    MultiAssert.True(mergedFormatLines.ContainsKey(WittyerConstants.WittyerMetaInfoLineKeys.Why));
    MultiAssert.True(mergedFormatLines.ContainsKey(WittyerConstants.WittyerMetaInfoLineKeys.Wit));

    // INFO keys expected in the merged header.
    var mergedInfoLines = merged.ColumnMetaInfoLines.InfoLines;
    MultiAssert.True(mergedInfoLines.ContainsKey(WittyerConstants.WittyerMetaInfoLineKeys.Who));
    MultiAssert.True(mergedInfoLines.ContainsKey(WittyerConstants.WittyerMetaInfoLineKeys.Where));
    MultiAssert.True(mergedInfoLines.ContainsKey(WittyerConstants.WittyerMetaInfoLineKeys.Win));
    MultiAssert.True(mergedInfoLines.ContainsKey(WittyerConstants.WittyerMetaInfoLineKeys.Wow));

    // 1 different because of date in
    // ##bcftools_viewCommand=view -h /home/hking/manta_2by250_sv_dragen/manta_2by250_sv_dragen.sv.vcf.gz; Date=Fri Jul 19 07:39:26 2019
    var queryPrefixes = queryHeader.Select(GetPrefix).Where(it => it != null).ToImmutableHashSet();
    var diffs = queryPrefixes.Except(merged.Select(GetPrefix).Where(it => it != null));
    MultiAssert.True(diffs.Count == 1);

    // 1 different because of date in
    // ##bcftools_viewCommand=view -h NA12878_pbmm_v1.0.0_pbsv_v2.2.0_hg38_20190430_witty_format.vcf.gz; Date=Fri Jul 19 07:38:56 2019
    var truthPrefixes = truthHeader.Select(GetPrefix).Where(it => it != null).ToImmutableHashSet();
    diffs = truthPrefixes.Except(merged.Select(GetPrefix).Where(it => it != null));
    MultiAssert.True(diffs.Count == 1);

    MultiAssert.AssertAll();
}
// Builds a comparer from the contigs listed in the query and truth resource headers and
// checks that it orders Bnd1 before Bnd2.
public void ComparerWorks()
{
    // currently failing on linux :(
    if (MiscUtils.IsRunningAnyLinux)
    {
        return;
    }

    // Reads the header of the named file under Resources/VcfHeaders and returns the contigs
    // of its contig meta-info lines.
    IReadOnlyList<IContigInfo> LoadContigs(string fileName)
    {
        var headerFile = Path.Combine("Resources", "VcfHeaders", fileName).ToFileInfo();
        var header = VcfHeader.TryCreate(headerFile).GetOrThrow();
        return header.ColumnMetaInfoLines.ContigLines
            .Select(kvp => kvp.Value.Contig)
            .ToReadOnlyList();
    }

    var queryContigs = LoadContigs("query.vcf.gz");
    var truthContigs = LoadContigs("truth.vcf.gz");
    var comparer = WittyerVcfWriter.CreateComparer(queryContigs, truthContigs);

    var parserSettings = VcfVariantParserSettings.Create(ImmutableList.Create(SampleName));
    var bnd1 = VcfVariant.TryParse(Bnd1, parserSettings).GetOrThrow();
    var bnd2 = VcfVariant.TryParse(Bnd2, parserSettings).GetOrThrow();

    var ordering = comparer.Compare(bnd1, bnd2);
    Assert.True(ordering < 0);
}
// Produces the lines of a merged header built from the truth and query headers.
//
// truthHeader: supplies the VCF version and, when available, the reference genome.
// queryHeader: header merged with truthHeader.
// pair:        sample pair forwarded to AddSampleMetaInfo.
// cmdLine:     forwarded to MergeMetaLines; may be null.
//
// Returns the built header converted with ToStrings().
internal static IEnumerable<string> MergedWith([NotNull] this IVcfHeader truthHeader, [NotNull] IVcfHeader queryHeader, [NotNull] ISamplePair pair, [CanBeNull] string cmdLine)
{
    var mergedMetaLines = truthHeader.MergeMetaLines(cmdLine, queryHeader);

    // Start from the truth header's version with the two default sample columns.
    VcfHeader.Builder wittyBuilder = VcfHeader.CreateBuilder(truthHeader.Version)
        .AddSampleColumn(DefaultTruthSampleName)
        .AddSampleColumn(DefaultQuerySampleName);

    // The reference is only set when the truth header's ReferenceGenome is a success.
    truthHeader.ReferenceGenome.DoOnSuccess(reference => wittyBuilder.SetReference(reference));

    return wittyBuilder
        .AddSampleMetaInfo(truthHeader, pair, queryHeader, mergedMetaLines)
        .Build()
        .ToStrings();
}