/// <summary>
/// Feeds three chr1 records (two of them at position 4) to the recomposer and expects
/// exactly one recomposed record with the VCF fields asserted below.
/// </summary>
public void VariantGenerator_ForceGenotype_ConsistentAllele_Recompose()
{
    // Arrange: mocked sequence provider backed by a short in-memory reference.
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAATCGCGA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var vcfLines = new[]
    {
        "chr1 2 . A T . PASS . GT 1|1 0/0",
        "chr1 4 . C G . PASS . GT 0|1 0|1",
        "chr1 4 . C G,A . PASS . GT 0|1 0/0"
    };
    var inputPositions = vcfLines
        .Select(line => (ISimplePosition)AnnotationUtilities.GetSimplePosition(line, refNameToChromosome))
        .ToList();
    var blockRanges = new List<int> { 4, 6, 6 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    var onlyRecord = Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGC TGC,TGG . PASS RECOMPOSED GT 1|2 .", string.Join("\t", onlyRecord.VcfFields));
}
/// <summary>
/// Recomposes three multi-sample records whose GQ values are partly "." or absent and
/// checks the GQ/PS columns of the two recomposed records against the expected strings.
/// </summary>
public void VariantGenerator_MinGQUsed_DotAndNullIgnored()
{
    // Arrange
    var chromosomes = new Dictionary<string, IChromosome> { ["chr1"] = new Chromosome("chr1", "1", 0) };
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(chromosomes);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T,G . PASS . GT:PS:GQ 0|1:123:. 2/2:.:14.2 0|2:456:.", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A,G . PASS . GT:PS:GQ 1|1:301:. 1|2:.:18 1|2:456:15.6", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT . 1|0 0/1", refNameToChromosome)
    };
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Equal(2, recomposed.Count);

    var firstLine = string.Join("\t", recomposed[0].VcfFields);
    Assert.Equal("chr1 2 . AGC AGA,GGG,TGA . PASS RECOMPOSED GT:GQ:PS 1|3:.:123 . 1|2:15.6:456", firstLine);

    var secondLine = string.Join("\t", recomposed[1].VcfFields);
    Assert.Equal("chr1 2 . AGCTG GGATC,GGGTG . PASS RECOMPOSED GT:GQ . 1|2:14.2 .", secondLine);
}
/// <summary>
/// Recomposes records whose FILTER columns are "PASS", "." and "FailedForSomeReason" and
/// checks the FILTER value of each recomposed record: "PASS" for the first and
/// "FilteredVariantsRecomposed" for the second.
/// </summary>
public void VariantGenerator_FilterTag_DotTreatedAsPass()
{
    // Arrange
    var chromosomes = new Dictionary<string, IChromosome> { ["chr1"] = new Chromosome("chr1", "1", 0) };
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(chromosomes);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var vcfLines = new[]
    {
        "chr1 2 . A T,G . PASS . GT:PS 0|1:123 2/2:. 0|2:456",
        "chr1 4 . C A,G . . . GT:PS 1|1:301 1|2:. 1|2:456",
        "chr1 6 . G C . FailedForSomeReason . GT:PS . 1|0:. 0/1:456"
    };
    var inputPositions = vcfLines
        .Select(line => (ISimplePosition)AnnotationUtilities.GetSimplePosition(line, refNameToChromosome))
        .ToList();
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Equal(2, recomposed.Count);
    Assert.Equal(
        "chr1 2 . AGC AGA,GGG,TGA . PASS RECOMPOSED GT:PS 1|3:123 . 1|2:456",
        string.Join("\t", recomposed[0].VcfFields));
    Assert.Equal(
        "chr1 2 . AGCTG GGATC,GGGTG . FilteredVariantsRecomposed RECOMPOSED GT . 1|2 .",
        string.Join("\t", recomposed[1].VcfFields));
}
/// <summary>
/// Recomposes two PASS records and one "FailedForSomeReason" record across two samples and
/// expects a single recomposed record whose FILTER column is "PASS".
/// </summary>
public void VariantGenerator_FilterTag_PassedMnvOverridesFailedOne()
{
    // Arrange
    var chromosomes = new Dictionary<string, IChromosome> { ["chr1"] = new Chromosome("chr1", "1", 0) };
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(chromosomes);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 0|1 0|1", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A . FailedForSomeReason . GT 0|0 0|1", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT 0|1 0|1", refNameToChromosome)
    };
    var blockRanges = new List<int> { 6, 8, 10 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    var onlyRecord = Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGCTG TGATC,TGCTC . PASS RECOMPOSED GT 0|2 0|1", string.Join("\t", onlyRecord.VcfFields));
}
/// <summary>
/// Recomposes two adjacent records where the second sample is 0/0 at both sites and
/// expects that sample to appear as "0|0" in the single recomposed record.
/// </summary>
public void VariantGenerator_HomoReferenceGenotype_Output()
{
    // Arrange
    var chromosomes = new Dictionary<string, IChromosome> { ["chr1"] = new Chromosome("chr1", "1", 0) };
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(chromosomes);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var vcfLines = new[]
    {
        "chr1 1 . C A . PASS . GT:PS 0|1:1584593 0/0:.",
        "chr1 2 . A C . PASS . GT:PS 0|1:1584593 0/0:."
    };
    var inputPositions = vcfLines
        .Select(line => (ISimplePosition)AnnotationUtilities.GetSimplePosition(line, refNameToChromosome))
        .ToList();
    var blockRanges = new List<int> { 3, 4 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    var onlyRecord = Assert.Single(recomposed);
    Assert.Equal("chr1 1 . CA AC . PASS RECOMPOSED GT:PS 0|1:1584593 0|0", string.Join("\t", onlyRecord.VcfFields));
}
/// <summary>
/// Supplies two SNVs plus a long deletion starting at position 4 and expects a single
/// recomposed record built from the SNVs only, as asserted below.
/// </summary>
public void VariantGenerator_OverlappingDeletionAtTheEnd_Ignored()
{
    // Arrange
    var chromosomes = new Dictionary<string, IChromosome> { ["chr1"] = new Chromosome("chr1", "1", 0) };
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(chromosomes);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAATCGCGA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 0|1 0/0", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A . PASS . GT 1|1 0/0", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 4 . CTGAATCGCGA C . PASS . GT 0/0 0|1", refNameToChromosome)
    };
    var blockRanges = new List<int> { 4, 6, 6 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    var onlyRecord = Assert.Single(recomposed);
    Assert.Equal("chr1 2 . AGC AGA,TGA . PASS RECOMPOSED GT 1|2 0|0", string.Join("\t", onlyRecord.VcfFields));
}
/// <summary>
/// Recomposes records with QUAL values ".", "45" and "30.1" and checks the QUAL column of
/// the two recomposed records ("45" and "30.1" respectively).
/// </summary>
public void VariantGenerator_MinQualUsed_DotIgnored()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var vcfLines = new[]
    {
        "chr1 2 . A T,G . PASS . GT:PS 0|1:123 2/2:. 0|2:456",
        "chr1 4 . C A,G 45 PASS . GT:PS 1|1:301 1|2:. 1|2:456",
        "chr1 6 . G C 30.1 PASS . GT . 1|0 0/1"
    };
    var inputPositions = vcfLines
        .Select(line => (ISimplePosition)AnnotationUtilities.GetSimplePosition(line, refNameToChromosome))
        .ToList();
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Equal(2, recomposed.Count);
    Assert.Equal(
        "chr1 2 . AGC AGA,GGG,TGA 45 PASS RECOMPOSED GT:PS 1|3:123 . 1|2:456",
        string.Join("\t", recomposed[0].VcfFields));
    Assert.Equal(
        "chr1 2 . AGCTG GGATC,GGGTG 30.1 PASS RECOMPOSED GT . 1|2 .",
        string.Join("\t", recomposed[1].VcfFields));
}
/// <summary>
/// Parses a VCF line whose only ALT allele is &lt;NON_REF&gt; at a position where the mocked
/// ref-minor provider reports "T" as the global major allele, then checks that the position
/// keeps the raw alleles while the resulting variant is reported as T&gt;C.
/// </summary>
public void ParseVcfLine_line_with_only_NonRef_is_refMinor()
{
    // Arrange
    const string vcfLine = "1 10628385 . C <NON_REF> . LowGQX;HighDPFRatio END=10628385;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:24:9:18";

    var chromosome = new Chromosome("chr1", "1", 0);
    var refMinorProvider = new Mock<IRefMinorProvider>();
    refMinorProvider.Setup(x => x.GetGlobalMajorAllele(chromosome, 10628385)).Returns("T");

    var seqProvider = ParserTestUtils.GetSequenceProvider(10628385, "C", 'A',
        new Dictionary<string, IChromosome> { ["1"] = chromosome });
    var refNameToChromosome = seqProvider.RefNameToChromosome;
    var variantFactory = new VariantFactory(seqProvider);

    // Act
    var position = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, variantFactory, refNameToChromosome);
    var annotatedVariants = Annotator.GetAnnotatedVariants(position.Variants);

    // Assert: the position itself keeps the alleles from the VCF line.
    Assert.Equal("C", position.RefAllele);
    Assert.Equal(new[] { "<NON_REF>" }, position.AltAlleles);
    Assert.Equal("T", position.Variants[0].RefAllele);
    Assert.Equal("C", position.Variants[0].AltAllele);

    // Variants
    // The comment above sits on its own line so the assertion below (and the closing brace)
    // are live code rather than part of the comment.
    Assert.Equal(new[] { "C" }, annotatedVariants.Select(x => x.Variant.AltAllele).ToArray());
}
/// <summary>
/// Recomposes three-sample records in which some sample columns are "./." or carry fewer
/// fields than FORMAT declares, and checks the sample columns of the single recomposed record.
/// </summary>
public void VariantGenerator_AllTrailingMissingValuesDroped()
{
    // Arrange
    var chromosomes = new Dictionary<string, IChromosome> { ["chr1"] = new Chromosome("chr1", "1", 0) };
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(chromosomes);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T,G . PASS . GT:GQ:PS 0|1:.:123 2/2 1|1:17:456", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A,G . PASS . GT:GQ:PS ./. 1|2 1|2:15.6:456", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT:GQ:PS ./. 1|0 1|1:13:456", refNameToChromosome)
    };
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    var onlyRecord = Assert.Single(recomposed);
    Assert.Equal(
        "chr1 2 . AGCTG GGATC,GGGTG,TGATC,TGGTC . PASS RECOMPOSED GT:GQ:PS . 1|2 3|4:13:456",
        string.Join("\t", onlyRecord.VcfFields));
}
/// <summary>
/// Recomposes four records across four samples and checks the ALT allele list and
/// genotype indices of the single recomposed record.
/// </summary>
public void VariantGenerator_RefAllelesAddedToMergeAlleles()
{
    // Arrange
    var chromosomes = new Dictionary<string, IChromosome> { ["chr1"] = new Chromosome("chr1", "1", 0) };
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(chromosomes);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAACT"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var vcfLines = new[]
    {
        "chr1 2 . A T . PASS . GT 0|1 0|0 0|1 0|0",
        "chr1 4 . C A . PASS . GT 1|1 1|1 1|1 1|1",
        "chr1 6 . G C . PASS . GT 1|1 1|1 1|1 1|1",
        "chr1 8 . A G . PASS . GT 0|1 0|1 0|0 0|0"
    };
    var inputPositions = vcfLines
        .Select(line => (ISimplePosition)AnnotationUtilities.GetSimplePosition(line, refNameToChromosome))
        .ToList();
    var blockRanges = new List<int> { 4, 6, 8, 10 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    var onlyRecord = Assert.Single(recomposed);
    Assert.Equal(
        "chr1 2 . AGCTGAA AGATCAA,AGATCAG,TGATCAA,TGATCAG . PASS RECOMPOSED GT 1|4 1|2 1|3 1|1",
        string.Join("\t", onlyRecord.VcfFields));
}
/// <summary>
/// Recomposes five consecutive records that alternate between unphased homozygous calls and
/// phased calls carrying phase-set ids, and checks the three recomposed records produced.
/// </summary>
public void VariantGenerator_HomozygousSitesAndPhasedSites_Recomposed()
{
    // Arrange
    var chromosomes = new Dictionary<string, IChromosome> { ["chr1"] = new Chromosome("chr1", "1", 0) };
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(chromosomes);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 1/1 1/1 1/1", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 3 . G A,G 45 PASS . GT:PS 1|1:2 1|2:2 1|2:2", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A 45 PASS . GT 1/1 1/1 1/1", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 5 . T A,G 45 PASS . GT:PS 1|1:4 1|2:2 1|2:4", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C 30 PASS . GT 1/1 1/1 1/1", refNameToChromosome)
    };
    var blockRanges = new List<int> { 4, 5, 6, 7, 8 };

    // Act
    var generator = new VariantGenerator(provider);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Equal(3, recomposed.Count);
    Assert.Equal(
        "chr1 2 . AGC TAA,TGA 45 PASS RECOMPOSED GT:PS . . 1|2:2",
        string.Join("\t", recomposed[0].VcfFields));
    Assert.Equal(
        "chr1 2 . AGCTG TAAAC,TGAGC 30 PASS RECOMPOSED GT:PS 1|1 1|2:2 .",
        string.Join("\t", recomposed[1].VcfFields));
    Assert.Equal(
        "chr1 4 . CTG AAC,AGC 30 PASS RECOMPOSED GT:PS . . 1|2:4",
        string.Join("\t", recomposed[2].VcfFields));
}
/// <summary>
/// Supplies four records, two of which give different ALT alleles at position 4, and
/// expects Recompose to complete and yield no recomposed records.
/// </summary>
public void VariantGenerator_ConflictAltAlleles_AlleleBlockStartInTheMiddle_NoRecompositionNoException()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAATCGCGA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var vcfLines = new[]
    {
        "chr1 1 . C T . PASS . GT 0/1 0|1",
        "chr1 2 . A T . PASS . GT 0/1 0|0",
        "chr1 4 . C G . PASS . GT 1|1 1|1",
        "chr1 4 . C A . PASS . GT 1|1 0|1"
    };
    var inputPositions = vcfLines
        .Select(line => (ISimplePosition)AnnotationUtilities.GetSimplePosition(line, refNameToChromosome))
        .ToList();
    var blockRanges = new List<int> { 3, 4, 6, 6 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Empty(recomposed);
}
/// <summary>
/// Runs PositionProcessor.GenerateOutput over three buffered single-sample records and expects
/// the output to consist of exactly the original records, with unchanged VCF fields, in order.
/// </summary>
public void GenerateOutput_NothingRecomposed_ReturnOriginalVcfFieldList()
{
    // Arrange
    var mockSequenceProvider = new Mock<ISequenceProvider>();
    mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var sequenceProvider = mockSequenceProvider.Object;
    var variantGenerator = new VariantGenerator(sequenceProvider);

    var position1 = AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT:PS 0|1:123", sequenceProvider.RefNameToChromosome);
    var position2 = AnnotationUtilities.GetSimplePosition("chr1 4 . C G . PASS . GT 0/1", sequenceProvider.RefNameToChromosome);
    var position3 = AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT 0|1", sequenceProvider.RefNameToChromosome);

    var positions = new List<ISimplePosition> { position1, position2, position3 };
    var recomposable = new List<bool> { true, true, true };
    var functionBlockRanges = new List<int> { 4, 6, 8 };
    var bufferedPositions = new BufferedPositions(positions, recomposable, functionBlockRanges);
    var positionProcessor = new PositionProcessor(_positionBufferMock.Object, variantGenerator);

    // Act
    var output = positionProcessor.GenerateOutput(bufferedPositions).ToArray();

    // Assert
    // Without this length check the loop below would pass vacuously when the output is
    // empty or shorter than the input.
    Assert.Equal(positions.Count, output.Length);

    for (int i = 0; i < output.Length; i++)
    {
        // Assert.Equal reports the differing element on failure, unlike Assert.True(SequenceEqual).
        Assert.Equal(positions[i].VcfFields, output[i].VcfFields);
    }
}
/// <summary>
/// Parses four VCF lines that each pair a symbolic or non-informative ALT allele
/// (&lt;*&gt;, *, &lt;M&gt;, &lt;NON_REF&gt;) with an ordinary allele, and checks that both alleles
/// are present on the position and on the annotated variants.
/// </summary>
public void ParseVcfLine_NonInformativeAlleles_WithNormalAllele_NotFiltered()
{
    // Arrange
    const string vcfLine1 = "chr1 13133 . T <*>,G 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";
    const string vcfLine2 = "chr1 13133 . T *,C 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";
    const string vcfLine3 = "chr1 13133 . T <M>,A 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";
    const string vcfLine4 = "chr1 13133 . T A,<NON_REF> 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";

    var refMinorProvider = new Mock<IRefMinorProvider>();
    var seqProvider = ParserTestUtils.GetSequenceProvider(13133, "T", 'A', ChromosomeUtilities.RefNameToChromosome);
    var refNameToChromosome = seqProvider.RefNameToChromosome;
    var variantFactory = new VariantFactory(seqProvider);

    // Act
    var position1 = AnnotationUtilities.ParseVcfLine(vcfLine1, refMinorProvider.Object, variantFactory, refNameToChromosome);
    var position2 = AnnotationUtilities.ParseVcfLine(vcfLine2, refMinorProvider.Object, variantFactory, refNameToChromosome);
    var position3 = AnnotationUtilities.ParseVcfLine(vcfLine3, refMinorProvider.Object, variantFactory, refNameToChromosome);
    var position4 = AnnotationUtilities.ParseVcfLine(vcfLine4, refMinorProvider.Object, variantFactory, refNameToChromosome);

    var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);
    var annotatedVariants2 = Annotator.GetAnnotatedVariants(position2.Variants);
    var annotatedVariants3 = Annotator.GetAnnotatedVariants(position3.Variants);
    var annotatedVariants4 = Annotator.GetAnnotatedVariants(position4.Variants);

    // SimplePositions
    // Comments are kept on their own lines so the assertions that follow are live code.
    Assert.Equal(new[] { "<*>", "G" }, position1.AltAlleles);
    Assert.Equal(new[] { "*", "C" }, position2.AltAlleles);
    Assert.Equal(new[] { "<M>", "A" }, position3.AltAlleles);
    Assert.Equal(new[] { "A", "<NON_REF>" }, position4.AltAlleles);

    // Variants
    Assert.Equal(new[] { "<*>", "G" }, annotatedVariants1.Select(x => x.Variant.AltAllele).ToArray());
    Assert.Equal(new[] { "*", "C" }, annotatedVariants2.Select(x => x.Variant.AltAllele).ToArray());
    Assert.Equal(new[] { "<M>", "A" }, annotatedVariants3.Select(x => x.Variant.AltAllele).ToArray());
    Assert.Equal(new[] { "A", "<NON_REF>" }, annotatedVariants4.Select(x => x.Variant.AltAllele).ToArray());
}
/// <summary>
/// Parses three VCF lines whose only ALT allele is symbolic or non-informative
/// (&lt;*&gt;, *, &lt;M&gt;) and checks that the allele is kept both on the position and on the
/// first annotated variant.
/// </summary>
public void ParseVcfLine_NonInformativeAlleles_Alone_NotFiltered()
{
    // Arrange
    const string vcfLine1 = "chr1 13133 . T <*> 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";
    const string vcfLine2 = "chr1 13133 . T * 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";
    const string vcfLine3 = "chr1 13133 . T <M> 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";

    var refMinorProvider = new Mock<IRefMinorProvider>();
    var seqProvider = ParserTestUtils.GetSequenceProvider(13133, "T", 'A', ChromosomeUtilities.RefNameToChromosome);
    var variantFactory = new VariantFactory(seqProvider);

    // Act
    var position1 = AnnotationUtilities.ParseVcfLine(vcfLine1, refMinorProvider.Object, variantFactory, seqProvider.RefNameToChromosome);
    var position2 = AnnotationUtilities.ParseVcfLine(vcfLine2, refMinorProvider.Object, variantFactory, seqProvider.RefNameToChromosome);
    var position3 = AnnotationUtilities.ParseVcfLine(vcfLine3, refMinorProvider.Object, variantFactory, seqProvider.RefNameToChromosome);

    var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);
    var annotatedVariants2 = Annotator.GetAnnotatedVariants(position2.Variants);
    var annotatedVariants3 = Annotator.GetAnnotatedVariants(position3.Variants);

    // SimplePositions unchanged
    // Comments are kept on their own lines so the assertions that follow are live code.
    Assert.Equal("<*>", position1.AltAlleles[0]);
    Assert.Equal("*", position2.AltAlleles[0]);
    Assert.Equal("<M>", position3.AltAlleles[0]);

    // Variants not filtered
    Assert.Equal("<*>", annotatedVariants1[0].Variant.AltAllele);
    Assert.Equal("*", annotatedVariants2[0].Variant.AltAllele);
    Assert.Equal("<M>", annotatedVariants3[0].Variant.AltAllele);
}
/// <summary>
/// Parses a &lt;NON_REF&gt;-only VCF line with a ref-minor provider mock that has no setups and
/// expects the position to keep its alleles while exposing no variants (null) and no
/// annotated variants (null).
/// </summary>
public void ParseVcfLine_line_with_only_NonRef_is_not_refMinor()
{
    // Arrange
    const string vcfLine = "1 10005 . C <NON_REF> . LowGQX END=10034;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:3:1:0";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(10005, "C", 'A', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, _vidCreator);

    // Act
    var parsedPosition = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorMock.Object, sequenceProvider, null, factory);
    var annotated = Annotator.GetAnnotatedVariants(parsedPosition.Variants);

    // Assert
    Assert.Equal("C", parsedPosition.RefAllele);
    Assert.Equal(new[] { "<NON_REF>" }, parsedPosition.AltAlleles);
    Assert.Null(parsedPosition.Variants);
    Assert.Null(annotated);
}
/// <summary>
/// Builds an AnnotatedPosition from a parsed VCF line whose INFO column contains FS=0.000 and
/// checks that the JSON output contains "fisherStrandBias":0.
/// </summary>
public void GetJsonString_fisherStrand()
{
    // Arrange
    const string vcfLine = "21\t9411410\t.\tC\tT\t9.51\tDRAGENSnpHardQUAL\tAC=2;AF=1.000;AN=2;DP=2;FS=0.000;MQ=100.00;QD=9.51;SOR=1.609";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(9411410, "C", 'A', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, new VariantId());
    var parsedPosition = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorMock.Object, sequenceProvider, null, factory);

    // The annotated variants come from the fixture helper, not from the parsed line.
    IVariant[] fixtureVariants = GetVariants();
    IAnnotatedVariant[] annotated = Annotator.GetAnnotatedVariants(fixtureVariants);

    // Act
    string json = new AnnotatedPosition(parsedPosition, annotated).GetJsonString();

    // Assert
    Assert.NotNull(json);
    Assert.Contains("\"fisherStrandBias\":0", json);
}
/// <summary>
/// Regression test: parses a deletion record with no FORMAT or sample columns and checks that
/// the position keeps ALT "G" while the annotated variant's ALT allele is the empty string.
/// </summary>
public void Test_crash_caused_by_variant_trimming()
{
    // Arrange
    const string vcfLine1 = "chr1 8021910 rs373653682 GGTGCTGGACGGTGTCCCT G . . .";

    var refMinorProvider = new Mock<IRefMinorProvider>();
    var seqProvider = ParserTestUtils.GetSequenceProvider(8021910, "GGTGCTGGACGGTGTCCCT", 'A', ChromosomeUtilities.RefNameToChromosome);
    var variantFactory = new VariantFactory(seqProvider.Sequence, _vidCreator);

    // Act
    var position1 = AnnotationUtilities.ParseVcfLine(vcfLine1, refMinorProvider.Object, seqProvider, null, variantFactory);
    var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);

    // SimplePositions
    // Comments are kept on their own lines so the assertions that follow are live code.
    Assert.Equal(new[] { "G" }, position1.AltAlleles);

    // Variants
    Assert.Equal(new[] { "" }, annotatedVariants1.Select(x => x.Variant.AltAllele).ToArray());
}
/// <summary>
/// Builds an AnnotatedPosition from a parsed breakend (BND) VCF line carrying an EVENT
/// INFO key and checks that the JSON output contains the matching "breakendEventId".
/// </summary>
public void GetJsonString_BreakEndEventId()
{
    // Arrange
    const string vcfLine = "1\t38432782\tMantaBND:2312:0:1:0:0:0:0\tG\tG]6:28863899]\t971\tPASS\tSVTYPE=BND;MATEID=MantaBND:2312:0:1:0:0:0:1;EVENT=MantaBND:2312:0:1:0:0:0:0;JUNCTION_QUAL=716;BND_DEPTH=52;MATE_BND_DEPTH=56";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(38432782, "G", 'C', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, new VariantId());
    var parsedPosition = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorMock.Object, sequenceProvider, null, factory);

    // The annotated variants come from the fixture helper, not from the parsed line.
    IVariant[] fixtureVariants = GetVariants();
    IAnnotatedVariant[] annotated = Annotator.GetAnnotatedVariants(fixtureVariants);

    // Act
    string json = new AnnotatedPosition(parsedPosition, annotated).GetJsonString();

    // Assert
    Assert.NotNull(json);
    Assert.Contains("\"breakendEventId\":\"MantaBND:2312:0:1:0:0:0:0\"", json);
}
/// <summary>
/// Runs PositionProcessor.GenerateOutput over three buffered records and expects four output
/// records: the first original, one recomposed record, then the two remaining originals.
/// </summary>
public void GenerateOutput_Return_OriginalAndRecomposed_VcfFieldList()
{
    // Arrange
    var mockSequenceProvider = new Mock<ISequenceProvider>();
    mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var sequenceProvider = mockSequenceProvider.Object;
    var variantGenerator = new VariantGenerator(sequenceProvider);

    var position1 = AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT:PS 0|1:.", sequenceProvider.RefNameToChromosome);
    var position2 = AnnotationUtilities.GetSimplePosition("chr1 4 . C G . PASS . GT 1/1", sequenceProvider.RefNameToChromosome);
    var position3 = AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT 0|1", sequenceProvider.RefNameToChromosome);

    var positions = new List<ISimplePosition> { position1, position2, position3 };
    var recomposable = new List<bool> { true, true, true };
    var functionBlockRanges = new List<int> { 4, 6, 8 };
    var bufferedPositions = new BufferedPositions(positions, recomposable, functionBlockRanges);
    var positionProcessor = new PositionProcessor(_positionBufferMock.Object, variantGenerator);

    var expectedOutput = new string[4][];
    expectedOutput[0] = position1.VcfFields;
    expectedOutput[1] = new[] { "chr1", "2", ".", "AGCTG", "AGGTG,TGGTC", ".", "PASS", "RECOMPOSED", "GT", "1|2" };
    expectedOutput[2] = position2.VcfFields;
    expectedOutput[3] = position3.VcfFields;

    // Act
    var output = positionProcessor.GenerateOutput(bufferedPositions).ToArray();

    // Assert
    // Without this length check a missing recomposed record (or an empty output) would go
    // unnoticed, because the loop below is bounded by output.Length.
    Assert.Equal(expectedOutput.Length, output.Length);

    for (int i = 0; i < output.Length; i++)
    {
        // Assert.Equal reports the differing element on failure, unlike Assert.True(SequenceEqual).
        Assert.Equal(expectedOutput[i], output[i].VcfFields);
    }
}
/// <summary>
/// Builds an AnnotatedPosition from a parsed somatic VCF line (INFO contains QSS_NT=16 and
/// VQSR=1.93) and checks that the JSON output contains the corresponding
/// "jointSomaticNormalQuality" and "recalibratedQuality" entries.
/// </summary>
public void GetJsonString_StrelkaSomatic()
{
    // Arrange
    const string vcfLine = "chr1 13813 . T G . LowQscore SOMATIC;QSS=33;TQSS=1;NT=ref;QSS_NT=16;TQSS_NT=1;SGT=TT->GT;DP=266;MQ=23.89;MQ0=59;ALTPOS=69;ALTMAP=37;ReadPosRankSum=1.22;SNVSB=5.92;PNOISE=0.00;PNOISE2=0.00;VQSR=1.93";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(13813, "T", 'C', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, new VariantId());
    var parsedPosition = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorMock.Object, sequenceProvider, factory);

    // The annotated variants come from the fixture helper, not from the parsed line.
    IVariant[] fixtureVariants = GetVariants();
    IAnnotatedVariant[] annotated = Annotator.GetAnnotatedVariants(fixtureVariants);

    // Act
    string json = new AnnotatedPosition(parsedPosition, annotated).GetJsonString();

    // Assert
    Assert.NotNull(json);
    Assert.Contains("\"jointSomaticNormalQuality\":16", json);
    Assert.Contains("\"recalibratedQuality\":1.93", json);
}
/// <summary>
/// Supplies two adjacent three-sample records and expects Recompose to yield no recomposed
/// records for this input.
/// </summary>
public void VariantGenerator_NoMnvAfterTrimming_NotRecompose()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 1 . C A . PASS . GT:PS 1|0:1584593 1|1:. 0|1:.", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 2 . A C . PASS . GT:PS 0|1:1584593 0/0:. 0/0:.", refNameToChromosome)
    };
    var blockRanges = new List<int> { 3, 4 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    Assert.Empty(recomposed);
}
/// <summary>
/// Recomposes records whose sample columns carry fewer fields than FORMAT declares
/// (e.g. "0|1:123" or "./." under GT:PS:GQ) and checks the single recomposed record.
/// </summary>
public void VariantGenerator_SampleColumnCorrectlyProcessed_WhenTrailingMissingValuesDroped()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var vcfLines = new[]
    {
        "chr1 2 . A T,G . PASS . GT:PS:GQ 0|1:123 2/2:.:14.2 ./.",
        "chr1 4 . C A,G . PASS . GT:PS:GQ ./. 1|2:.:18 1|2:456:15.6",
        "chr1 6 . G C . PASS . GT ./. 1|0 ./."
    };
    var inputPositions = vcfLines
        .Select(line => (ISimplePosition)AnnotationUtilities.GetSimplePosition(line, refNameToChromosome))
        .ToList();
    var blockRanges = new List<int> { 4, 6, 8 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    var onlyRecord = Assert.Single(recomposed);
    Assert.Equal(
        "chr1 2 . AGCTG GGATC,GGGTG . PASS RECOMPOSED GT:GQ . 1|2:14.2 .",
        string.Join("\t", onlyRecord.VcfFields));
}
/// <summary>
/// Recomposes two PASS records and one "FailedForSomeReason" record across three samples and
/// expects a single recomposed record whose FILTER column is "PASS".
/// </summary>
public void VariantGenerator_FilterTag_OnlyDecomposedVariantsConsidered()
{
    // Arrange
    var providerMock = new Mock<ISequenceProvider>();
    providerMock.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);
    providerMock.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var provider = providerMock.Object;
    var refNameToChromosome = provider.RefNameToChromosome;

    var inputPositions = new List<ISimplePosition>
    {
        AnnotationUtilities.GetSimplePosition("chr1 2 . A T . PASS . GT 0|1 0/1 0|0", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 4 . C A . FailedForSomeReason . GT 0|0 0/1 0|0", refNameToChromosome),
        AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT 1|1 0/1 0|0", refNameToChromosome)
    };
    var blockRanges = new List<int> { 6, 8, 10 };

    // Act
    var generator = new VariantGenerator(provider, _vidCreator);
    var recomposed = generator.Recompose(inputPositions, blockRanges).ToList();

    // Assert
    var onlyRecord = Assert.Single(recomposed);
    Assert.Equal(
        "chr1 2 . AGCTG AGCTC,TGCTC . PASS RECOMPOSED GT 1|2 . 0|0",
        string.Join("\t", onlyRecord.VcfFields));
}
/// <summary>
/// End-to-end recomposition check: recomposes three three-sample records, verifies the VCF
/// fields of the two recomposed records, and then verifies the LinkedVids recorded on both
/// the original records and the recomposed records.
/// </summary>
public void VariantGenerator_AsExpected()
{
    // Arrange
    var mockSequenceProvider = new Mock<ISequenceProvider>();
    mockSequenceProvider.SetupGet(x => x.RefNameToChromosome)
        .Returns(new Dictionary<string, IChromosome> { { "chr1", new Chromosome("chr1", "1", 0) } });
    mockSequenceProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence("CAGCTGAA"));
    var sequenceProvider = mockSequenceProvider.Object;

    var position1 = AnnotationUtilities.GetSimplePosition("chr1 2 . A T,G . PASS . GT:PS 0|1:123 2/2:789 0|2:456", sequenceProvider.RefNameToChromosome);
    var position2 = AnnotationUtilities.GetSimplePosition("chr1 4 . C A,G . PASS . GT:PS 1|1:301 1|2:789 1|2:456", sequenceProvider.RefNameToChromosome);
    var position3 = AnnotationUtilities.GetSimplePosition("chr1 6 . G C . PASS . GT:PS . 1|0:789 0/1:.", sequenceProvider.RefNameToChromosome);
    var functionBlockRanges = new List<int> { 4, 6, 8 };

    // Act
    var recomposer = new VariantGenerator(sequenceProvider);
    var recomposedPositions = recomposer.Recompose(new List<ISimplePosition> { position1, position2, position3 }, functionBlockRanges).ToList();

    // Assert: recomposed VCF records.
    Assert.Equal(2, recomposedPositions.Count);
    Assert.Equal("chr1 2 . AGC AGA,GGG,TGA . PASS RECOMPOSED GT:PS 1|3:123 . 1|2:456", string.Join("\t", recomposedPositions[0].VcfFields));
    // The expected string is a single-line literal; a regular string constant cannot span a line break.
    Assert.Equal("chr1 2 . AGCTG GGATC,GGGTG . PASS RECOMPOSED GT:PS . 1|2:789 .", string.Join("\t", recomposedPositions[1].VcfFields));

    //Check LinkedVids
    //SNVs
    // Lists with more than one entry are sorted in place before comparison so the
    // assertion does not depend on the order in which the vids were added.
    Assert.Equal(2, position1.LinkedVids.Length);
    Assert.Equal(new List<string> { "1:2:4:TGA" }, position1.LinkedVids[0]);
    position1.LinkedVids[1].Sort();
    Assert.Equal(new List<string> { "1:2:4:GGG", "1:2:6:GGATC", "1:2:6:GGGTG" }, position1.LinkedVids[1]);

    Assert.Equal(2, position2.LinkedVids.Length);
    position2.LinkedVids[0].Sort();
    Assert.Equal(new List<string> { "1:2:4:AGA", "1:2:4:TGA", "1:2:6:GGATC" }, position2.LinkedVids[0]);
    position2.LinkedVids[1].Sort();
    Assert.Equal(new List<string> { "1:2:4:GGG", "1:2:6:GGGTG" }, position2.LinkedVids[1]);

    Assert.Single(position3.LinkedVids);
    Assert.Equal(new List<string> { "1:2:6:GGATC" }, position3.LinkedVids[0]);

    //MNVs
    Assert.Equal(3, recomposedPositions[0].LinkedVids.Length);
    Assert.Equal(new List<string> { "1:4:A" }, recomposedPositions[0].LinkedVids[0]);
    Assert.Equal(new List<string> { "1:2:G", "1:4:G" }, recomposedPositions[0].LinkedVids[1]);
    Assert.Equal(new List<string> { "1:2:T", "1:4:A" }, recomposedPositions[0].LinkedVids[2]);

    Assert.Equal(2, recomposedPositions[1].LinkedVids.Length);
    Assert.Equal(new List<string> { "1:2:G", "1:4:A", "1:6:C" }, recomposedPositions[1].LinkedVids[0]);
    Assert.Equal(new List<string> { "1:2:G", "1:4:G" }, recomposedPositions[1].LinkedVids[1]);
}