/// <summary>
/// Parses four VCF lines whose ALT column pairs a symbolic or star allele with an
/// ordinary base, then checks that both the parsed position and the annotated
/// variants still list every ALT allele in its original order.
/// </summary>
public void ParseVcfLine_NonInformativeAlleles_WithNormalAllele_NotFiltered()
{
    // NOTE(review): the fields below appear space-separated in this view - confirm
    // whether the checked-in literals use tab characters.
    string[] vcfLines =
    {
        "chr1 13133 . T <*>,G 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4",
        "chr1 13133 . T *,C 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4",
        "chr1 13133 . T <M>,A 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4",
        "chr1 13133 . T A,<NON_REF> 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4"
    };

    // The same expectation applies to the position and to the annotated variants.
    string[][] expectedAltAlleles =
    {
        new[] { "<*>", "G" },
        new[] { "*", "C" },
        new[] { "<M>", "A" },
        new[] { "A", "<NON_REF>" }
    };

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(13133, "T", 'A', ChromosomeUtilities.RefNameToChromosome);
    var chromosomesByName = sequenceProvider.RefNameToChromosome;
    var factory = new VariantFactory(sequenceProvider);

    // Parse every line first, then annotate every position (same call order as a
    // hand-unrolled version of this test).
    var positions = vcfLines
        .Select(line => AnnotationUtilities.ParseVcfLine(line, refMinorMock.Object, factory, chromosomesByName))
        .ToArray();
    var annotatedSets = positions
        .Select(position => Annotator.GetAnnotatedVariants(position.Variants))
        .ToArray();

    // SimplePositions
    for (int i = 0; i < expectedAltAlleles.Length; i++)
    {
        Assert.Equal(expectedAltAlleles[i], positions[i].AltAlleles);
    }

    // Variants
    for (int i = 0; i < expectedAltAlleles.Length; i++)
    {
        Assert.Equal(expectedAltAlleles[i], annotatedSets[i].Select(x => x.Variant.AltAllele).ToArray());
    }
}
/// <summary>
/// Parses three VCF lines whose single ALT allele is symbolic or a star, with the
/// ref-minor provider mocked to return false at position 13133. Checks that the
/// position keeps the ALT allele as written and that
/// Annotator.GetAnnotatedVariants yields null for each line.
/// </summary>
public void ParseVcfLine_line_with_only_non_informative_alleles_position_unchanged_but_variants_ignored()
{
    const string angleStarLine = "chr1 13133 . T <*> 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";
    const string bareStarLine = "chr1 13133 . T * 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";
    const string symbolicMLine = "chr1 13133 . T <M> 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";

    var chr1 = new Chromosome("chr1", "1", 0);

    var refMinorMock = new Mock<IRefMinorProvider>();
    refMinorMock.Setup(provider => provider.IsReferenceMinor(chr1, 13133)).Returns(false);

    var chromosomesByName = new Dictionary<string, IChromosome> { ["chr1"] = chr1 };
    var factory = new VariantFactory(chromosomesByName, refMinorMock.Object, false);

    var angleStarPosition = VcfReaderUtils.ParseVcfLine(angleStarLine, factory, chromosomesByName);
    var bareStarPosition = VcfReaderUtils.ParseVcfLine(bareStarLine, factory, chromosomesByName);
    var symbolicMPosition = VcfReaderUtils.ParseVcfLine(symbolicMLine, factory, chromosomesByName);

    var angleStarAnnotated = Annotator.GetAnnotatedVariants(angleStarPosition.Variants);
    var bareStarAnnotated = Annotator.GetAnnotatedVariants(bareStarPosition.Variants);
    var symbolicMAnnotated = Annotator.GetAnnotatedVariants(symbolicMPosition.Variants);

    // SimplePositions unchanged
    Assert.Equal("<*>", angleStarPosition.AltAlleles[0]);
    Assert.Equal("*", bareStarPosition.AltAlleles[0]);
    Assert.Equal("<M>", symbolicMPosition.AltAlleles[0]);

    // Variants are null
    Assert.Null(angleStarAnnotated);
    Assert.Null(bareStarAnnotated);
    Assert.Null(symbolicMAnnotated);
}
/// <summary>
/// Parses three VCF lines whose single ALT allele is symbolic or a star and checks
/// that the allele is reported unchanged both on the parsed position and on the
/// first annotated variant.
/// </summary>
public void ParseVcfLine_NonInformativeAlleles_Alone_NotFiltered()
{
    const string angleStarLine = "chr1 13133 . T <*> 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";
    const string bareStarLine = "chr1 13133 . T * 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";
    const string symbolicMLine = "chr1 13133 . T <M> 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(13133, "T", 'A', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider);

    var angleStarPosition = AnnotationUtilities.ParseVcfLine(
        angleStarLine, refMinorMock.Object, factory, sequenceProvider.RefNameToChromosome);
    var bareStarPosition = AnnotationUtilities.ParseVcfLine(
        bareStarLine, refMinorMock.Object, factory, sequenceProvider.RefNameToChromosome);
    var symbolicMPosition = AnnotationUtilities.ParseVcfLine(
        symbolicMLine, refMinorMock.Object, factory, sequenceProvider.RefNameToChromosome);

    var angleStarAnnotated = Annotator.GetAnnotatedVariants(angleStarPosition.Variants);
    var bareStarAnnotated = Annotator.GetAnnotatedVariants(bareStarPosition.Variants);
    var symbolicMAnnotated = Annotator.GetAnnotatedVariants(symbolicMPosition.Variants);

    // SimplePositions unchanged
    Assert.Equal("<*>", angleStarPosition.AltAlleles[0]);
    Assert.Equal("*", bareStarPosition.AltAlleles[0]);
    Assert.Equal("<M>", symbolicMPosition.AltAlleles[0]);

    // Variants not filtered
    Assert.Equal("<*>", angleStarAnnotated[0].Variant.AltAllele);
    Assert.Equal("*", bareStarAnnotated[0].Variant.AltAllele);
    Assert.Equal("<M>", symbolicMAnnotated[0].Variant.AltAllele);
}
/// <summary>
/// Parses a line whose only ALT allele is &lt;NON_REF&gt; at a site the mocked provider
/// reports as reference-minor with global major allele "T". Checks that the position
/// keeps REF "C" and ALT &lt;NON_REF&gt;, while the produced variant has REF "T" and
/// ALT "C".
/// </summary>
public void ParseVcfLine_line_with_only_NonRef_is_refMinor()
{
    const string nonRefLine = "1 10628385 . C <NON_REF> . LowGQX;HighDPFRatio END=10628385;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:24:9:18";

    var chr1 = new Chromosome("chr1", "1", 0);

    var refMinorMock = new Mock<IRefMinorProvider>();
    refMinorMock.Setup(provider => provider.IsReferenceMinor(chr1, 10628385)).Returns(true);
    refMinorMock.Setup(provider => provider.GetGlobalMajorAlleleForRefMinor(chr1, 10628385)).Returns("T");

    // The VCF line names the chromosome "1", so that is the lookup key.
    var chromosomesByName = new Dictionary<string, IChromosome> { ["1"] = chr1 };
    var factory = new VariantFactory(chromosomesByName, refMinorMock.Object, false);

    var parsedPosition = VcfReaderUtils.ParseVcfLine(nonRefLine, factory, chromosomesByName);
    var annotated = Annotator.GetAnnotatedVariants(parsedPosition.Variants);

    string[] expectedPositionAlts = { "<NON_REF>" };
    string[] expectedVariantAlts = { "C" };

    Assert.Equal("C", parsedPosition.RefAllele);
    Assert.Equal(expectedPositionAlts, parsedPosition.AltAlleles);
    Assert.Equal("T", parsedPosition.Variants[0].RefAllele);
    Assert.Equal("C", parsedPosition.Variants[0].AltAllele);

    // Variants
    Assert.Equal(expectedVariantAlts, annotated.Select(x => x.Variant.AltAllele).ToArray());
}
/// <summary>
/// Builds an annotated position with the chromosome name "originalChr1" and checks
/// that the JSON output contains that name in its "chromosome" field.
/// </summary>
public void GetJsonString_DifferentOriginalChromosomeName()
{
    const string originalChromosomeName = "originalChr1";

    var variants = GetVariants();
    var samples = GetSamples();
    var annotatedVariants = Annotator.GetAnnotatedVariants(variants);

    var position = GetPosition(originalChromosomeName, variants, samples);
    string json = new AnnotatedPosition(position, annotatedVariants).GetJsonString();

    Assert.NotNull(json);
    Assert.Contains($"\"chromosome\":\"{originalChromosomeName}\"", json);
}
/// <summary>
/// Parses a line whose only ALT allele is &lt;NON_REF&gt; using a ref-minor mock with no
/// setups. Checks that the position keeps REF "C" and ALT &lt;NON_REF&gt;, and that both
/// the position's variants and the annotated variants are null.
/// </summary>
public void ParseVcfLine_line_with_only_NonRef_is_not_refMinor()
{
    const string nonRefLine = "1 10005 . C <NON_REF> . LowGQX END=10034;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:3:1:0";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(10005, "C", 'A', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, _vidCreator);

    var parsedPosition = AnnotationUtilities.ParseVcfLine(
        nonRefLine, refMinorMock.Object, sequenceProvider, null, factory);
    var annotated = Annotator.GetAnnotatedVariants(parsedPosition.Variants);

    string[] expectedPositionAlts = { "<NON_REF>" };

    Assert.Equal("C", parsedPosition.RefAllele);
    Assert.Equal(expectedPositionAlts, parsedPosition.AltAlleles);
    Assert.Null(parsedPosition.Variants);
    Assert.Null(annotated);
}
/// <summary>
/// Parses a VCF line carrying FS=0.000 in its INFO field and checks that the JSON
/// output contains "fisherStrandBias":0. The annotated variants come from
/// GetVariants(), not from the parsed position.
/// </summary>
public void GetJsonString_fisherStrand()
{
    const string fisherStrandLine = "21\t9411410\t.\tC\tT\t9.51\tDRAGENSnpHardQUAL\tAC=2;AF=1.000;AN=2;DP=2;FS=0.000;MQ=100.00;QD=9.51;SOR=1.609";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(9411410, "C", 'A', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, new VariantId());

    var parsedPosition = AnnotationUtilities.ParseVcfLine(
        fisherStrandLine, refMinorMock.Object, sequenceProvider, null, factory);

    var variants = GetVariants();
    var annotatedVariants = Annotator.GetAnnotatedVariants(variants);

    string json = new AnnotatedPosition(parsedPosition, annotatedVariants).GetJsonString();

    Assert.NotNull(json);
    Assert.Contains("\"fisherStrandBias\":0", json);
}
/// <summary>
/// Parses a deletion with a 19-base REF and ALT "G". Checks that the position keeps
/// ALT "G" while the annotated variant's ALT allele is the empty string.
/// </summary>
public void Test_crash_caused_by_variant_trimming()
{
    const string deletionLine = "chr1 8021910 rs373653682 GGTGCTGGACGGTGTCCCT G . . .";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(8021910, "GGTGCTGGACGGTGTCCCT", 'A', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, _vidCreator);

    var parsedPosition = AnnotationUtilities.ParseVcfLine(
        deletionLine, refMinorMock.Object, sequenceProvider, null, factory);
    var annotated = Annotator.GetAnnotatedVariants(parsedPosition.Variants);

    string[] expectedPositionAlts = { "G" };
    string[] expectedVariantAlts = { "" };

    // SimplePositions
    Assert.Equal(expectedPositionAlts, parsedPosition.AltAlleles);

    // Variants
    Assert.Equal(expectedVariantAlts, annotated.Select(x => x.Variant.AltAllele).ToArray());
}
/// <summary>
/// Parses a breakend VCF line whose INFO field carries EVENT=MantaBND:2312:0:1:0:0:0:0
/// and checks that the JSON output contains that value as "breakendEventId". The
/// annotated variants come from GetVariants(), not from the parsed position.
/// </summary>
public void GetJsonString_BreakEndEventId()
{
    const string breakendLine = "1\t38432782\tMantaBND:2312:0:1:0:0:0:0\tG\tG]6:28863899]\t971\tPASS\tSVTYPE=BND;MATEID=MantaBND:2312:0:1:0:0:0:1;EVENT=MantaBND:2312:0:1:0:0:0:0;JUNCTION_QUAL=716;BND_DEPTH=52;MATE_BND_DEPTH=56";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(38432782, "G", 'C', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, new VariantId());

    var parsedPosition = AnnotationUtilities.ParseVcfLine(
        breakendLine, refMinorMock.Object, sequenceProvider, null, factory);

    var variants = GetVariants();
    var annotatedVariants = Annotator.GetAnnotatedVariants(variants);

    string json = new AnnotatedPosition(parsedPosition, annotatedVariants).GetJsonString();

    Assert.NotNull(json);
    Assert.Contains("\"breakendEventId\":\"MantaBND:2312:0:1:0:0:0:0\"", json);
}
/// <summary>
/// Parses a somatic VCF line carrying QSS_NT=16 and VQSR=1.93 in its INFO field and
/// checks that the JSON output contains "jointSomaticNormalQuality":16 and
/// "recalibratedQuality":1.93. The annotated variants come from GetVariants(), not
/// from the parsed position.
/// </summary>
public void GetJsonString_StrelkaSomatic()
{
    const string somaticLine = "chr1 13813 . T G . LowQscore SOMATIC;QSS=33;TQSS=1;NT=ref;QSS_NT=16;TQSS_NT=1;SGT=TT->GT;DP=266;MQ=23.89;MQ0=59;ALTPOS=69;ALTMAP=37;ReadPosRankSum=1.22;SNVSB=5.92;PNOISE=0.00;PNOISE2=0.00;VQSR=1.93";

    var refMinorMock = new Mock<IRefMinorProvider>();
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(13813, "T", 'C', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, new VariantId());

    var parsedPosition = AnnotationUtilities.ParseVcfLine(
        somaticLine, refMinorMock.Object, sequenceProvider, factory);

    var variants = GetVariants();
    var annotatedVariants = Annotator.GetAnnotatedVariants(variants);

    string json = new AnnotatedPosition(parsedPosition, annotatedVariants).GetJsonString();

    Assert.NotNull(json);
    Assert.Contains("\"jointSomaticNormalQuality\":16", json);
    Assert.Contains("\"recalibratedQuality\":1.93", json);
}
/// <summary>
/// Parses a line whose only ALT allele is &lt;NON_REF&gt; with the mocked provider
/// returning "T" as the global major allele at position 10628385. Checks that the
/// position keeps REF "C" and ALT &lt;NON_REF&gt;, while the produced variant has REF "T"
/// and ALT "C".
/// </summary>
public void ParseVcfLine_line_with_only_NonRef_is_refMinor()
{
    const string nonRefLine = "1 10628385 . C <NON_REF> . LowGQX;HighDPFRatio END=10628385;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:24:9:18";

    var refMinorMock = new Mock<IRefMinorProvider>();
    refMinorMock.Setup(provider => provider.GetGlobalMajorAllele(ChromosomeUtilities.Chr1, 10628385)).Returns("T");

    var sequenceProvider = ParserTestUtils.GetSequenceProvider(10628385, "C", 'A', ChromosomeUtilities.RefNameToChromosome);
    var factory = new VariantFactory(sequenceProvider.Sequence, _vidCreator);

    var parsedPosition = AnnotationUtilities.ParseVcfLine(
        nonRefLine, refMinorMock.Object, sequenceProvider, null, factory);
    var annotated = Annotator.GetAnnotatedVariants(parsedPosition.Variants);

    string[] expectedPositionAlts = { "<NON_REF>" };
    string[] expectedVariantAlts = { "C" };

    Assert.Equal("C", parsedPosition.RefAllele);
    Assert.Equal(expectedPositionAlts, parsedPosition.AltAlleles);
    Assert.Equal("T", parsedPosition.Variants[0].RefAllele);
    Assert.Equal("C", parsedPosition.Variants[0].AltAllele);

    // Variants
    Assert.Equal(expectedVariantAlts, annotated.Select(x => x.Variant.AltAllele).ToArray());
}
/// <summary>
/// Parses a deletion with a 19-base REF and ALT "G", with the ref-minor provider
/// mocked to return false at position 8021910. Checks that the position keeps ALT
/// "G" while the annotated variant's ALT allele is the empty string.
/// </summary>
public void Test_crash_caused_by_variant_trimming()
{
    const string deletionLine = "chr1 8021910 rs373653682 GGTGCTGGACGGTGTCCCT G . . .";

    var chr1 = new Chromosome("chr1", "1", 0);

    var refMinorMock = new Mock<IRefMinorProvider>();
    refMinorMock.Setup(provider => provider.IsReferenceMinor(chr1, 8021910)).Returns(false);

    var chromosomesByName = new Dictionary<string, IChromosome> { ["chr1"] = chr1 };
    var factory = new VariantFactory(chromosomesByName, refMinorMock.Object, false);

    var parsedPosition = VcfReaderUtils.ParseVcfLine(deletionLine, factory, chromosomesByName);
    var annotated = Annotator.GetAnnotatedVariants(parsedPosition.Variants);

    string[] expectedPositionAlts = { "G" };
    string[] expectedVariantAlts = { "" };

    // SimplePositions
    Assert.Equal(expectedPositionAlts, parsedPosition.AltAlleles);

    // Variants
    Assert.Equal(expectedVariantAlts, annotated.Select(x => x.Variant.AltAllele).ToArray());
}
/// <summary>
/// Parses four VCF lines whose ALT column pairs a symbolic or star allele with an
/// ordinary base, with the ref-minor provider mocked to return false at position
/// 13133. Checks that the position keeps both ALT alleles while the annotated
/// variants contain only the ordinary base.
/// </summary>
public void ParseVcfLine_non_informative_alleles_or_NonRef_filtered_only_in_variants()
{
    // NOTE(review): the fields below appear space-separated in this view - confirm
    // whether the checked-in literals use tab characters.
    string[] vcfLines =
    {
        "chr1 13133 . T <*>,G 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4",
        "chr1 13133 . T *,C 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4",
        "chr1 13133 . T <M>,A 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4",
        "chr1 13133 . T A,<NON_REF> 36.00 PASS SNVSB=0.0;SNVHPOL=4 GT:GQ:GQX:DP:DPF:AD 0/1:62:20:7:1:3,4"
    };

    string[][] expectedPositionAlts =
    {
        new[] { "<*>", "G" },
        new[] { "*", "C" },
        new[] { "<M>", "A" },
        new[] { "A", "<NON_REF>" }
    };

    string[][] expectedVariantAlts =
    {
        new[] { "G" },
        new[] { "C" },
        new[] { "A" },
        new[] { "A" }
    };

    var chr1 = new Chromosome("chr1", "1", 0);

    var refMinorMock = new Mock<IRefMinorProvider>();
    refMinorMock.Setup(provider => provider.IsReferenceMinor(chr1, 13133)).Returns(false);

    var chromosomesByName = new Dictionary<string, IChromosome> { ["chr1"] = chr1 };
    var factory = new VariantFactory(chromosomesByName, refMinorMock.Object, false);

    // Parse every line first, then annotate every position (same call order as a
    // hand-unrolled version of this test).
    var positions = vcfLines
        .Select(line => VcfReaderUtils.ParseVcfLine(line, factory, chromosomesByName))
        .ToArray();
    var annotatedSets = positions
        .Select(position => Annotator.GetAnnotatedVariants(position.Variants))
        .ToArray();

    // SimplePositions
    for (int i = 0; i < expectedPositionAlts.Length; i++)
    {
        Assert.Equal(expectedPositionAlts[i], positions[i].AltAlleles);
    }

    // Variants
    for (int i = 0; i < expectedVariantAlts.Length; i++)
    {
        Assert.Equal(expectedVariantAlts[i], annotatedSets[i].Select(x => x.Variant.AltAllele).ToArray());
    }
}
/// <summary>
/// Parses a line whose only ALT allele is &lt;NON_REF&gt;, with the ref-minor provider
/// mocked to return false at position 10005. Checks that the position and its first
/// variant both keep REF "C" and ALT &lt;NON_REF&gt;, and that
/// Annotator.GetAnnotatedVariants yields null.
/// </summary>
public void ParseVcfLine_line_with_only_NonRef_is_not_refMinor()
{
    const string nonRefLine = "1 10005 . C <NON_REF> . LowGQX END=10034;BLOCKAVG_min30p3a GT:GQX:DP:DPF 0/0:3:1:0";

    var chr1 = new Chromosome("chr1", "1", 0);

    var refMinorMock = new Mock<IRefMinorProvider>();
    refMinorMock.Setup(provider => provider.IsReferenceMinor(chr1, 10005)).Returns(false);

    // The VCF line names the chromosome "1", so that is the lookup key.
    var chromosomesByName = new Dictionary<string, IChromosome> { ["1"] = chr1 };
    var factory = new VariantFactory(chromosomesByName, refMinorMock.Object, false);

    var parsedPosition = VcfReaderUtils.ParseVcfLine(nonRefLine, factory, chromosomesByName);
    var annotated = Annotator.GetAnnotatedVariants(parsedPosition.Variants);

    string[] expectedPositionAlts = { "<NON_REF>" };

    Assert.Equal("C", parsedPosition.RefAllele);
    Assert.Equal(expectedPositionAlts, parsedPosition.AltAlleles);
    Assert.Equal("C", parsedPosition.Variants[0].RefAllele);
    Assert.Equal("<NON_REF>", parsedPosition.Variants[0].AltAllele);

    // Variants
    Assert.Null(annotated);
}