示例#1
0
        public void ParseVcfLine_NonInformativeAlleles_WithNormalAllele_NotFiltered()
        {
            const string vcfLine1 = "chr1	13133	.	T	<*>,G	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine2 = "chr1	13133	.	T	*,C	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine3 = "chr1	13133	.	T	<M>,A	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine4 = "chr1	13133	.	T	A,<NON_REF>	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";

            var refMinorProvider    = new Mock <IRefMinorProvider>();
            var seqProvider         = ParserTestUtils.GetSequenceProvider(13133, "T", 'A', ChromosomeUtilities.RefNameToChromosome);
            var refNameToChromosome = seqProvider.RefNameToChromosome;

            var variantFactory = new VariantFactory(seqProvider);

            var position1 = AnnotationUtilities.ParseVcfLine(vcfLine1, refMinorProvider.Object, variantFactory, refNameToChromosome);
            var position2 = AnnotationUtilities.ParseVcfLine(vcfLine2, refMinorProvider.Object, variantFactory, refNameToChromosome);
            var position3 = AnnotationUtilities.ParseVcfLine(vcfLine3, refMinorProvider.Object, variantFactory, refNameToChromosome);
            var position4 = AnnotationUtilities.ParseVcfLine(vcfLine4, refMinorProvider.Object, variantFactory, refNameToChromosome);

            var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);
            var annotatedVariants2 = Annotator.GetAnnotatedVariants(position2.Variants);
            var annotatedVariants3 = Annotator.GetAnnotatedVariants(position3.Variants);
            var annotatedVariants4 = Annotator.GetAnnotatedVariants(position4.Variants);

            // SimplePositions
            Assert.Equal(new[] { "<*>", "G" }, position1.AltAlleles);
            Assert.Equal(new[] { "*", "C" }, position2.AltAlleles);
            Assert.Equal(new[] { "<M>", "A" }, position3.AltAlleles);
            Assert.Equal(new[] { "A", "<NON_REF>" }, position4.AltAlleles);

            // Variants
            Assert.Equal(new[] { "<*>", "G" }, annotatedVariants1.Select(x => x.Variant.AltAllele).ToArray());
            Assert.Equal(new[] { "*", "C" }, annotatedVariants2.Select(x => x.Variant.AltAllele).ToArray());
            Assert.Equal(new[] { "<M>", "A" }, annotatedVariants3.Select(x => x.Variant.AltAllele).ToArray());
            Assert.Equal(new[] { "A", "<NON_REF>" }, annotatedVariants4.Select(x => x.Variant.AltAllele).ToArray());
        }
示例#2
0
        public void ParseVcfLine_line_with_only_non_informative_alleles_position_unchanged_but_variants_ignored()
        {
            const string vcfLine1 = "chr1	13133	.	T	<*>	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine2 = "chr1	13133	.	T	*	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine3 = "chr1	13133	.	T	<M>	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";

            var chromosome       = new Chromosome("chr1", "1", 0);
            var refMinorProvider = new Mock <IRefMinorProvider>();

            refMinorProvider.Setup(x => x.IsReferenceMinor(chromosome, 13133)).Returns(false);
            var refNameToChromosome = new Dictionary <string, IChromosome> {
                ["chr1"] = chromosome
            };
            var variantFactory = new VariantFactory(refNameToChromosome, refMinorProvider.Object, false);

            var position1 = VcfReaderUtils.ParseVcfLine(vcfLine1, variantFactory, refNameToChromosome);
            var position2 = VcfReaderUtils.ParseVcfLine(vcfLine2, variantFactory, refNameToChromosome);
            var position3 = VcfReaderUtils.ParseVcfLine(vcfLine3, variantFactory, refNameToChromosome);

            var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);
            var annotatedVariants2 = Annotator.GetAnnotatedVariants(position2.Variants);
            var annotatedVariants3 = Annotator.GetAnnotatedVariants(position3.Variants);

            // SimplePositions unchanged
            Assert.Equal("<*>", position1.AltAlleles[0]);
            Assert.Equal("*", position2.AltAlleles[0]);
            Assert.Equal("<M>", position3.AltAlleles[0]);

            // Variants are null
            Assert.Null(annotatedVariants1);
            Assert.Null(annotatedVariants2);
            Assert.Null(annotatedVariants3);
        }
示例#3
0
        public void ParseVcfLine_NonInformativeAlleles_Alone_NotFiltered()
        {
            const string vcfLine1 = "chr1	13133	.	T	<*>	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine2 = "chr1	13133	.	T	*	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine3 = "chr1	13133	.	T	<M>	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";

            var refMinorProvider = new Mock <IRefMinorProvider>();
            var seqProvider      = ParserTestUtils.GetSequenceProvider(13133, "T", 'A', ChromosomeUtilities.RefNameToChromosome);
            var variantFactory   = new VariantFactory(seqProvider);

            var position1 = AnnotationUtilities.ParseVcfLine(vcfLine1, refMinorProvider.Object, variantFactory, seqProvider.RefNameToChromosome);
            var position2 = AnnotationUtilities.ParseVcfLine(vcfLine2, refMinorProvider.Object, variantFactory, seqProvider.RefNameToChromosome);
            var position3 = AnnotationUtilities.ParseVcfLine(vcfLine3, refMinorProvider.Object, variantFactory, seqProvider.RefNameToChromosome);

            var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);
            var annotatedVariants2 = Annotator.GetAnnotatedVariants(position2.Variants);
            var annotatedVariants3 = Annotator.GetAnnotatedVariants(position3.Variants);

            // SimplePositions unchanged
            Assert.Equal("<*>", position1.AltAlleles[0]);
            Assert.Equal("*", position2.AltAlleles[0]);
            Assert.Equal("<M>", position3.AltAlleles[0]);

            // Variants not filtered
            Assert.Equal("<*>", annotatedVariants1[0].Variant.AltAllele);
            Assert.Equal("*", annotatedVariants2[0].Variant.AltAllele);
            Assert.Equal("<M>", annotatedVariants3[0].Variant.AltAllele);
        }
示例#4
0
        public void ParseVcfLine_line_with_only_NonRef_is_refMinor()
        {
            const string vcfLine = "1	10628385	.	C	<NON_REF>	.	LowGQX;HighDPFRatio	END=10628385;BLOCKAVG_min30p3a	GT:GQX:DP:DPF	0/0:24:9:18";

            var chromosome       = new Chromosome("chr1", "1", 0);
            var refMinorProvider = new Mock <IRefMinorProvider>();

            refMinorProvider.Setup(x => x.IsReferenceMinor(chromosome, 10628385)).Returns(true);
            refMinorProvider.Setup(x => x.GetGlobalMajorAlleleForRefMinor(chromosome, 10628385)).Returns("T");
            var refNameToChromosome = new Dictionary <string, IChromosome> {
                ["1"] = chromosome
            };
            var variantFactory = new VariantFactory(refNameToChromosome, refMinorProvider.Object, false);

            var position          = VcfReaderUtils.ParseVcfLine(vcfLine, variantFactory, refNameToChromosome);
            var annotatedVariants = Annotator.GetAnnotatedVariants(position.Variants);

            Assert.Equal("C", position.RefAllele);
            Assert.Equal(new[] { "<NON_REF>" }, position.AltAlleles);
            Assert.Equal("T", position.Variants[0].RefAllele);
            Assert.Equal("C", position.Variants[0].AltAllele);

            // Variants
            Assert.Equal(new[] { "C" }, annotatedVariants.Select(x => x.Variant.AltAllele).ToArray());
        }
        public void GetJsonString_DifferentOriginalChromosomeName()
        {
            const string originalChromosomeName = "originalChr1";

            IVariant[]          variants          = GetVariants();
            ISample[]           samples           = GetSamples();
            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);

            var position          = GetPosition(originalChromosomeName, variants, samples);
            var annotatedPosition = new AnnotatedPosition(position, annotatedVariants);

            string observedResult = annotatedPosition.GetJsonString();

            Assert.NotNull(observedResult);
            Assert.Contains($"\"chromosome\":\"{originalChromosomeName}\"", observedResult);
        }
示例#6
0
        public void ParseVcfLine_line_with_only_NonRef_is_not_refMinor()
        {
            const string vcfLine = "1	10005	.	C	<NON_REF>	.	LowGQX	END=10034;BLOCKAVG_min30p3a	GT:GQX:DP:DPF	0/0:3:1:0";

            var refMinorProvider = new Mock <IRefMinorProvider>();
            var seqProvider      =
                ParserTestUtils.GetSequenceProvider(10005, "C", 'A', ChromosomeUtilities.RefNameToChromosome);
            var variantFactory = new VariantFactory(seqProvider.Sequence, _vidCreator);

            var position          = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);
            var annotatedVariants = Annotator.GetAnnotatedVariants(position.Variants);

            Assert.Equal("C", position.RefAllele);
            Assert.Equal(new[] { "<NON_REF>" }, position.AltAlleles);
            Assert.Null(position.Variants);
            Assert.Null(annotatedVariants);
        }
        public void GetJsonString_fisherStrand()
        {
            const string vcfLine = "21\t9411410\t.\tC\tT\t9.51\tDRAGENSnpHardQUAL\tAC=2;AF=1.000;AN=2;DP=2;FS=0.000;MQ=100.00;QD=9.51;SOR=1.609";

            var refMinorProvider = new Mock <IRefMinorProvider>();
            var seqProvider      = ParserTestUtils.GetSequenceProvider(9411410, "C", 'A', ChromosomeUtilities.RefNameToChromosome);
            var variantFactory   = new VariantFactory(seqProvider.Sequence, new VariantId());

            var position = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);

            IVariant[]          variants          = GetVariants();
            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);
            var annotatedPosition = new AnnotatedPosition(position, annotatedVariants);

            string observedResult = annotatedPosition.GetJsonString();

            Assert.NotNull(observedResult);
            Assert.Contains("\"fisherStrandBias\":0", observedResult);
        }
示例#8
0
        public void Test_crash_caused_by_variant_trimming()
        {
            const string vcfLine1 = "chr1	8021910	rs373653682	GGTGCTGGACGGTGTCCCT	G	.	.	.";

            var refMinorProvider = new Mock <IRefMinorProvider>();
            var seqProvider      = ParserTestUtils.GetSequenceProvider(8021910, "GGTGCTGGACGGTGTCCCT", 'A', ChromosomeUtilities.RefNameToChromosome);

            var variantFactory = new VariantFactory(seqProvider.Sequence, _vidCreator);

            var position1 = AnnotationUtilities.ParseVcfLine(vcfLine1, refMinorProvider.Object, seqProvider, null, variantFactory);

            var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);

            // SimplePositions
            Assert.Equal(new[] { "G" }, position1.AltAlleles);

            // Variants
            Assert.Equal(new[] { "" }, annotatedVariants1.Select(x => x.Variant.AltAllele).ToArray());
        }
示例#9
0
        public void GetJsonString_BreakEndEventId()
        {
            const string vcfLine = "1\t38432782\tMantaBND:2312:0:1:0:0:0:0\tG\tG]6:28863899]\t971\tPASS\tSVTYPE=BND;MATEID=MantaBND:2312:0:1:0:0:0:1;EVENT=MantaBND:2312:0:1:0:0:0:0;JUNCTION_QUAL=716;BND_DEPTH=52;MATE_BND_DEPTH=56";

            var refMinorProvider = new Mock <IRefMinorProvider>();
            var seqProvider      = ParserTestUtils.GetSequenceProvider(38432782, "G", 'C', ChromosomeUtilities.RefNameToChromosome);
            var variantFactory   = new VariantFactory(seqProvider.Sequence, new VariantId());

            var position = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);

            IVariant[]          variants          = GetVariants();
            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);
            var annotatedPosition = new AnnotatedPosition(position, annotatedVariants);

            string observedResult = annotatedPosition.GetJsonString();

            Assert.NotNull(observedResult);
            Assert.Contains("\"breakendEventId\":\"MantaBND:2312:0:1:0:0:0:0\"", observedResult);
        }
示例#10
0
        public void GetJsonString_StrelkaSomatic()
        {
            const string vcfLine = "chr1	13813	.	T	G	.	LowQscore	SOMATIC;QSS=33;TQSS=1;NT=ref;QSS_NT=16;TQSS_NT=1;SGT=TT->GT;DP=266;MQ=23.89;MQ0=59;ALTPOS=69;ALTMAP=37;ReadPosRankSum=1.22;SNVSB=5.92;PNOISE=0.00;PNOISE2=0.00;VQSR=1.93";

            var refMinorProvider = new Mock <IRefMinorProvider>();
            var seqProvider      = ParserTestUtils.GetSequenceProvider(13813, "T", 'C', ChromosomeUtilities.RefNameToChromosome);
            var variantFactory   = new VariantFactory(seqProvider.Sequence, new VariantId());

            var position = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, variantFactory);

            IVariant[]          variants          = GetVariants();
            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);
            var annotatedPosition = new AnnotatedPosition(position, annotatedVariants);

            string observedResult = annotatedPosition.GetJsonString();

            Assert.NotNull(observedResult);
            Assert.Contains("\"jointSomaticNormalQuality\":16", observedResult);
            Assert.Contains("\"recalibratedQuality\":1.93", observedResult);
        }
示例#11
0
        public void ParseVcfLine_line_with_only_NonRef_is_refMinor()
        {
            const string vcfLine = "1	10628385	.	C	<NON_REF>	.	LowGQX;HighDPFRatio	END=10628385;BLOCKAVG_min30p3a	GT:GQX:DP:DPF	0/0:24:9:18";

            var refMinorProvider = new Mock <IRefMinorProvider>();

            refMinorProvider.Setup(x => x.GetGlobalMajorAllele(ChromosomeUtilities.Chr1, 10628385)).Returns("T");
            var seqProvider =
                ParserTestUtils.GetSequenceProvider(10628385, "C", 'A', ChromosomeUtilities.RefNameToChromosome);
            var variantFactory = new VariantFactory(seqProvider.Sequence, _vidCreator);

            var position          = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);
            var annotatedVariants = Annotator.GetAnnotatedVariants(position.Variants);

            Assert.Equal("C", position.RefAllele);
            Assert.Equal(new[] { "<NON_REF>" }, position.AltAlleles);
            Assert.Equal("T", position.Variants[0].RefAllele);
            Assert.Equal("C", position.Variants[0].AltAllele);

            // Variants
            Assert.Equal(new[] { "C" }, annotatedVariants.Select(x => x.Variant.AltAllele).ToArray());
        }
示例#12
0
        public void Test_crash_caused_by_variant_trimming()
        {
            const string vcfLine1 = "chr1	8021910	rs373653682	GGTGCTGGACGGTGTCCCT	G	.	.	.";

            var chromosome       = new Chromosome("chr1", "1", 0);
            var refMinorProvider = new Mock <IRefMinorProvider>();

            refMinorProvider.Setup(x => x.IsReferenceMinor(chromosome, 8021910)).Returns(false);
            var refNameToChromosome = new Dictionary <string, IChromosome> {
                ["chr1"] = chromosome
            };
            var variantFactory = new VariantFactory(refNameToChromosome, refMinorProvider.Object, false);
            var position1      = VcfReaderUtils.ParseVcfLine(vcfLine1, variantFactory, refNameToChromosome);

            var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);

            // SimplePositions
            Assert.Equal(new[] { "G" }, position1.AltAlleles);

            // Variants
            Assert.Equal(new[] { "" }, annotatedVariants1.Select(x => x.Variant.AltAllele).ToArray());
        }
示例#13
0
        public void ParseVcfLine_non_informative_alleles_or_NonRef_filtered_only_in_variants()
        {
            const string vcfLine1 = "chr1	13133	.	T	<*>,G	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine2 = "chr1	13133	.	T	*,C	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine3 = "chr1	13133	.	T	<M>,A	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";
            const string vcfLine4 = "chr1	13133	.	T	A,<NON_REF>	36.00	PASS	SNVSB=0.0;SNVHPOL=4	GT:GQ:GQX:DP:DPF:AD	0/1:62:20:7:1:3,4";

            var chromosome       = new Chromosome("chr1", "1", 0);
            var refMinorProvider = new Mock <IRefMinorProvider>();

            refMinorProvider.Setup(x => x.IsReferenceMinor(chromosome, 13133)).Returns(false);
            var refNameToChromosome = new Dictionary <string, IChromosome> {
                ["chr1"] = chromosome
            };
            var variantFactory = new VariantFactory(refNameToChromosome, refMinorProvider.Object, false);

            var position1 = VcfReaderUtils.ParseVcfLine(vcfLine1, variantFactory, refNameToChromosome);
            var position2 = VcfReaderUtils.ParseVcfLine(vcfLine2, variantFactory, refNameToChromosome);
            var position3 = VcfReaderUtils.ParseVcfLine(vcfLine3, variantFactory, refNameToChromosome);
            var position4 = VcfReaderUtils.ParseVcfLine(vcfLine4, variantFactory, refNameToChromosome);

            var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);
            var annotatedVariants2 = Annotator.GetAnnotatedVariants(position2.Variants);
            var annotatedVariants3 = Annotator.GetAnnotatedVariants(position3.Variants);
            var annotatedVariants4 = Annotator.GetAnnotatedVariants(position4.Variants);

            // SimplePositions
            Assert.Equal(new[] { "<*>", "G" }, position1.AltAlleles);
            Assert.Equal(new[] { "*", "C" }, position2.AltAlleles);
            Assert.Equal(new[] { "<M>", "A" }, position3.AltAlleles);
            Assert.Equal(new[] { "A", "<NON_REF>" }, position4.AltAlleles);

            // Variants
            Assert.Equal(new[] { "G" }, annotatedVariants1.Select(x => x.Variant.AltAllele).ToArray());
            Assert.Equal(new[] { "C" }, annotatedVariants2.Select(x => x.Variant.AltAllele).ToArray());
            Assert.Equal(new[] { "A" }, annotatedVariants3.Select(x => x.Variant.AltAllele).ToArray());
            Assert.Equal(new[] { "A" }, annotatedVariants4.Select(x => x.Variant.AltAllele).ToArray());
        }
示例#14
0
        public void ParseVcfLine_line_with_only_NonRef_is_not_refMinor()
        {
            const string vcfLine          = "1	10005	.	C	<NON_REF>	.	LowGQX	END=10034;BLOCKAVG_min30p3a	GT:GQX:DP:DPF	0/0:3:1:0";
            var          chromosome       = new Chromosome("chr1", "1", 0);
            var          refMinorProvider = new Mock <IRefMinorProvider>();

            refMinorProvider.Setup(x => x.IsReferenceMinor(chromosome, 10005)).Returns(false);
            var refNameToChromosome = new Dictionary <string, IChromosome> {
                ["1"] = chromosome
            };
            var variantFactory = new VariantFactory(refNameToChromosome, refMinorProvider.Object, false);

            var position          = VcfReaderUtils.ParseVcfLine(vcfLine, variantFactory, refNameToChromosome);
            var annotatedVariants = Annotator.GetAnnotatedVariants(position.Variants);

            Assert.Equal("C", position.RefAllele);
            Assert.Equal(new[] { "<NON_REF>" }, position.AltAlleles);
            Assert.Equal("C", position.Variants[0].RefAllele);
            Assert.Equal("<NON_REF>", position.Variants[0].AltAllele);

            // Variants
            Assert.Null(annotatedVariants);
        }